diff --git a/StringTools.ecl b/StringTools.ecl index c4e6f22..db28f9b 100644 --- a/StringTools.ecl +++ b/StringTools.ecl @@ -3,21 +3,20 @@ EXPORT StringTools := MODULE EXPORT regexLoopRec := {STRING Regex; STRING repl}; - EXPORT regexLoop(STRING inStr, DATASET(regexLoopRec) regexDS, BOOLEAN noCaseRegex = TRUE, BOOLEAN TidyToo = TRUE) := FUNCTION - - /* ------------------------------------------------------------- - Loops through two sets and conducts a number of regex substitutions. - Takes a database containing regex and replacement (see above RECORD, RegexLoopRec). - - Note this NOCASES by default. - - @param inStr String - text to be replaced - @param regexDS DataSet - See above recordset (RegexLoopRec). List of from and to strings, from can be a regex and replacement can contain capture groups. - @param noCaseRegex Boolean - Should the regex be conducted with nocase? Defaults to TRUE - @param TidyToo Boolean - Shoulde the strings be lowercased and trimmed left and right before commencing? Defaults to TRUE - - @return - String with all regexs applied in order - -------------------------------------------------------------*/ + EXPORT regexLoop(STRING inStr, DATASET(regexLoopRec) regexDS, BOOLEAN noCaseRegex = TRUE, BOOLEAN TidyToo = TRUE) := FUNCTION + /** + * Loops through two sets and conducts a number of regex substitutions. + * Takes a database containing regex and replacement (see above RECORD, RegexLoopRec). + * + * Note this NOCASES by default. + * + * @param inStr text to be replaced + * @param regexDS See above recordset (RegexLoopRec). List of from and to strings, from can be a regex and replacement can contain capturegroups. + * @param noCaseRegex Should the regex be conducted with nocase? Defaults to TRUE + * @param TidyToo Should the strings be lowercased and trimmed left and right before commencing? 
Defaults to TRUE + * + * @return String with all regexs applied in order + */ LOCAL aString := IF(TidyToo, TRIM(std.Str.ToLowerCase(inStr), LEFT, RIGHT), inStr); LOCAL regexDSBlankRow := DATASET([{' ',' '}], regexLoopRec); LOCAL regexDSconcat := regexDSBlankRow + regexDS; @@ -36,15 +35,14 @@ EXPORT StringTools := MODULE EXPORT LongestWord (STRING InWords, STRING seperator = ' ') := FUNCTION - - /* ------------------------------------------------------------- - Takes a multi word string and returns just the longest word - - @param InWords String - collection of words - @param seperator String - word seperator, defaults to space - - @return - String of the longest word - -------------------------------------------------------------*/ + /** + * Takes a multi word string and returns just the longest word + * + * @param InWords collection of words + * @param seperator word separator, defaults to space + * + * @return String of the longest word + */ SplitWords := STD.Str.SplitWords(InWords, seperator); WordDS := DATASET(SplitWords, {STRING words}); //Convert to DS @@ -66,14 +64,14 @@ EXPORT StringTools := MODULE EXPORT NumberSpacing (STRING InWords) := FUNCTION - /* ------------------------------------------------------------- - Helps to create regex matchihng strings by allowing optional spaces between numbers. - Also controlls for presence of hyphens. - - @param InWords String - Text to be modified - - @return - text with optional regex spaces between numbers - -------------------------------------------------------------*/ + /** + * Helps to create regex matching strings by allowing optional spaces between numbers. + * Also controls for presence of hyphens. + * + * @param InWords Text to be modified + * + * @return text with optional regex spaces between numbers + */ ExtNumbers := REGEXREPLACE('([0-9])' , InWords , '[ ]?$1[ ]?'); noHyph := REGEXREPLACE('[ ]?-[ ]?' 
, ExtNumbers , '[ ]?'); @@ -85,16 +83,15 @@ EXPORT StringTools := MODULE EXPORT ShortestWordDistance (STRING inString1, STRING inString2) := FUNCTION - - /* ------------------------------------------------------------- - Does a pairwise comparison of all words in each string, - returns the shortest distance between any two words. - - @param inString1 String - Text to be compared 1 - @param inString2 String - Text to be compared 2 - - @return - text of closest word present in both. Or '' if none - -------------------------------------------------------------*/ + /** + * Does a pairwise comparison of all words in each string, + * returns the shortest distance between any two words. + * + * @param inString1 Text to be compared 1 + * @param inString2 Text to be compared 2 + * + * @return text of closest word present in both. Or '' if none + */ //Extract must have's first as cannot be matching on two word strings and cannot be considering numbers as equal to letters. split1 := DATASET(STD.Str.SplitWords(inString1, ' '), {STRING words;}); split2 := DATASET(STD.Str.SplitWords(inString2, ' '), {STRING words;}); @@ -119,15 +116,15 @@ EXPORT StringTools := MODULE EXPORT allWordsPresentRegex (STRING aStr, STRING sep = ' ') := FUNCTION - /* ------------------------------------------------------------- - Create a regex that takes each word in the input string and - states 'all these must be present to match' - - @param aStr String - Text to be converted - @param sep String - word seperator, defaults to ' ' - - @return - Regex that will find all words in a string in any order - -------------------------------------------------------------*/ + /** + * Create a regex that takes each word in the input string and + * states 'all these must be present to match' + * + * @param aStr Text to be converted + * @param sep word seperator, defaults to ' ' + * + * @return Regex that will find all words in a string in any order + */ aStr1 := REGEXREPLACE(sep, aStr, '\\\\b)(?=.*\\\\b'); aStr2 
:= '^(?=.*\\b' + aStr1 + '\\b).*$'; RETURN aStr2; @@ -135,17 +132,16 @@ EXPORT StringTools := MODULE EXPORT makeBOW(STRING aStr, STRING sep = ' ') := FUNCTION - - /* ------------------------------------------------------------- - Generates a unique, alphabetised word list from a string. - - @param aStr String - Text to be converted - @param sep String - word seperator, defaults to ' ' - - @return - an alphabetised list of all words present. - - TODO: SHOULD BE A MACRO, THIS IS CONVOLUTED. - -------------------------------------------------------------*/ + /** + * Generates a unique, alphabetised word list from a string. + * + * @param aStr Text to be converted + * @param sep word separator, defaults to ' ' + * + * @return an alphabetised list of all words present. + * + * TODO: SHOULD BE A MACRO, THIS IS CONVOLUTED. + */ lower := std.str.tolowercase(aStr); noPunct := REGEXREPLACE('[^0-9a-z]', lower, ' '); @@ -162,21 +158,18 @@ EXPORT StringTools := MODULE EXPORT regexLoopOld(inStr, regex, replacement) := FUNCTIONMACRO - - /* ------------------------------------------------------------- - - ***DEPRICATION WARNING*** Use new version (at top of this module!) - - Loops through two sets and conducts a number of regex substitutions. - Takes two Sets as regex and replacement. - - - @param inStr - a string to correct - @param regex - a set containing regex statements to sub - @param replacement - what to sub the regex statements with - - @return - string with all regexes applied in order - -------------------------------------------------------------*/ + /** + * DEPRECATION WARNING Use new version (at top of this module!) + * + * Loops through two sets and conducts a number of regex substitutions. + * Takes two Sets as regex and replacement. 
+ * + * @param inStr a string to correct + * @param regex a set containing regex statements to sub + * @param replacement what to sub the regex statements with + * + * @return string with all regexs applied in order + */ IMPORT std; diff --git a/TransformTools.ecl b/TransformTools.ecl index 74cbaef..0a25894 100644 --- a/TransformTools.ecl +++ b/TransformTools.ecl @@ -1,38 +1,38 @@ - /* ------------------------------------------------------------- - This MODULE has been created to ease the proces of data exploration in - ECL. Ever wanted to quickly combine two columns without writing a project? - Wanted to output to CSV but had to lookup the syntax? Do a grouped Count - without writing a RECORD definition? Then this package is for you! The - main aim is to make exploration and QA quicker, more logical and easier - to read quickly by using shorthand notation and descriptive verbs. Try - it, we know you'll love it! Inspired by the very excellent dplyr package - in R. - - Note that to make these functions work from within a module or similar - make sure any function or layout calls are shared. Also, you MUST - have imported transformtools as tt otherwise internal imports will - break. - - Dev note, always trim(,ALL) a #TEXT command, it adds spaces - ------------------------------------------------------------- - */ - - //TODO: Should all functions have a LOCAL inDS := inDataSet? This resolves any concatenations or filters before you start the process - // but may cause massive slowdown as the compiler MIGHT read it and thus seperate out the projects + /** + * This MODULE has been created to ease the proces of data exploration in + * ECL. Ever wanted to quickly combine two columns without writing a project? + * Wanted to output to CSV but had to lookup the syntax? Do a grouped Count + * without writing a RECORD definition? Then this package is for you! 
The + * main aim is to make exploration and QA quicker, more logical and easier + * to read quickly by using shorthand notation and descriptive verbs. Try + * it, we know you'll love it! Inspired by the very excellent dplyr package + * in R. + * + * Note that to make these functions work from within a module or similar + * make sure any function or layout calls are shared. Also, you MUST + * have imported transformtools as tt otherwise internal imports will + * break. + * + * Dev note, always trim(,ALL) a #TEXT command, it adds spaces + * + */ + +//TODO: Should all functions have a LOCAL inDS := inDataSet? This resolves any concatenations or filters before you start the process +// but may cause massive slowdown as the compiler MIGHT read it and thus seperate out the projects EXPORT TransformTools := MODULE EXPORT NAMES(inDS) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function takes a dataset and returns the names of all the columns as - 'name1, name2....' so note it returns a string that needs parsing. - - @param inDs DataSet - the dataset to get column names from. - - @return - FullList - list of column names, seperated by spaces. - ------------------------------------------------------------- - */ + /** + * Function takes a dataset and returns the names of all the columns as + * 'name1, name2....' so note it returns a string that needs parsing. + * + * @param inDs the dataset to get column names from. + * + * @return list of column names, seperated by spaces. + * + */ IMPORT std; #EXPORTXML(DSxml, RECORDOF(inDS)); @@ -57,23 +57,23 @@ EXPORT TransformTools := MODULE EXPORT DeSelfer(inDS, inComm) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function takes a command in the form of var + var and modifies - it to LEFT.var + LEFT.var. Allows for - function calls such as REGEXFIND('aaa', LEFT.x). It will not - work for joins (ie adding RIGHT). 
Returns a string - that will need #EXPAND() to use in a project. - - WARNING:if you pass a command where a variable name matches a - function call you will get odd results and errors. Same issue - for strings. In such cases specify LEFT and you'll be fine. - - @param inDS DataSet - the dataset to get colulmn names from. - @param inComm ECL - the command to parse, in raw ECL form, not string. - - @return - ECL - An updataed ECL command with self. and left. added. - ------------------------------------------------------------- - */ + /** + * Function takes a command in the form of var + var and modifies + * it to LEFT.var + LEFT.var. Allows for + * function calls such as REGEXFIND('aaa', LEFT.x). It will not + * work for joins (ie adding RIGHT). Returns a string + * that will need #EXPAND() to use in a project. + * + * WARNING: if you pass a command where a variable name matches a + * function call you will get odd results and errors. Same issue + * for strings. In such cases specify LEFT and you'll be fine. + * + * @param inDS the dataset to get column names from. + * @param inComm the command to parse, in raw ECL form, not string. + * + * @return An updated ECL command with self. and left. added. + * + */ LOCAL columns1 := tt.names(inDS); LOCAL columns2 := REGEXREPLACE(' ', columns1, '|', NOCASE); @@ -96,16 +96,16 @@ EXPORT TransformTools := MODULE EXPORT DROP(inDS, dropCols) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function takes a dataset and a string containing a list columns to - drop. This should take the form of 'col1, col2, col3......' - - @param inDS DataSet - the dataset to change - @param dropCols String - the columns to drop, seperaeted by , - - @return - DataSet without the given columns - ------------------------------------------------------------- - */ + /** + * Function takes a dataset and a string containing a list of columns to + * drop. This should take the form of 'col1, col2, col3......' 
+ * + * @param inDS DataSet - the dataset to change + * @param dropCols String - the columns to drop, separated by , + * + * @return DataSet without the given columns + * + */ LOCAL outDS := PROJECT(inDS, TRANSFORM(RECORDOF(LEFT) AND NOT [#EXPAND(dropCols)], SELF := LEFT)); @@ -114,15 +114,15 @@ EXPORT TransformTools := MODULE EXPORT DROP_ASIS(inDS, dropCol) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function takes a single column name as ECL (not a string) to drop - - @param inDS DataSet - the dataset to change - @param dropCol ECL - the column to drop - - @return - DataSet without the given column - ------------------------------------------------------------- - */ + /** + * Function takes a single column name as ECL (not a string) to drop + * + * @param inDS the dataset to change + * @param dropCol the column to drop + * + * @return DataSet without the given column + * + */ LOCAL outDS := PROJECT(inDS, TRANSFORM(RECORDOF(LEFT) AND NOT [dropCol], SELF := LEFT)); @@ -131,16 +131,16 @@ EXPORT TransformTools := MODULE ENDMACRO; EXPORT RENAME(inDS, currentName, newName) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function renames given column in the input DS - - @param inDS DataSet - the dataset to change - @param currentName ECL - current column name - @param newName ECL - name to replace current with - - @return - DataSet with renamed column - ------------------------------------------------------------- - */ + /** + * Function renames given column in the input DS + * + * @param inDS the dataset to change + * @param currentName current column name + * @param newName name to replace current with + * + * @return DataSet with renamed column + * + */ LOCAL outRec := RECORD RECORDOF(inDS) AND NOT [currentName]; @@ -157,16 +157,16 @@ EXPORT TransformTools := MODULE EXPORT SELECT(inDS, keepCols) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function takes 
a dataset and a string containing a list columns to - keep. This should take the form of 'col1, col2, col3......' - - @param inDS DataSet - the dataset to change - @param keepCols String - the columns to keep - - @return - DataSet with all other columns dropped - ------------------------------------------------------------- - */ + /** + * Function takes a dataset and a string containing a list columns to + * keep. This should take the form of 'col1, col2, col3......' + * + * @param inDS the dataset to change + * @param keepCols the columns to keep + * + * @return DataSet with all other columns dropped + * + */ //This line is a bodge, yes. What it does is prevent an error when you select all columns in a DS //Can happen, especially if you use this for function calls LOCAL tempDS := tt.append(inDS, INTEGER1, THISISATEMPORARYFIELDADDEDBYROBMANSFIELDON20180301, 1); @@ -180,19 +180,19 @@ EXPORT TransformTools := MODULE EXPORT SELECT_ASIS(inDS, keepCol) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function takes a dataset and a the name of a column to keep - - @param inDS DataSet - the dataset to change - @param keepCol ECL - the column to keep - - @return - DataSet with all but one column dropped - ------------------------------------------------------------- - */ + /** + * Function takes a dataset and a the name of a column to keep + * + * @param inDS the dataset to change + * @param keepCol the column to keep + * + * @return DataSet with all but one column dropped + * + */ - //This line is a bodge, yes. What it does is prevent an error when you select all columns in a DS - //Can happen, especially if you use this for function calls - LOCAL tempDS := tt.append(inDS, INTEGER1, THISISATEMPORARYFIELDADDEDBYROBMANSFIELDON20180301, 1); + //This line is a bodge, yes. 
What it does is prevent an error when you select all columns in a DS + //Can happen, especially if you use this for function calls + LOCAL tempDS := tt.append(inDS, INTEGER1, THISISATEMPORARYFIELDADDEDBYROBMANSFIELDON20180301, 1); LOCAL dropCols := {RECORDOF(inDS) AND NOT [keepCol]}; LOCAL outRec := {RECORDOF(inDS) AND NOT dropCols}; LOCAL outDS := PROJECT(inDS, TRANSFORM(outRec, SELF := LEFT)); @@ -201,20 +201,20 @@ EXPORT TransformTools := MODULE EXPORT MUTATE(inDS, mutateColIn, comm) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function performs a transformation command on the given DS. - column definition must already exist in the source DS, if you - want to create a new column use append. Note that you may supply - a transform without SELF or LEFT. It will be parsed with - DeSelfer function. - - @param inDS DataSet - the dataset to change - @param String - mutateColIn - column to mutate - @param comm ECL- transform command - - @return - DataSet with updated column - ------------------------------------------------------------- - */ + /** + * Function performs a transformation command on the given DS. + * column definition must already exist in the source DS, if you + * want to create a new column use append. Note that you may supply + * a transform without SELF or LEFT. It will be parsed with + * DeSelfer function. + * + * @param inDS the dataset to change + * @param mutateColIn column to mutate + * @param comm transform command + * + * @return DataSet with updated column + * + */ LOCAL mutateCol := 'SELF.' + REGEXREPLACE('^SELF\\s*\\.', TRIM(#TEXT(mutateColIn), ALL), '', NOCASE); @@ -227,21 +227,21 @@ EXPORT TransformTools := MODULE EXPORT MUTATE_OLD(inDS, comm) := FUNCTIONMACRO - /* ------------------------------------------------------------- - **DEPRECATION WARNING** - - Function performs a transformation command on the given DS. 
- SELF definition must already exist in the source DS, if you - want to create a new column use append. Unlike the new form - (above) this version requires the full SELF.x := LEFT.y - transform. Kept for backwards compatability - - @param inDs DataSet - the dataset to change - @param comm ECL - transform command - - @return - DataSet with updated column - ------------------------------------------------------------- - */ + /** + * DEPRECATION WARNING + * + * Function performs a transformation command on the given DS. + * SELF definition must already exist in the source DS, if you + * want to create a new column use append. Unlike the new form + * (above) this version requires the full SELF.x := LEFT.y + * transform. Kept for backwards compatability + * + * @param inDs the dataset to change + * @param comm transform command + * + * @return DataSet with updated column + * + */ LOCAL outDS := PROJECT(inDS, TRANSFORM(RECORDOF(LEFT), comm, @@ -250,45 +250,45 @@ EXPORT TransformTools := MODULE ENDMACRO; EXPORT APPEND(inDS, colType, colName, comm) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function creates a new column in the inserted DS, dictated by - colType and colName. The transform for the new column is dictated - by the comm command. Only one column can be added at a time. Note - that you do not need to specify SELF or LEFT, these are added at - runtime using DeSelfer function. - - @param inDS DataSet - the dataset to change - @param colType ECL - the type of the column to add - @param colName ECL - the name of the column to add - @param comm ECL - transform command - - @return - DataSet with updated column - ------------------------------------------------------------- - */ + /** + * Function creates a new column in the inserted DS, dictated by + * colType and colName. The transform for the new column is dictated + * by the comm command. Only one column can be added at a time. 
Note + * that you do not need to specify SELF or LEFT, these are added at + * runtime using DeSelfer function. + * + * @param inDS the dataset to change + * @param colType the type of the column to add + * @param colName the name of the column to add + * @param comm transform command + * + * @return DataSet with updated column + * + */ LOCAL outDS := PROJECT(inDS, TRANSFORM({RECORDOF(LEFT), colType colName}, SELF.colName := #EXPAND(tt.deSelfer(inDS, comm)); SELF := LEFT)); RETURN outDS; - ENDMACRO; + ENDMACRO; EXPORT APPEND_OLD(inDS, colType, colName, comm) := FUNCTIONMACRO - /* ------------------------------------------------------------- - **DEPRECATION WARNING** - - Function creates a new column in the inserted DS, dictated by - colType and colName. The transform for the new column is dictated - by the comm command. Only one column can be added at a time. - - @param inDs - the dataset to change - @param colType ECL - the type of the column to add - @param colName ECL - the name of the column to add - @param comm ECL - transform command - - @return - DataSet with added column - ------------------------------------------------------------- - */ + /** + * DEPRECATION WARNING + * + * Function creates a new column in the inserted DS, dictated by + * colType and colName. The transform for the new column is dictated + * by the comm command. Only one column can be added at a time. 
+ * + * @param inDs the dataset to change + * @param colType the type of the column to add + * @param colName the name of the column to add + * @param comm transform command + * + * @return DataSet with added column + * + */ LOCAL outDS := PROJECT(inDS, TRANSFORM({RECORDOF(LEFT), colType colName}, SELF.colName := comm; @@ -298,26 +298,26 @@ EXPORT TransformTools := MODULE EXPORT FILTERSET(inDS, aCol, filterSetIn, isin = TRUE) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function filters a dataset, similarly to the IN command but - without the extra overhead that it entails (basically does a - join under the bonnet). Takes a dataset and a column to filter - on, followed by a set to filter on (which is transformed to a - table at readin). The isin command (defaulted to true) will - determine if you are filtering for x IN y (true) or x NOT IN y - (false). - - Note that using two datasets and tt.filter() is faster and - preferred in most cases. - - @param inDS DataSet - the dataset to change - @param aCol ECL - the column in inDS to filter on - @param filterSetIn Set - a set to filter upon - @param isin Boolean - do you want the filterset to be in the column (true) or not (false) - - @return - DataSet - Filtered Dataset - ------------------------------------------------------------- - */ + /** + * Function filters a dataset, similarly to the IN command but + * without the extra overhead that it entails (basically does a + * join under the bonnet). Takes a dataset and a column to filter + * on, followed by a set to filter on (which is transformed to a + * table at readin). The isin command (defaulted to true) will + * determine if you are filtering for x IN y (true) or x NOT IN y + * (false). + * + * Note that using two datasets and tt.filter() is faster and + * preferred in most cases. 
+ * + * @param inDS the dataset to change + * @param aCol the column in inDS to filter on + * @param filterSetIn a set to filter upon + * @param isin do you want the filterset to be in the column (true) or not (false) + * + * @return Filtered Dataset + * + */ LOCAL filterDSfromSet := DATASET(filterSetIn, {STRING match;}); LOCAL uniqueFilterDS := DEDUP(SORT(DISTRIBUTE(filterDSfromSet, HASH(match)), match, LOCAL), match, LOCAL); @@ -330,52 +330,52 @@ EXPORT TransformTools := MODULE ENDMACRO; - EXPORT FILTER(inDS, filterDS, inCol, filterCol, isin = TRUE) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Function filters a dataset based on the contents of another. - Takes a dataset and a dataset to filter on, followed by a - the relevant column names. The isin command (defaulted to true) will - determine if you are filtering for x IN y (true) or x NOT IN y - (false). - - Note that the input is deduped before filtering, no need to do this - yourself. - - @param inDS DataSet - the dataset to change - @param filterDS DataSet - the DS to filter by - @param inCol ECL - the column in the original dataset - @param filterCol ECL- the column in the dataset to filter by - @param isin Boolean - do you want the filter to be in the column (true) or not (false) - - @return - DataSet - Filtered Dataset - -------------------------------------------------------------*/ + EXPORT FILTER(inDS, filterDS, inCol, filterCol, isin = TRUE) := FUNCTIONMACRO + /** + * Function filters a dataset based on the contents of another. + * Takes a dataset and a dataset to filter on, followed by a + * the relevant column names. The isin command (defaulted to true) will + * determine if you are filtering for x IN y (true) or x NOT IN y + * (false). + * + * Note that the input is deduped before filtering, no need to do this + * yourself. 
+ * + * @param inDS the dataset to change + * @param filterDS the DS to filter by + * @param inCol the column in the original dataset + * @param filterCol the column in the dataset to filter by + * @param isin do you want the filter to be in the column (true) or not (false) + * + * @return Filtered Dataset + */ LOCAL FilterColDS := TABLE(filterDS, {TYPEOF(filterDS.filterCol) filterCol := filterDS.filterCol}); LOCAL uniqueDS := DEDUP(SORT(DISTRIBUTE(FilterColDS, HASH(filterCol)), filterCol, LOCAL), filterCol, LOCAL); LOCAL filteredDS := IF(isin, - JOIN(inDS, uniqueDS, LEFT.inCol = RIGHT.filterCol, TRANSFORM(RECORDOF(LEFT), SELF := LEFT), INNER, SMART), - JOIN(inDS, uniqueDS, LEFT.inCol = RIGHT.filterCol, TRANSFORM(RECORDOF(LEFT), SELF := LEFT), LEFT ONLY, SMART)); + JOIN(inDS, uniqueDS, LEFT.inCol = RIGHT.filterCol, TRANSFORM(RECORDOF(LEFT), SELF := LEFT), INNER, SMART), + JOIN(inDS, uniqueDS, LEFT.inCol = RIGHT.filterCol, TRANSFORM(RECORDOF(LEFT), SELF := LEFT), LEFT ONLY, SMART)); LOCAL outDS := filteredDS; - RETURN outDS; + RETURN outDS; ENDMACRO; EXPORT DISTINCT_ASIS(inputDataSet, DedupOn, DistributeFlag = TRUE) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Performs a dedup with optional distribution, if only DistributeOn - parameter given then it's sorted and deduped on the same value. - Note that this only takes a single column name (although concatenated - columns are allowed(but, careful! I'd add a seperator there!)) - - @param inDS DataSet - the dataset to change - @param DedupOn ECL - column to distribute by (and dedup on if other parameters not given) - @param DistributeFlag Boolean - if FALSE then don't re-distribute - - @return - DataSet that has been deduped - - TODO: allow DedupOn to be '' and dedup on whole dataset. 
- ------------------------------------------------------------- - */ + /** + * Performs a dedup with optional distribution, if only DistributeOn + * parameter given then it's sorted and deduped on the same value. + * Note that this only takes a single column name (although concatenated + * columns are allowed(but, careful! I'd add a seperator there!)) + * + * @param inDS the dataset to change + * @param DedupOn column to distribute by (and dedup on if other parameters not given) + * @param DistributeFlag if FALSE then don't re-distribute + * + * @return DataSet that has been deduped + * + * TODO: allow DedupOn to be '' and dedup on whole dataset. + * + */ LOCAL distdInDs := IF(DistributeFlag, DISTRIBUTE(inputDataSet, HASH32(DedupOn)), inputDataSet); LOCAL sortedDs := SORT(distdInDs, DedupOn, LOCAL); LOCAL dedDS := DEDUP(sortedDs, DedupOn, LOCAL); @@ -384,22 +384,22 @@ EXPORT TransformTools := MODULE ENDMACRO; EXPORT DISTINCT(inputDataSet, DedupOn, DistributeFlag = TRUE) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Performs a dedup with optional distribution, if only DistributeOn - parameter given then it's sorted and deduped on the same value. - Unlike the _ASIS call (above) this version can take a string in - the form of 'col1, col2, col3....' allowing multiple columns to be - used. - - @param inDS DataSet - the dataset to change - @param DedupOn String - columns to distribute by and dedup on, comma seperated - @param DistributeFlag Boolean - if FALSE then don't re-distribute - - @return - DataSet that has been deduped - - TODO: allow dedup to be '' and distribute on whole dataset. - ------------------------------------------------------------- - */ + /** + * Performs a dedup with optional distribution, if only DistributeOn + * parameter given then it's sorted and deduped on the same value. + * Unlike the _ASIS call (above) this version can take a string in + * the form of 'col1, col2, col3....' 
allowing multiple columns to be + * used. + * + * @param inDS the dataset to change + * @param DedupOn columns to distribute by and dedup on, comma seperated + * @param DistributeFlag if FALSE then don't re-distribute + * + * @return DataSet that has been deduped + * + * TODO: allow dedup to be '' and distribute on whole dataset. + * + */ LOCAL distdInDs := IF(DistributeFlag, DISTRIBUTE(inputDataSet, HASH32(#EXPAND(DedupOn))), inputDataSet); LOCAL sortedDs := SORT(distdInDs, #EXPAND(DedupOn), LOCAL); LOCAL dedDS := DEDUP(sortedDs, #EXPAND(DedupOn), LOCAL); @@ -410,69 +410,68 @@ EXPORT TransformTools := MODULE EXPORT ARRANGE_ASIS(inputDataSet, SortOn) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Arranges the input data set by the given column name, takes a single - column (not a string, see ARRANGE_TXT for that functionality). - - @param inDS DataSet - the dataset to change - @param SortOn ECL - column to arrange by - - @return - DataSet that has been sorted - - TODO: allow sorton to be '' and sort on whole dataset. - ------------------------------------------------------------- - */ + /** + * Arranges the input data set by the given column name, takes a single + * column (not a string, see ARRANGE_TXT for that functionality). + * + * @param inDS the dataset to change + * @param SortOn column to arrange by + * + * @return DataSet that has been sorted + * + * TODO: allow sorton to be '' and sort on whole dataset. 
+ * + */ LOCAL sortedDs := SORT(inputDataSet, SortOn); - // IF(DistributeFlag, - // SORT(DISTRIBUTE(inputDataSet, HASH(SortOn)), SortOn, LOCAL), - // SORT(inputDataSet, SortOn) - // ); + // IF(DistributeFlag, + // SORT(DISTRIBUTE(inputDataSet, HASH(SortOn)), SortOn, LOCAL), + // SORT(inputDataSet, SortOn) + // ); RETURN sortedDs; ENDMACRO; EXPORT ARRANGE(inputDataSet, SortOn) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Arranges the input data set by the given column name, takes a - string of column names in the form of 'col1, col2...' - (See ARRANGE for a single column call without the string). - - @param inDS DataSet - the dataset to change - @param SortOn String - columns to arrange by, comma seperated - - @return - DataSet that has been sorted - - TODO: allow sorton to be '' and sort on whole dataset. - ------------------------------------------------------------- + /** + * Arranges the input data set by the given column name, takes a + * string of column names in the form of 'col1, col2...' + * (See ARRANGE_ASIS for a single column call without the string). + * + * @param inputDataSet the dataset to change + * @param SortOn columns to arrange by, comma separated + * + * @return DataSet that has been sorted + * + * TODO: allow sorton to be '' and sort on whole dataset. 
+ * */ // LOCAL distVar := STD.Str.SplitWords(SortOn,',')[1]; // LOCAL sortedDs := IF(DistributeFlag, - // SORT(DISTRIBUTE(inputDataSet, HASH(#EXPAND(SortOn))), #EXPAND(SortOn), LOCAL), - // SORT(inputDataSet, #EXPAND(SortOn)) - // ); + // SORT(DISTRIBUTE(inputDataSet, HASH(#EXPAND(SortOn))), #EXPAND(SortOn), LOCAL), + // SORT(inputDataSet, #EXPAND(SortOn)) + // ); sortedDs := SORT(inputDataSet, #EXPAND(SortOn)); RETURN sortedDs; ENDMACRO; EXPORT ARRANGEDISTINCT(inDS, DedupOn, SortOn, DistOn, DistributeFlag = TRUE) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Performs a distribute/sort/distinct opeation, with less boiler plate code. - - @param inDS DataSet - the dataset to dedup - @param DedupOn String - columns to Dedup on, comma seperated - @param SortOn String - columns to Sort on, comma seperated - @param DistOn String - columns to Distribute on, comma seperated - @param DistributeFlag Boolean - if FALSE then don't re-distribute - - @return - DataSet that has been dedupped. - - TODO: allow dedup to be '' and do on whole dataset. - ------------------------------------------------------------- - */ + /** + * Performs a distribute/sort/distinct operation, with less boilerplate code. + * + * @param inDS the dataset to dedup + * @param DedupOn columns to Dedup on, comma separated + * @param SortOn columns to Sort on, comma separated + * @param DistOn columns to Distribute on, comma separated + * @param DistributeFlag if FALSE then don't re-distribute + * + * @return DataSet that has been deduped. + * + * TODO: allow dedup to be '' and do on whole dataset. 
+ */ LOCAL distdInDs := IF(DistributeFlag, DISTRIBUTE(inDS, HASH32(#EXPAND(DistOn))), inDS); LOCAL sortedDs := SORT(distdInDs, #EXPAND(SortOn), LOCAL); @@ -484,25 +483,24 @@ EXPORT TransformTools := MODULE EXPORT DUPLICATED(inDS, colName) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Adds a column to the given DS that flags if it is a duplicate - or not. ALL duplicates are flagged, not just those after the first - if you want a dedup then see: DISTINCT. New column is called: - duplicated_[columnName]. - - @param inDS DataSet - the dataset to change - @param colName ECL - column to check for dups on - - @return - DataSet with an extra column: duplicated_[colName] - ------------------------------------------------------------- - */ - + /** + * Adds a column to the given DS that flags if it is a duplicate + * or not. ALL duplicates are flagged, not just those after the first + * if you want a dedup then see: DISTINCT. New column is called: + * duplicated_[columnName]. + * + * @param inDS the dataset to change + * @param colName column to check for dups on + * + * @return DataSet with an extra column: duplicated_[colName] + * + */ LOCAL TempDS1 := tt.rename(inDS, colName, grp); LOCAL TempDS := tt.select_asis(TempDS1, grp); LOCAL CountRec := {grp := TempDS.grp; n := COUNT(GROUP)}; LOCAL counts := TABLE(TempDS, CountRec, grp, MERGE); - LOCAL dupColName := 'duplicated_' + #TEXT(colName); + LOCAL dupColName := 'duplicated_' + #TEXT(colName); LOCAL dupedRecs := JOIN(inDS, counts, LEFT.colName = RIGHT.grp, @@ -516,16 +514,15 @@ EXPORT TransformTools := MODULE EXPORT COUNTN(inDS, GroupColumns) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Produce a count based on the given grouping variables. 
Takes a - string so multiple values can be given - - @param inDS DataSet - the dataset to count - @param GroupColumns String - the columns to group on, comma seperated - - @return - DataSet of counts based on grouping columns - ------------------------------------------------------------- - */ + /** + * Produce a count based on the given grouping variables. Takes a + * string so multiple values can be given + * + * @param inDS the dataset to count + * @param GroupColumns the columns to group on, comma seperated + * + * @return DataSet of counts based on grouping columns + */ LOCAL neededDS := tt.select(inDS, GroupColumns); LOCAL countRec := {neededDS; INTEGER n := COUNT(GROUP);}; @@ -537,16 +534,16 @@ EXPORT TransformTools := MODULE EXPORT TO_CSV(inDS, outName, EXPIRY = 365) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Writes a CSV without having to remember the whole syntax. - - @param inDs DataSet - the dataset to output - @param outName String - the CSV name, will auto add ~ if omitted - @param EXPIRY Integer - expiry of the output, defaults to 365 days. - - @return - String indicating write location - ------------------------------------------------------------- - */ + /** + * Writes a CSV without having to remember the whole syntax. + * + * @param inDs the dataset to output + * @param outName the CSV name, will auto add ~ if omitted + * @param EXPIRY expiry of the output, defaults to 365 days. + * + * @return String indicating write location + * + */ outNameCorr := IF(REGEXFIND('~', outName, NOCASE), outName, '~' + outName); OUTPUT(inDS, , outNameCorr, OVERWRITE, CSV(HEADING(SINGLE), QUOTE('"')), EXPIRE(EXPIRY), OVERWRITE); @@ -556,16 +553,16 @@ EXPORT TransformTools := MODULE EXPORT TO_THOR(inDS, outName, EXPIRY = 365) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Writes a thor file without having to remember the whole syntax. 
- - @param inDs DataSet - the dataset to output - @param outName String - the CSV name, will auto add ~ if omitted - @param EXPIRY Ineger - expiry of the output, defaults to 365 days. - - @return - String indicating write location - ------------------------------------------------------------- - */ + /** + * Writes a thor file without having to remember the whole syntax. + * + * @param inDs the dataset to output + * @param outName the thor file name, will auto add ~ if omitted + * @param EXPIRY expiry of the output, defaults to 365 days. + * + * @return String indicating write location + * + */ outNameCorr := IF(REGEXFIND('~', outName, NOCASE), outName, '~' + outName); OUTPUT(inDS, ,outNameCorr, THOR, EXPIRE(EXPIRY), OVERWRITE); @@ -575,18 +572,17 @@ EXPORT TransformTools := MODULE EXPORT HEAD(inDS, /* nameIn = '' ,*/ nrows = 100) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Ouputs a table as a named output with only the top few rows visible. - - Do note that multiple calls to the same dataset name will error out your - workunit! - - @param inDS DataSet - the dataset to output - @param nrows Integer - rows to display, defaults to 100. - - @return - Output of the required length and the name of the dataset - ------------------------------------------------------------- - */ + /** + * Outputs a table as a named output with only the top few rows visible. + * Do note that multiple calls to the same dataset name will error out your + * workunit! + * + * @param inDS the dataset to output + * @param nrows rows to display, defaults to 100. 
+ * + * @return Output of the required length and the name of the dataset + * + */ // LOCAL nameOut := IF(nameIn = '', REGEXREPLACE('[^a-z0-9]', #TEXT(inDataSet), '', NOCASE), REGEXREPLACE('[^a-z0-9]', nameIn, '', NOCASE)); LOCAL nameOut := REGEXREPLACE('[^a-z0-9]', #TEXT(inDS), '', NOCASE); @@ -597,19 +593,18 @@ EXPORT TransformTools := MODULE EXPORT NROWS(inDS) := FUNCTIONMACRO - /* ------------------------------------------------------------- - Counts the rows in a dataset and produces an output with a sensible - name. This allows you to quickly view counts without having to create - a named output box or new DataSet. - - Do note that multiple calls to the same dataset name will error out your - workunit! - - @param inDS DataSet - the dataset to count - - @return - Output of the row number and the name COUNT[dataset name] - ------------------------------------------------------------- - */ + /** + * Counts the rows in a dataset and produces an output with a sensible + * name. This allows you to quickly view counts without having to create + * a named output box or new DataSet. + * + * Do note that multiple calls to the same dataset name will error out your + * workunit! + * + * @param inDS the dataset to count + * + * @return Output of the row number and the name COUNT[dataset name] + */ IMPORT std; LOCAL nameOut := 'COUNT' + std.str.tolowercase(REGEXREPLACE('[^a-z0-9]', #TEXT(inDS), '', NOCASE));