diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..11a34bd --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,26 @@ +name: Deploy +on: + push: + tags: + - '[0-9]*.[0-9]*.[0-9]*' +jobs: + deploy: + runs-on: ubuntu-latest + env: + APP_IMAGE: keboola-component + KBC_DEVELOPERPORTAL_USERNAME: ${{ secrets.KBC_DEVELOPERPORTAL_USERNAME }} + KBC_DEVELOPERPORTAL_PASSWORD: ${{ secrets.KBC_DEVELOPERPORTAL_PASSWORD }} + KBC_DEVELOPERPORTAL_VENDOR: ${{ secrets.KBC_DEVELOPERPORTAL_VENDOR }} + KBC_DEVELOPERPORTAL_APP: ${{ secrets.KBC_DEVELOPERPORTAL_APP }} + steps: + - uses: actions/checkout@v2 + - name: Build image + run: docker build -t ${APP_IMAGE} . + - name: Set tag env + run: echo "GITHUB_TAG=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + - name: Deploy + run: | + chmod +x ./deploy.sh + - name: Run scripts + run: | + ./deploy.sh \ No newline at end of file diff --git a/.github/workflows/pushtest.yml b/.github/workflows/pushtest.yml new file mode 100644 index 0000000..b9ca08d --- /dev/null +++ b/.github/workflows/pushtest.yml @@ -0,0 +1,23 @@ +name: PushToTest +on: push +jobs: + pushtotest: + runs-on: ubuntu-latest + env: + APP_IMAGE: keboola-component + KBC_DEVELOPERPORTAL_USERNAME: ${{ secrets.KBC_DEVELOPERPORTAL_USERNAME }} + KBC_DEVELOPERPORTAL_PASSWORD: ${{ secrets.KBC_DEVELOPERPORTAL_PASSWORD }} + KBC_DEVELOPERPORTAL_VENDOR: ${{ secrets.KBC_DEVELOPERPORTAL_VENDOR }} + KBC_DEVELOPERPORTAL_APP: ${{ secrets.KBC_DEVELOPERPORTAL_APP }} + steps: + - uses: actions/checkout@v2 + - name: Build image + run: docker build -t ${APP_IMAGE} . + - name: Push to ECR + run: | + docker pull quay.io/keboola/developer-portal-cli-v2:latest + export REPOSITORY=`docker run --rm -e KBC_DEVELOPERPORTAL_USERNAME -e KBC_DEVELOPERPORTAL_PASSWORD -e KBC_DEVELOPERPORTAL_URL quay.io/keboola/developer-portal-cli-v2:latest ecr:get-repository $KBC_DEVELOPERPORTAL_VENDOR $KBC_DEVELOPERPORTAL_APP` + docker tag $APP_IMAGE:latest $REPOSITORY:test + eval $(docker run --rm -e KBC_DEVELOPERPORTAL_USERNAME -e KBC_DEVELOPERPORTAL_PASSWORD -e KBC_DEVELOPERPORTAL_URL quay.io/keboola/developer-portal-cli-v2:latest ecr:get-login $KBC_DEVELOPERPORTAL_VENDOR $KBC_DEVELOPERPORTAL_APP) + docker push $REPOSITORY:test + docker pull quay.io/keboola/syrup-cli:latest \ No newline at end of file diff --git a/.gitignore b/.gitignore index b83d222..e4984d4 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target/ +.idea \ No newline at end of file diff --git a/src/main/java/keboola/adform/masterdata_extractor/Extractor.java b/src/main/java/keboola/adform/masterdata_extractor/Extractor.java index 2613d67..bfc79dd 100644 --- a/src/main/java/keboola/adform/masterdata_extractor/Extractor.java +++ b/src/main/java/keboola/adform/masterdata_extractor/Extractor.java @@ -45,18 +45,18 @@ public List downloadAndUnzip(List fileList, String folde return Collections.emptyList(); } //download files - downloadFiles(fileList, folderPath); - - List rawFilePaths; - //unzip archives and delete data - List exFiles = unzip(fileList, folderPath); - //delete zipFiles - try { - FileHandler.deleteFile(folderPath + File.separator + fileList.get(0).getPrefix()); - } catch (IOException ex) { - logger.error(ex.getMessage(), ex); - } - return exFiles; + downloadFiles(fileList, "/data/out/files"); + +// List rawFilePaths; +// //unzip archives and delete data +// List exFiles = unzip(fileList, folderPath); +// //delete zipFiles +// try { +// FileHandler.deleteFile(folderPath + File.separator + fileList.get(0).getPrefix()); +// } catch (IOException ex) { +// logger.error(ex.getMessage(), ex); +// } + return null; } diff --git a/src/main/java/keboola/adform/masterdata_extractor/Runner.java b/src/main/java/keboola/adform/masterdata_extractor/Runner.java index 8df5765..e115c0a 100644 --- a/src/main/java/keboola/adform/masterdata_extractor/Runner.java +++ b/src/main/java/keboola/adform/masterdata_extractor/Runner.java @@ -5,12 +5,7 @@ import java.io.File; import java.io.IOException; import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Collections; -import java.util.Date; -import java.util.List; -import java.util.TimeZone; +import java.util.*; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -73,7 +68,7 @@ public static void main(String[] args) { System.exit(1); } boolean dataExtracted = false; - Extractor ex = new Extractor(config.getParams().getUser(), config.getParams().getPass(), config.getParams().getMdListId(), log); + Extractor ex = new Extractor(config.getParams().getUser(), config.getParams().getPass(), config.getParams().getMdListId(), log); try { //authenticate, get session token ex.client.authenticate(); @@ -128,69 +123,69 @@ public static void main(String[] args) { String resFileFolder = outTablesPath + File.separator + prefix.toLowerCase() + ".csv"; List downloadedFiles = ex.downloadAndUnzip(filesSince, resFileFolder); - /*This should not happen, check anyway*/ - if (downloadedFiles.isEmpty()) { - System.out.print("Error downloading files with prefix: " + prefix); - System.err.print("Error downloading files with prefix: " + prefix); - System.exit(1); - } - - //merge downloaded files - String resFileName = prefix.toLowerCase(); - System.out.println("Preparing sliced tables..."); - String[] headerCols = null; - try { - headerCols = prepareSlicedTables(downloadedFiles, config.getParams().getSrcCharset()); - } catch (Exception e) { - System.err.println("Error processing files." + e.getMessage()); - System.exit(2); - } +// /*This should not happen, check anyway*/ +// if (downloadedFiles.isEmpty()) { +// System.out.print("Error downloading files with prefix: " + prefix); +// System.err.print("Error downloading files with prefix: " + prefix); +// System.exit(1); +// } +// +// //merge downloaded files +// String resFileName = prefix.toLowerCase(); +// System.out.println("Preparing sliced tables..."); +// String[] headerCols = null; +// try { +// headerCols = prepareSlicedTables(downloadedFiles, config.getParams().getSrcCharset()); +// } catch (Exception e) { +// System.err.println("Error processing files." + e.getMessage()); +// System.exit(2); +// } +// +// /*Build manifest file*/ +// try { +// String[] pkey = null; +// if (!config.getParams().getKeyMap().containsKey(prefix)) { +// pkey = new String[] {MD_PRIMARY_KEY}; +// } else { +// pkey = config.getParams().getKeyMap().get(prefix); +// } +// buildManifestFile(resFileName, config.getParams().getBucket(), outTablesPath, headerCols, pkey, true); +// } catch (Exception ex1) { +// System.out.println("Error writing manifest file." + ex1.getMessage()); +// System.err.println(ex1.getMessage()); +// System.exit(2); +// } +// i++; +// dataExtracted = true; +// } +// +// +// if (config.hasMeta()) { +// System.out.println("Downloading meta files"); +// List filesSince = null; +// if (config.getParams().isAlwaysGetMeta()) { +// filesSince = fileList.getFilesByPrefix("meta"); +// } else if (config.getParams().getDate_to() != null) { +// filesSince = fileList.getFilesSince(startInterval, config.getParams().getDate_to(), "meta"); +// } else { +// filesSince = fileList.getFilesSince(startInterval, "meta"); +// } +// +// +// List metaFiles = ex.downloadAndUnzip(filesSince, dataPath); +// +// dataExtracted = processMetaDataFiles(metaFiles, config, dataPath, outTablesPath); - /*Build manifest file*/ - try { - String[] pkey = null; - if (!config.getParams().getKeyMap().containsKey(prefix)) { - pkey = new String[] {MD_PRIMARY_KEY}; - } else { - pkey = config.getParams().getKeyMap().get(prefix); - } - buildManifestFile(resFileName, config.getParams().getBucket(), outTablesPath, headerCols, pkey, true); - } catch (Exception ex1) { - System.out.println("Error writing manifest file." + ex1.getMessage()); - System.err.println(ex1.getMessage()); - System.exit(2); - } - i++; - dataExtracted = true; } - - if (config.hasMeta()) { - System.out.println("Downloading meta files"); - List filesSince = null; - if (config.getParams().isAlwaysGetMeta()) { - filesSince = fileList.getFilesByPrefix("meta"); - } else if (config.getParams().getDate_to() != null) { - filesSince = fileList.getFilesSince(startInterval, config.getParams().getDate_to(), "meta"); - } else { - filesSince = fileList.getFilesSince(startInterval, "meta"); - } - - - List metaFiles = ex.downloadAndUnzip(filesSince, dataPath); - - dataExtracted = processMetaDataFiles(metaFiles, config, dataPath, outTablesPath); - - } - - if (dataExtracted && i > 0) { - System.out.println("Files extracted successfully.."); - } else if (!dataExtracted) { - System.out.println("Proccess finished successfully but no meta files were extracted. Check configuration parameters."); - } else { - System.out.println("Proccess finished successfully but only metadata tables were extracted. Check configuration parameters."); - } - System.exit(0); +// if (dataExtracted && i > 0) { +// System.out.println("Files extracted successfully.."); +// } else if (!dataExtracted) { +// System.out.println("Proccess finished successfully but no meta files were extracted. Check configuration parameters."); +// } else { +// System.out.println("Proccess finished successfully but only metadata tables were extracted. Check configuration parameters."); +// } +// System.exit(0); } catch (ExtractorException ex1) { System.out.print("Error extracting data."); System.err.print(ex1.getMessage()); @@ -202,7 +197,7 @@ private static boolean processMetaDataFiles(List metaFiles, KBCConfi if (metaFiles.isEmpty()) { log.warn("No new metadata were retrieved!"); return false; - } + } /*Convert from JSON to csv*/ JsonToCsvConvertor conv = new JsonToCsvConvertor(); for (String metaF : config.getParams().getMetaFiles()) { @@ -242,15 +237,23 @@ private static boolean processMetaDataFiles(List metaFiles, KBCConfi } return true; } - private static void buildManifestFile(String resFileName, String destination, String outPath, String [] cols, String [] pkey, boolean incremental) throws Exception { - ManifestFile.Builder builder = new ManifestFile.Builder(resFileName, destination + "." + resFileName) - .setIncrementalLoad(incremental).setDelimiter(String.valueOf(DEFAULT_SEPARATOR)).setEnclosure(String.valueOf(DEFAULT_ENCLOSURE)) - .setColumns(cols); - if (pkey != null) { - builder.setPrimaryKey(pkey); - } - ManifestFile manFile = builder.build(); - ManifestBuilder.buildManifestFile(manFile, outPath, resFileName + ".csv"); + + private static void buildManifestFile(String resFileName, String destination, String outPath, String[] cols, String[] pkey, boolean incremental) throws Exception { + // remove BOM because some files contain it and KBC is incapable of handling it + if (cols != null) { + System.out.println(cols[0]); + cols[0] = CsvUtils.removeUTF8BOM(cols[0]); + } + + System.out.println(Arrays.toString(cols)); + ManifestFile.Builder builder = new ManifestFile.Builder(resFileName, destination + "." + resFileName) + .setIncrementalLoad(incremental).setDelimiter(String.valueOf(DEFAULT_SEPARATOR)).setEnclosure(String.valueOf(DEFAULT_ENCLOSURE)) + .setColumns(cols); + if (pkey != null) { + builder.setPrimaryKey(pkey); + } + ManifestFile manFile = builder.build(); + ManifestBuilder.buildManifestFile(manFile, outPath, resFileName + ".csv"); } private static String[] prepareSlicedTables(List downloadedFiles, String charset) throws Exception { @@ -258,32 +261,32 @@ private static String[] prepareSlicedTables(List downloadedFiles, St for(MasterFile mf : downloadedFiles) { files.add(new File(mf.getLocalAbsolutePath())); } - + // get colums String[] headerCols = CsvUtils.readHeader(files.get(0), DEFAULT_SEPARATOR, DEFAULT_ENCLOSURE, DEFAULT_ESCAPE_CHAR, false, false, Charset.forName(charset)); // remove headers and create results for (File mFile : files) { - CsvUtils.removeHeaderFromCsv(mFile, Charset.forName(charset)); + CsvUtils.removeHeaderFromCsv(mFile, Charset.forName(charset)); } //in case some files did not contain any data - CsvUtils.deleteEmptyFiles(files); + CsvUtils.deleteEmptyFiles(files); return headerCols; - + } private static void printEnvStats() { // Get current size of heap in bytes - long heapSize = Runtime.getRuntime().totalMemory(); + long heapSize = Runtime.getRuntime().totalMemory(); // Get maximum size of heap in bytes. The heap cannot grow beyond this size.// Any attempt will result in an OutOfMemoryException. long heapMaxSize = Runtime.getRuntime().maxMemory(); // Get amount of free memory within the heap in bytes. This size will increase // after garbage collection and decrease as new objects are created. long heapFreeSize = Runtime.getRuntime().freeMemory(); - + log.info("Initial Heap size (MB): " + heapSize/1000000); log.info("Max Heap size (MB): " + heapMaxSize/1000000); - log.info("Initial free memory (MB): " + heapFreeSize/1000000); + log.info("Initial free memory (MB): " + heapFreeSize/1000000); } } diff --git a/src/main/java/keboola/adform/masterdata_extractor/utils/CsvUtils.java b/src/main/java/keboola/adform/masterdata_extractor/utils/CsvUtils.java index 0ae4da1..485a877 100644 --- a/src/main/java/keboola/adform/masterdata_extractor/utils/CsvUtils.java +++ b/src/main/java/keboola/adform/masterdata_extractor/utils/CsvUtils.java @@ -22,118 +22,120 @@ import au.com.bytecode.opencsv.CSVWriter; /** - * * @author David Esner * @created 2016 */ public class CsvUtils { - /** - * Removes first line from the specified file. Using NIO - fast. - * - * @param csvFile - * @throws IOException - */ - public static void removeHeaderFromCsv(File csvFile, Charset charset) throws Exception { - File outFile = new File(csvFile.getParent() + File.separator + "tempRes"); - try ( - InputStreamReader fr = new InputStreamReader( new FileInputStream(csvFile),charset); - BufferedReader br = new BufferedReader(fr); - FileWriter fileStream = new FileWriter(outFile); - BufferedWriter out = new BufferedWriter(fileStream); - ) { - String line; - br.readLine(); - while ((line = br.readLine()) != null) { - out.write(line); - out.newLine(); - } - out.close(); - fileStream.close(); - } - - csvFile.delete(); - outFile.renameTo(csvFile); - } - - private static final boolean isNL(int character) { - if ((character == -1)) { - return false; - } - return ((((char) character == '\n') || ((char) character == '\r'))); - } - - public static void deleteEmptyFiles(List files) { - for (File f : files) { - try { - if (isFileEmpty(f)) { - f.delete(); - } - } catch (IOException e) { - // do nothing, I really dont care here - } - } - } - - public static boolean isFileEmpty(File f) throws IOException { - BufferedReader br = null; - try { - br = new BufferedReader(new FileReader(f)); - String line = br.readLine(); - return StringUtils.isBlank(line); - } finally { - if (br != null) - br.close(); - } - } - - private static char[] readLineWithNL(FileInputStream in) throws IOException { - try { - int hLen = 0; - - ArrayList chars = new ArrayList(); - int ch = in.read(); - chars.add((char) ch); - while (!isNL(ch)) { - ch = in.read(); - chars.add((char) ch); - } - boolean isNl = true; - while (isNl) { - ch = in.read(); - if (isNL(ch)) { - chars.add((char) ch); - isNl = true; - } else { - isNl = false; - } - hLen++; - } - char[] charArray = new char[chars.size()]; - for (int i = 0; i < chars.size(); i++) { - charArray[i] = chars.get(i); - } - return charArray; - } catch (IOException ex) { - throw ex; - } - } - - public static String[] readHeader(File csvFile, char separator, char quotechar, char escape, boolean strictQuotes, - boolean ignoreLeadingWhiteSpace, Charset charset) throws Exception { - String[] headers = null; - - try (InputStreamReader freader = new InputStreamReader( new FileInputStream(csvFile),charset); - CSVReader csvreader = new CSVReader(freader, separator, quotechar, escape, 0, strictQuotes, - ignoreLeadingWhiteSpace);) { - - headers = csvreader.readNext(); - if (headers == null) { - throw new Exception("Error reading csv file header: " + csvFile.getName()); - } - freader.close(); - } - return headers; - } + + public static final String UTF8_BOM = "\uFEFF"; + + /** + * Removes first line from the specified file. Using NIO - fast. + * + * @param csvFile + * @throws IOException + */ + public static void removeHeaderFromCsv(File csvFile, Charset charset) throws Exception { + File outFile = new File(csvFile.getParent() + File.separator + "tempRes"); + try ( + InputStreamReader fr = new InputStreamReader(new FileInputStream(csvFile), charset); + BufferedReader br = new BufferedReader(fr); + FileWriter fileStream = new FileWriter(outFile); + BufferedWriter out = new BufferedWriter(fileStream); + ) { + String line; + br.readLine(); + while ((line = br.readLine()) != null) { + out.write(line); + out.newLine(); + } + out.close(); + fileStream.close(); + } + + csvFile.delete(); + outFile.renameTo(csvFile); + } + + private static final boolean isNL(int character) { + if ((character == -1)) { + return false; + } + return ((((char) character == '\n') || ((char) character == '\r'))); + } + + public static void deleteEmptyFiles(List files) { + for (File f : files) { + try { + if (isFileEmpty(f)) { + f.delete(); + } + } catch (IOException e) { + // do nothing, I really dont care here + } + } + } + + public static boolean isFileEmpty(File f) throws IOException { + BufferedReader br = null; + try { + br = new BufferedReader(new FileReader(f)); + String line = br.readLine(); + return StringUtils.isBlank(line); + } finally { + if (br != null) + br.close(); + } + } + + private static char[] readLineWithNL(FileInputStream in) throws IOException { + try { + int hLen = 0; + + ArrayList chars = new ArrayList(); + int ch = in.read(); + chars.add((char) ch); + while (!isNL(ch)) { + ch = in.read(); + chars.add((char) ch); + } + boolean isNl = true; + while (isNl) { + ch = in.read(); + if (isNL(ch)) { + chars.add((char) ch); + isNl = true; + } else { + isNl = false; + } + hLen++; + } + char[] charArray = new char[chars.size()]; + for (int i = 0; i < chars.size(); i++) { + charArray[i] = chars.get(i); + } + return charArray; + } catch (IOException ex) { + throw ex; + } + } + + public static String[] readHeader(File csvFile, char separator, char quotechar, char escape, boolean strictQuotes, + boolean ignoreLeadingWhiteSpace, Charset charset) throws Exception { + String[] headers = null; + + try (InputStreamReader freader = new InputStreamReader(new FileInputStream(csvFile), charset); + CSVReader csvreader = new CSVReader(freader, separator, quotechar, escape, 0, strictQuotes, + ignoreLeadingWhiteSpace);) { + + headers = csvreader.readNext(); + if (headers == null) { + throw new Exception("Error reading csv file header: " + csvFile.getName()); + } + freader.close(); + } + return headers; + } /** * Validates the structure of the merged csv files. @@ -144,9 +146,9 @@ public static String[] readHeader(File csvFile, char separator, char quotechar, * @throws Exception */ public static boolean dataStructureMatch(Collection fileNames, String folderPath) throws Exception { - if (fileNames == null || fileNames.isEmpty()) { - return true; - } + if (fileNames == null || fileNames.isEmpty()) { + return true; + } String[] headers = null; String headerLine = ""; String currFile = ""; @@ -195,4 +197,11 @@ public static boolean dataStructureMatch(Collection fileNames, String fo throw new Exception("Error reading csv file: " + currFile + " " + ex.getMessage()); } } + + public static String removeUTF8BOM(String s) { + if (s.startsWith(UTF8_BOM)) { + s = s.substring(1); + } + return s; + } }