diff --git a/karma-commands/commands-worksheet/src/main/java/edu/isi/karma/controller/command/worksheet/SplitColumnByDelimiter.java b/karma-commands/commands-worksheet/src/main/java/edu/isi/karma/controller/command/worksheet/SplitColumnByDelimiter.java index 68a859f78..c17212d60 100644 --- a/karma-commands/commands-worksheet/src/main/java/edu/isi/karma/controller/command/worksheet/SplitColumnByDelimiter.java +++ b/karma-commands/commands-worksheet/src/main/java/edu/isi/karma/controller/command/worksheet/SplitColumnByDelimiter.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.List; @@ -10,6 +11,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.kenai.jffi.Array; + import au.com.bytecode.opencsv.CSVReader; import edu.isi.karma.controller.command.selection.SuperSelection; import edu.isi.karma.er.helper.CloneTableUtils; @@ -35,7 +38,7 @@ public class SplitColumnByDelimiter { private SuperSelection selection; private final String newhNodeId; private String splitValueHNodeId; - + private String regExSplitter; private final Logger logger = LoggerFactory.getLogger(this.getClass()); public SplitColumnByDelimiter(String hNodeId, Worksheet worksheet, @@ -47,6 +50,7 @@ public SplitColumnByDelimiter(String hNodeId, Worksheet worksheet, this.workspace = workspace; this.selection = sel; this.newhNodeId = null; + this.regExSplitter = ""; } public SplitColumnByDelimiter(String hNodeId, String newhNodeId, Worksheet worksheet, @@ -58,6 +62,7 @@ public SplitColumnByDelimiter(String hNodeId, String newhNodeId, Worksheet works this.workspace = workspace; this.newhNodeId = newhNodeId; this.selection = sel; + this.regExSplitter = ""; } public String getSplitValueHNodeId() { @@ -86,14 +91,7 @@ public void split(HashMap oldNodeValueMap, } // Convert the delimiter into character primitive type - char delimiterChar; - if (delimiter.equalsIgnoreCase("space")) - delimiterChar = ' '; - else if (delimiter.equalsIgnoreCase("tab")) - delimiterChar = '\t'; - else { - delimiterChar = new Character(delimiter.charAt(0)); - } + char delimiterChar = getDelimiterChar(); Collection nodes = new ArrayList<>(); worksheet.getDataTable().collectNodes(selectedPath, nodes, selection); @@ -125,10 +123,21 @@ else if (delimiter.equalsIgnoreCase("tab")) if (originalVal != null && !originalVal.equals("")) { // Split the values - CSVReader reader = new CSVReader(new StringReader(originalVal), - delimiterChar); + try { - String[] rowValues = reader.readNext(); + String[] rowValues; + int startIndex = 0; + if(delimiterChar == '\u0000') { + rowValues = originalVal.split(regExSplitter); + //Ignore first empty one + if(rowValues.length > 0 && rowValues[0].length() == 0) + startIndex = 1; + } else { + CSVReader reader = new CSVReader(new StringReader(originalVal), + delimiterChar); + rowValues = reader.readNext(); + reader.close(); + } if (rowValues == null || rowValues.length == 0) continue; @@ -136,7 +145,7 @@ else if (delimiter.equalsIgnoreCase("tab")) Table table = node.getNestedTable(); // Add the row one by one - for (int i = 0; i < rowValues.length; i++) { + for (int i = startIndex; i < rowValues.length; i++) { String rowVal = rowValues[i]; if (!rowVal.trim().equals("")) { Row row = table.addRow(factory); @@ -144,7 +153,7 @@ else if (delimiter.equalsIgnoreCase("tab")) NodeStatus.edited, factory); } } - reader.close(); + } catch (IOException e) { logger.error("Error reading Line: " + originalVal, e); } @@ -182,26 +191,28 @@ public void split() throws IOException { RepFactory factory = workspace.getFactory(); HTable ht = factory.getHTable(factory.getHNode(hNodeId).getHTableId()); List tables = new ArrayList<>(); - char delimiterChar; - - if (delimiter.equalsIgnoreCase("space")) - delimiterChar = ' '; - else if (delimiter.equalsIgnoreCase("tab")) - delimiterChar = '\t'; - else { - delimiterChar = new Character(delimiter.charAt(0)); - } + char delimiterChar = getDelimiterChar(); + CloneTableUtils.getDatatable(worksheet.getDataTable(), ht, tables, selection); for (Table t : tables) { for (Row r : t.getRows(0, t.getNumRows(), selection)) { String orgValue = r.getNeighbor(hNodeId).getValue().asString(); - CSVReader reader = new CSVReader(new StringReader(orgValue), - delimiterChar); - String[] rowValues = reader.readNext(); - reader.close(); + String[] rowValues; + int startIndex = 0; + if(delimiterChar == '\u0000') { + rowValues = orgValue.split(regExSplitter); + ////Ignore first empty one + if(rowValues.length > 0 && rowValues[0].length() == 0) + startIndex = 1; + } else { + CSVReader reader = new CSVReader(new StringReader(orgValue), + delimiterChar); + rowValues = reader.readNext(); + reader.close(); + } if(rowValues != null) { Node newNode = r.getNeighbor(newhNodeId); - for (int i = 0; i < rowValues.length; i++) { + for (int i = startIndex; i < rowValues.length; i++) { Row dest = newNode.getNestedTable().addRow(factory); Node destNode = dest.getNeighborByColumnName("Values", factory); destNode.setValue(rowValues[i], NodeStatus.original, factory); @@ -210,4 +221,23 @@ else if (delimiter.equalsIgnoreCase("tab")) } } } + + private char getDelimiterChar() { + char delimiterChar; + if (delimiter.equalsIgnoreCase("space")) + delimiterChar = ' '; + else if (delimiter.equalsIgnoreCase("tab")) + delimiterChar = '\t'; + else if (delimiter.equalsIgnoreCase("character")) { + delimiterChar = '\u0000'; + regExSplitter = ""; + } else if(delimiter.toLowerCase().startsWith("regex:")) { + delimiterChar = '\u0000'; + regExSplitter = delimiter.substring(6); + } else { + delimiterChar = new Character(delimiter.charAt(0)); + } + return delimiterChar; + } + } diff --git a/karma-common/src/main/java/edu/isi/karma/modeling/alignment/GraphUtil.java b/karma-common/src/main/java/edu/isi/karma/modeling/alignment/GraphUtil.java index f94a23811..7c94a4a82 100644 --- a/karma-common/src/main/java/edu/isi/karma/modeling/alignment/GraphUtil.java +++ b/karma-common/src/main/java/edu/isi/karma/modeling/alignment/GraphUtil.java @@ -576,7 +576,7 @@ private static void writeNode(JsonWriter writer, Node node, boolean writeNodeAnn writer.name("rdfLiteralType"); if (cn.getRdfLiteralType() == null) writer.value(nullStr); else writeLabel(writer, cn.getRdfLiteralType()); - if(cn.getLanguage() == null) writer.value(nullStr); + if(cn.getLanguage() == null) writer.name("language").value(nullStr); else writer.name("language").value(cn.getLanguage()); writer.name("userSemanticTypes"); if (cn.getUserSemanticTypes() == null) writer.value(nullStr); diff --git a/karma-spark/src/main/java/edu/isi/karma/spark/UtilitiesDriver.java b/karma-spark/src/main/java/edu/isi/karma/spark/UtilitiesDriver.java new file mode 100644 index 000000000..8be6952aa --- /dev/null +++ b/karma-spark/src/main/java/edu/isi/karma/spark/UtilitiesDriver.java @@ -0,0 +1,76 @@ +package edu.isi.karma.spark; + +import java.io.IOException; + +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.PairFunction; +import org.json.JSONObject; +import org.json.XML; + +import scala.Tuple2; + +public class UtilitiesDriver { + + public static JavaPairRDD XMLToJSON(JavaSparkContext jsc, + JavaPairRDD input) throws IOException { + return input.mapToPair(new PairFunction, String, String>() { + private static final long serialVersionUID = 2878941073410454935L; + + @Override + public Tuple2 call(Tuple2 t) + throws Exception { + String key = t._1(); + JSONObject value = new JSONObject(t._2()); + String raw = value.getString("_rawContent"); + JSONObject json = getJsonFromXml(raw); + value.put("_jsonRep", json); + return new Tuple2(key, value.toString()); + } + }); + } + + public static JavaRDD XMLToJSON(JavaSparkContext jsc, + JavaRDD input) throws IOException { + JavaPairRDD inputPair = input.mapToPair(new PairFunction() { + private static final long serialVersionUID = -4153068088292891034L; + + public Tuple2 call(String s) throws Exception { + int tabIndex = s.indexOf("\t"); + return new Tuple2<>(s.substring(0, tabIndex), s.substring(tabIndex + 1)); + } + }); + + JavaPairRDD pairs = XMLToJSON(jsc, inputPair); + return pairs.map(new Function, String>() { + + private static final long serialVersionUID = 5833358013516510838L; + + @Override + public String call(Tuple2 arg0) throws Exception { + return (arg0._1() + "\t" + arg0._2()); + } + }); + } + + public static org.json.JSONObject getJsonFromXml(String xmlStr) { + return XML.toJSONObject(xmlStr); + } + + /* + * method to convert xml to json + */ + public static String getJsonFromXml(String xmlStr, boolean prettyOutput) { + org.json.JSONObject xmlJSONObj = XML.toJSONObject(xmlStr); + String jsonStr = ""; + if(prettyOutput) + jsonStr = xmlJSONObj.toString(4); + else + jsonStr = xmlJSONObj.toString(); + + + return jsonStr; + } +} \ No newline at end of file diff --git a/karma-typer/pom.xml b/karma-typer/pom.xml index bdfb1d68e..2fe6e95e2 100644 --- a/karma-typer/pom.xml +++ b/karma-typer/pom.xml @@ -36,7 +36,7 @@ org.apache.commons commons-math3 - 3.4-modified + 3.6.1 diff --git a/karma-web/src/main/webapp/js/semanticTypes.js b/karma-web/src/main/webapp/js/semanticTypes.js index 6ca82e02f..46d2af687 100755 --- a/karma-web/src/main/webapp/js/semanticTypes.js +++ b/karma-web/src/main/webapp/js/semanticTypes.js @@ -2635,6 +2635,21 @@ var AddLiteralNodeDialog = (function() { info["workspaceId"] = $.workspaceGlobalInformation.id; var newInfo = []; var literal = $("#literal", dialog).val(); + + if(literal.length == 0) { + alert("Please enter the literal"); + e.preventDefault(); + return; + } + + if(dialogMode == "addWithProperty") { + var property = propertyUI.getSelectedProperty(); + if(property.uri == "") { + alert("Please select a property"); + e.preventDefault(); + return; + } + } var literalType = $("#literalType", dialog).val(); var language = $("#literalLanguage", dialog).val(); var isUri = $("input#isUri").is(":checked"); diff --git a/karma-web/src/main/webapp/js/tableColumnOptions.js b/karma-web/src/main/webapp/js/tableColumnOptions.js index be14f25df..1840c691b 100755 --- a/karma-web/src/main/webapp/js/tableColumnOptions.js +++ b/karma-web/src/main/webapp/js/tableColumnOptions.js @@ -732,11 +732,13 @@ var SplitValueDialog = (function() { if (!delimiter) { validationResult = false; - } else if (delimiter != "space" && delimiter != "tab" && delimiter.length != 1) { + } else if (delimiter != "space" && delimiter != "tab" + && delimiter != "character" && delimiter.indexOf("regex:") != 0 + && delimiter.length != 1) { validationResult = false; } if (!validationResult) { - showError("Length of the delimter should be 1"); + showError("Length of the delimter should be 1 or it should start with regex:"); $("#valueSplitDelimiter", dialog).focus(); return false; } diff --git a/karma-web/src/main/webapp/tableColumnDialogs.jsp b/karma-web/src/main/webapp/tableColumnDialogs.jsp index 49c4abb71..4521a55e8 100644 --- a/karma-web/src/main/webapp/tableColumnDialogs.jsp +++ b/karma-web/src/main/webapp/tableColumnDialogs.jsp @@ -139,6 +139,8 @@ Enter "space" to use single space Enter "tab" to use tab + Enter "character" to split by every character + Enter "regex:" followed by a Regular Expression to split using java's String.split method diff --git a/karma-web/src/main/webapp/version.jsp b/karma-web/src/main/webapp/version.jsp index e491f9a63..a6716dbed 100644 --- a/karma-web/src/main/webapp/version.jsp +++ b/karma-web/src/main/webapp/version.jsp @@ -1 +1 @@ -v2.046 +v2.047