diff --git a/java-lib/modelset-lib/pom.xml b/java-lib/modelset-lib/pom.xml
index 3ad0d01..12c4026 100644
--- a/java-lib/modelset-lib/pom.xml
+++ b/java-lib/modelset-lib/pom.xml
@@ -82,6 +82,12 @@
<version>1.15.0</version>
+
+ <dependency>
+ <groupId>info.picocli</groupId>
+ <artifactId>picocli</artifactId>
+ <version>4.5.2</version>
+ </dependency>
diff --git a/java-lib/modelset-lib/src/main/java/modelset/database/DuplicationAdapterProvider.java b/java-lib/modelset-lib/src/main/java/modelset/database/DuplicationAdapterProvider.java
new file mode 100644
index 0000000..ebe573a
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/database/DuplicationAdapterProvider.java
@@ -0,0 +1,35 @@
+package modelset.database;
+
+import java.sql.SQLException;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import javax.annotation.Nonnull;
+
+import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
+import modelset.process.DuplicationDatabase;
+
+public class DuplicationAdapterProvider implements IFileProvider {
+
+ private IFileProvider provider;
+ private DuplicationDatabase dupDb;
+
+ public DuplicationAdapterProvider(IFileProvider provider, DuplicationDatabase dupDb) {
+ this.provider = provider;
+ this.dupDb = dupDb;
+ }
+
+ @Override
+ public @Nonnull List<? extends IFileInfo> getLocalFiles() {
+ List<? extends IFileInfo> files = provider.getLocalFiles();
+ return files.stream().filter(f -> {
+ try {
+ return dupDb.isGroupRepresentive(f.getModelId());
+ } catch (SQLException e) {
+ throw new IllegalStateException(e);
+ }
+ }).collect(Collectors.toList());
+ }
+
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/database/FilterAdapterFileProvider.java b/java-lib/modelset-lib/src/main/java/modelset/database/FilterAdapterFileProvider.java
new file mode 100644
index 0000000..c2a9752
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/database/FilterAdapterFileProvider.java
@@ -0,0 +1,27 @@
+package modelset.database;
+
+import java.util.List;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+
+import javax.annotation.Nonnull;
+
+import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
+
+public class FilterAdapterFileProvider implements IFileProvider {
+
+ private IFileProvider provider;
+ private Predicate<IFileInfo> predicate;
+
+ public FilterAdapterFileProvider(IFileProvider provider, Predicate<IFileInfo> predicate) {
+ this.provider = provider;
+ this.predicate = predicate;
+ }
+
+ @Override
+ public @Nonnull List<? extends IFileInfo> getLocalFiles() {
+ List<? extends IFileInfo> files = provider.getLocalFiles();
+ return files.stream().filter(this.predicate).collect(Collectors.toList());
+ }
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/database/MarAnalysisFileProvider.java b/java-lib/modelset-lib/src/main/java/modelset/database/MarAnalysisFileProvider.java
new file mode 100644
index 0000000..d217b2f
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/database/MarAnalysisFileProvider.java
@@ -0,0 +1,50 @@
+package modelset.database;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DatabaseMetaData;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.annotation.Nonnull;
+
+import mar.validation.AnalysisDB;
+import mar.validation.AnalysisDB.Model;
+import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
+
+public class MarAnalysisFileProvider implements IFileProvider {
+
+ private List<PlainFileInfo> files;
+
+ public MarAnalysisFileProvider(File db, File repoRoot, String type) throws SQLException, IOException {
+ if (! db.exists())
+ throw new IllegalArgumentException("File " + db + " does not exist");
+
+ try (AnalysisDB analysis = new AnalysisDB(db)) {
+ this.files = new ArrayList<PlainFileInfo>();
+ for (Model m : analysis.getValidModels(f -> f)) {
+ files.add(new PlainFileInfo(m.getId(), m.getRelativePath().toString(), repoRoot));
+ }
+ }
+ }
+
+ @Override
+ public @Nonnull List<? extends IFileInfo> getLocalFiles() {
+ return files;
+ }
+
+ public static boolean isMarDb(File f) throws SQLException {
+ try(Connection connection = DriverManager.getConnection(AnalysisDB.getConnectionString(f))) {
+ DatabaseMetaData dmd = connection.getMetaData();
+ ResultSet rs = dmd.getTables(null, null, "models", new String[] { "TABLE" });
+ ResultSet rs2 = dmd.getTables(null, null, "stats", new String[] { "TABLE" });
+ return rs.next() && rs2.next();
+ }
+ }
+
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/database/MarCrawlerFileProvider.java b/java-lib/modelset-lib/src/main/java/modelset/database/MarCrawlerFileProvider.java
new file mode 100644
index 0000000..bc23d73
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/database/MarCrawlerFileProvider.java
@@ -0,0 +1,43 @@
+package modelset.database;
+
+import java.io.File;
+import java.sql.Connection;
+import java.sql.DatabaseMetaData;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.annotation.Nonnull;
+
+import mar.ingestion.CrawlerDB;
+import mar.ingestion.IngestedModel;
+import mar.validation.IFileProvider;
+
+public class MarCrawlerFileProvider implements IFileProvider {
+
+ private List<PlainFileInfo> files;
+
+ public MarCrawlerFileProvider(File db, File repoRoot, String type) throws SQLException {
+ CrawlerDB crawlerDb = new CrawlerDB(type, "github", repoRoot.getAbsolutePath(), db);
+ this.files = new ArrayList<PlainFileInfo>();
+ for (IngestedModel m : crawlerDb.getModels()) {
+ files.add(new PlainFileInfo(m.getModelId(), m.getRelativePath(), repoRoot));
+ }
+ }
+
+ @Override
+ public @Nonnull List<? extends PlainFileInfo> getLocalFiles() {
+ return files;
+ }
+
+ public static boolean isCrawlerDb(File f) throws SQLException {
+ try(Connection connection = DriverManager.getConnection(CrawlerDB.getConnectionString(f))) {
+ DatabaseMetaData dmd = connection.getMetaData();
+ ResultSet rs = dmd.getTables(null, null, "data", new String[] { "TABLE" });
+ ResultSet rs2 = dmd.getTables(null, null, "repo_info", new String[] { "TABLE" });
+ return rs.next() && rs2.next();
+ }
+ }
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/database/MaxModelsFileProvider.java b/java-lib/modelset-lib/src/main/java/modelset/database/MaxModelsFileProvider.java
new file mode 100644
index 0000000..9b845f5
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/database/MaxModelsFileProvider.java
@@ -0,0 +1,25 @@
+package modelset.database;
+
+import java.util.List;
+
+import javax.annotation.Nonnull;
+
+import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
+
+public class MaxModelsFileProvider implements IFileProvider {
+
+ private IFileProvider provider;
+ private int size;
+
+ public MaxModelsFileProvider(IFileProvider provider, int size) {
+ this.provider = provider;
+ this.size = size;
+ }
+
+ @Override
+ public @Nonnull List<? extends IFileInfo> getLocalFiles() {
+ return provider.getLocalFiles().subList(0, size);
+ }
+
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/database/MinSizeAdapterProvider.java b/java-lib/modelset-lib/src/main/java/modelset/database/MinSizeAdapterProvider.java
new file mode 100644
index 0000000..4f2d711
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/database/MinSizeAdapterProvider.java
@@ -0,0 +1,56 @@
+package modelset.database;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import javax.annotation.Nonnull;
+
+import org.eclipse.emf.common.util.TreeIterator;
+import org.eclipse.emf.ecore.EObject;
+import org.eclipse.emf.ecore.resource.Resource;
+
+import mar.modelling.loader.ILoader;
+import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
+
+public class MinSizeAdapterProvider implements IFileProvider {
+
+ private IFileProvider provider;
+ private ILoader loader;
+ private int minSize;
+
+ public MinSizeAdapterProvider(IFileProvider provider, int minSize, ILoader loader) {
+ this.provider = provider;
+ this.loader = loader;
+ this.minSize = minSize;
+ }
+
+ @Override
+ public @Nonnull List<? extends IFileInfo> getLocalFiles() {
+ List<? extends IFileInfo> files = provider.getLocalFiles();
+ return files.stream().filter(f -> {
+ try {
+ Resource r = loader.toEMF(f.getFullFile());
+ boolean b = hasEnoughElements(r);
+ r.unload();
+ return b;
+ } catch (IOException e) {
+ e.printStackTrace();
+ return false;
+ }
+ }).collect(Collectors.toList());
+ }
+
+ private boolean hasEnoughElements(Resource r) {
+ int count = 0;
+ TreeIterator<EObject> it = r.getAllContents();
+ while (it.hasNext()) {
+ count++;
+ if (count > minSize)
+ return true;
+ }
+ return false;
+ }
+
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ModelSetFileInfo.java b/java-lib/modelset-lib/src/main/java/modelset/database/ModelSetFileInfo.java
similarity index 88%
rename from java-lib/modelset-lib/src/main/java/modelset/process/ModelSetFileInfo.java
rename to java-lib/modelset-lib/src/main/java/modelset/database/ModelSetFileInfo.java
index 4f0c224..5971381 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/process/ModelSetFileInfo.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/database/ModelSetFileInfo.java
@@ -1,4 +1,4 @@
-package modelset.process;
+package modelset.database;
import java.io.File;
@@ -40,4 +40,8 @@ public ModelSetFileInfo(String id, String filename, File repoRoot, ParsedMetadat
public ParsedMetadata getMetadata() {
return metadata;
}
+
+ public String getCategory() {
+ return metadata.getValues("category").get(0);
+ }
}
\ No newline at end of file
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ModelSetFileProvider.java b/java-lib/modelset-lib/src/main/java/modelset/database/ModelSetFileProvider.java
similarity index 76%
rename from java-lib/modelset-lib/src/main/java/modelset/process/ModelSetFileProvider.java
rename to java-lib/modelset-lib/src/main/java/modelset/database/ModelSetFileProvider.java
index 55abdea..4c2e4e9 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/process/ModelSetFileProvider.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/database/ModelSetFileProvider.java
@@ -1,7 +1,8 @@
-package modelset.process;
+package modelset.database;
import java.io.File;
import java.sql.Connection;
+import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
@@ -11,7 +12,6 @@
import javax.annotation.Nonnull;
-import mar.validation.IFileInfo;
import mar.validation.IFileProvider;
import modelset.metadata.AnnotationsValidator;
import modelset.metadata.AnnotationsValidator.ParsedMetadata;
@@ -50,4 +50,12 @@ private static String getConnectionString(File file) {
return "jdbc:sqlite:" + file.getAbsolutePath();
}
+ public static boolean isModelSetDb(File f) throws SQLException {
+ try(Connection connection = DriverManager.getConnection(getConnectionString(f))) {
+ DatabaseMetaData dmd = connection.getMetaData();
+ ResultSet rs = dmd.getTables(null, null, "metadata", new String[] { "TABLE" });
+ ResultSet rs2 = dmd.getTables(null, null, "repositories", new String[] { "TABLE" });
+ return rs.next() && rs2.next();
+ }
+ }
}
\ No newline at end of file
diff --git a/java-lib/modelset-lib/src/main/java/modelset/database/PlainFileInfo.java b/java-lib/modelset-lib/src/main/java/modelset/database/PlainFileInfo.java
new file mode 100644
index 0000000..1cd69db
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/database/PlainFileInfo.java
@@ -0,0 +1,36 @@
+package modelset.database;
+
+import java.io.File;
+
+import javax.annotation.Nonnull;
+
+import mar.validation.IFileInfo;
+
+public class PlainFileInfo implements IFileInfo {
+
+ private String id;
+ private String filename;
+ private File repoRoot;
+
+ public PlainFileInfo(String id, String filename, File repoRoot) {
+ this.id = id;
+ this.filename = filename;
+ this.repoRoot = repoRoot;
+ }
+
+ @Override
+ public @Nonnull File getFullFile() {
+ return new File(repoRoot.getAbsolutePath() + File.separator + filename);
+ }
+
+ @Override
+ public @Nonnull String getModelId() {
+ return id;
+ }
+
+ @Override
+ public @Nonnull File getRelativeFile() {
+ return new File(filename);
+ }
+
+}
\ No newline at end of file
diff --git a/java-lib/modelset-lib/src/main/java/modelset/lib/ModelSetDB.java b/java-lib/modelset-lib/src/main/java/modelset/lib/ModelSetDB.java
index d99f005..3b8ce1b 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/lib/ModelSetDB.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/lib/ModelSetDB.java
@@ -5,8 +5,8 @@
import java.util.List;
import java.util.stream.Collectors;
-import modelset.process.ModelSetFileInfo;
-import modelset.process.ModelSetFileProvider;
+import modelset.database.ModelSetFileInfo;
+import modelset.database.ModelSetFileProvider;
public class ModelSetDB {
diff --git a/java-lib/modelset-lib/src/main/java/modelset/lib/ModelSetModel.java b/java-lib/modelset-lib/src/main/java/modelset/lib/ModelSetModel.java
index b97322e..0cc6796 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/lib/ModelSetModel.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/lib/ModelSetModel.java
@@ -6,8 +6,8 @@
import org.eclipse.emf.ecore.resource.Resource;
import mar.indexer.common.configuration.ModelLoader;
+import modelset.database.ModelSetFileInfo;
import modelset.metadata.AnnotationsValidator.ParsedMetadata;
-import modelset.process.ModelSetFileInfo;
public class ModelSetModel {
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/AbstractDuplicateComputation.java b/java-lib/modelset-lib/src/main/java/modelset/process/AbstractDuplicateComputation.java
new file mode 100644
index 0000000..a39073d
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/AbstractDuplicateComputation.java
@@ -0,0 +1,76 @@
+package modelset.process;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.Collection;
+import java.util.function.Consumer;
+
+import javax.annotation.CheckForNull;
+
+import mar.analysis.duplicates.DuplicateFinder;
+import mar.analysis.duplicates.DuplicateFinder.DuplicationGroup;
+import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
+
+public class AbstractDuplicateComputation {
+
+ private static final double T0 = 0.8;
+ private static final double T1 = 0.7;
+
+ protected <T> Collection<DuplicationGroup<IFileInfo>> generateDuplicates(IFileProvider provider,
+ DuplicateFinder<IFileInfo, T> finder,
+ LoaderFunction<T> loader,
+ @CheckForNull Consumer<T> unloader)
+ throws SQLException, IOException, FileNotFoundException {
+
+
+ System.out.println("Loading files...");
+ int totalLoadedFiles = 0;
+ for (IFileInfo f : provider.getLocalFiles()) {
+ try {
+ System.out.println("Loading " + f.getAbsolutePath());
+ T r = loader.load(f.getFullFile());
+ finder.addResource(f, r);
+ if (unloader != null)
+ unloader.accept(r);
+ totalLoadedFiles++;
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ System.out.println("Total loaded files: " + totalLoadedFiles);
+ System.out.println("Computing duplicates...");
+ return finder.getDuplicates(T0, T1);
+ }
+
+
+ protected <T> void dumpToDatabase(String modelType,
+ IFileProvider provider,
+ DuplicateFinder<IFileInfo, T> finder,
+ LoaderFunction<T> loader, @CheckForNull Consumer<T> unloader,
+ File outputFile) throws SQLException, IOException {
+
+ Collection<DuplicationGroup<IFileInfo>> dups = generateDuplicates(provider, finder, loader, unloader);
+
+ System.out.println("Total duplication groups: " + dups.size());
+
+ DuplicationDatabase ddb = new DuplicationDatabase(outputFile);
+ String groupId = modelType + "_" + T0 + "_" + T1;
+ ddb.addDuplicationRun(groupId, T0, T1);
+ for (DuplicationGroup<IFileInfo> duplicationGroup : dups) {
+ ddb.addGroup(groupId, duplicationGroup);
+ }
+
+ ddb.close();
+
+ }
+
+
+ @FunctionalInterface
+ public static interface LoaderFunction<T> {
+ public T load(File f) throws IOException;
+ }
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeDuplicates.java b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeDuplicates.java
index db56617..c832749 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeDuplicates.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeDuplicates.java
@@ -1,67 +1,40 @@
package modelset.process;
import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.sql.SQLException;
-import java.util.Collection;
import org.eclipse.emf.ecore.resource.Resource;
-import mar.analysis.duplicates.DuplicateFinder.DuplicationGroup;
import mar.analysis.duplicates.EcoreDuplicateFinder;
import mar.indexer.common.configuration.ModelLoader;
import mar.validation.AnalyserRegistry;
import mar.validation.IFileInfo;
import mar.validation.ResourceAnalyser.Factory;
+import modelset.database.ModelSetFileProvider;
-public class ComputeDuplicates {
-
- private static final double T0 = 0.8;
- private static final double T1 = 0.7;
+public class ComputeDuplicates extends AbstractDuplicateComputation {
public static void main(String[] args) throws Exception {
File repoFolder = new File("../../raw-data/repo-ecore-all");
File db = new File("../../datasets/dataset.ecore/data/ecore.db");
-
+
+ Factory factory = AnalyserRegistry.INSTANCE.getFactory("ecore");
+ factory.configureEnvironment();
+
+ EcoreDuplicateFinder<IFileInfo> finder = new EcoreDuplicateFinder<>();
+ ModelSetFileProvider provider = new ModelSetFileProvider(db, repoFolder);
+
ModelLoader loader = ModelLoader.DEFAULT;
- Collection> dups = generateDuplicates(repoFolder, db, "ecore", loader);
-
+
new File("../../dups/").mkdirs();
File outputFile = new File("../../dups/ecore-dups.db");
if (outputFile.exists())
outputFile.delete();
-
- DuplicationDatabase ddb = new DuplicationDatabase(outputFile);
- String groupId = "ecore_" + T0 + "_" + T1;
- ddb.addDuplicationRun(groupId, T0, T1);
- for (DuplicationGroup duplicationGroup : dups) {
- ddb.addGroup(groupId, duplicationGroup);
- }
-
- ddb.close();
-
- System.out.println("Finished");
- }
- private static Collection> generateDuplicates(File repoFolder, File db, String modelType, ModelLoader loader)
- throws SQLException, IOException, FileNotFoundException {
-
- EcoreDuplicateFinder finder = new EcoreDuplicateFinder<>();
+ ComputeDuplicates dup = new ComputeDuplicates();
+ dup.dumpToDatabase("ecore", provider, finder, loader::load, (r) -> r.unload(), outputFile);
- Factory factory = AnalyserRegistry.INSTANCE.getFactory(modelType);
- factory.configureEnvironment();
-
- System.out.println("Loading files...");
- ModelSetFileProvider provider = new ModelSetFileProvider(db, repoFolder);
- for (IFileInfo f : provider.getLocalFiles()) {
- Resource r = loader.load(f.getFullFile());
- finder.addResource(f, r);
- r.unload();
- }
-
- System.out.println("Computing duplicates...");
- return finder.getDuplicates(T0, T1);
+ System.out.println("Finished");
}
+
}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeEcoreStats.java b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeEcoreStats.java
index d7b27a9..9e299a7 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeEcoreStats.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeEcoreStats.java
@@ -6,6 +6,7 @@
import mar.validation.ISingleFileAnalyser;
import mar.validation.ResourceAnalyser;
import mar.validation.ResourceAnalyser.Factory;
+import modelset.database.ModelSetFileProvider;
public class ComputeEcoreStats {
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeEmfatic.java b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeEmfatic.java
deleted file mode 100644
index 769a48d..0000000
--- a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeEmfatic.java
+++ /dev/null
@@ -1,429 +0,0 @@
-package modelset.process;
-
-import java.io.File;
-import java.io.IOException;
-import java.sql.SQLException;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.eclipse.emf.ecore.EAttribute;
-import org.eclipse.emf.ecore.EClass;
-import org.eclipse.emf.ecore.EClassifier;
-import org.eclipse.emf.ecore.EDataType;
-import org.eclipse.emf.ecore.EEnum;
-import org.eclipse.emf.ecore.EObject;
-import org.eclipse.emf.ecore.EPackage;
-import org.eclipse.emf.ecore.EReference;
-import org.eclipse.emf.ecore.EStructuralFeature;
-import org.eclipse.emf.ecore.ETypedElement;
-import org.eclipse.emf.ecore.EcorePackage;
-import org.eclipse.emf.ecore.resource.Resource;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.io.Files;
-
-import mar.indexer.common.configuration.ModelLoader;
-import mar.validation.AnalyserRegistry;
-import mar.validation.IFileInfo;
-import mar.validation.ResourceAnalyser.Factory;
-
-public class ComputeEmfatic {
-
- private static enum Mode {
- TOKEN,
- LINE,
- FULL
- }
-
- public static void main(String[] args) throws Exception {
- if (args.length < 3) {
- System.out.println("./ComputeEmfatic mode dups/no-dups output-file");
- return;
- }
-
- Mode mode;
- if (args[0].contains("line")) {
- mode = Mode.LINE;
- } else if (args[0].contains("token")) {
- mode = Mode.TOKEN;
- } else {
- mode = Mode.FULL;
- }
-
- boolean filterDuplicates = false;
- if (args[1].contains("no-dups")) {
- filterDuplicates = true;
- }
-
-
- File outputFile = new File(args[2]);
-
- File repoFolder = new File("../../raw-data/repo-ecore-all");
- File db = new File("../../datasets/dataset.ecore/data/ecore.db");
- DuplicationDatabase dupDb = null;
- if (filterDuplicates) {
- dupDb = new DuplicationDatabase(new File("../../dups/ecore-dups.db"));
- }
-
- ModelLoader loader = ModelLoader.DEFAULT;
- generateTokenization(repoFolder, db, outputFile, "ecore", mode, dupDb, loader);
-
- dupDb.close();
- }
-
- private static void generateTokenization(File repoFolder, File db, File outputFile, String modelType, Mode mode, DuplicationDatabase dupDb, ModelLoader loader) throws SQLException, IOException {
- Factory factory = AnalyserRegistry.INSTANCE.getFactory(modelType);
- factory.configureEnvironment();
-
- Map result = new HashMap<>();
-
- CodeXGlueOutput all = new CodeXGlueOutput(mode);
- ModelSetFileProvider provider = new ModelSetFileProvider(db, repoFolder);
- for (IFileInfo f : provider.getLocalFiles()) {
- if (dupDb != null) {
- if (! dupDb.isGroupRepresentive(f.getModelId()))
- continue;
- }
-
- System.out.println(f.getRelativePath());
- Resource r = loader.load(f.getFullFile());
-
- try {
- CodeXGlueOutput output = new CodeXGlueOutput(mode);
- convertToEmfaticTokens(r, output);
-
- if (mode == Mode.FULL) {
- Map model = new HashMap<>();
- model.put("raw", output.builder.toString());
- result.put(f.getModelId(), model);
- } else {
- all.merge(output);
- }
- } catch (InvalidModelException e) {
- System.out.println("Invalid model: " + f);
- }
- }
-
-
- if (mode == Mode.FULL) {
- ObjectMapper mapper = new ObjectMapper();
- mapper.writer().writeValue(outputFile, result);
- } else {
- Files.write(all.builder.toString().getBytes(), outputFile);
- }
- }
-
- private static void convertToEmfaticTokens(Resource r, CodeXGlueOutput output) {
- try(PieceOfCode c = output.start()) {
- for (EObject obj : r.getContents()) {
- if (obj instanceof EPackage) {
- convertRootPackage((EPackage) obj, output);
- }
- }
- }
- }
-
- private static void convertRootPackage(EPackage obj, CodeXGlueOutput output) {
- //@namespace(uri="AnURI", prefix="uri-name")
- //package ecore;
- convertNamespace(obj, output);
- output.token("package").w().token(obj.getName()).token(";").newLine();
- convertPackageContents(obj, output);
- }
-
- private static void convertPackageContents(EPackage obj, CodeXGlueOutput output) {
- for (EPackage pkg : obj.getESubpackages()) {
- convertPackage(pkg, output);
- }
-
- for (EClassifier classifier : obj.getEClassifiers()) {
- if (classifier instanceof EClass) {
- convertClass((EClass) classifier, output);
- }
- }
- }
-
- private static void convertNamespace(EPackage obj, CodeXGlueOutput output) {
- output.token("@").token("namespace").token("(").
- token("uri").token("=").stringToken(obj.getNsURI()).token(",").w().
- token("prefix").token("=").stringToken(obj.getNsPrefix()).token(")").
- newLine();
- }
-
- private static void convertPackage(EPackage pkg, CodeXGlueOutput output) {
- convertNamespace(pkg, output);
- output.token("package").w().token(pkg.getName()).w().token("{").newLine().indent();
- convertPackageContents(pkg, output);
- output.unindent().token("}").newLine();
- }
-
- // (abstract?) class A { }
- private static void convertClass(EClass c, CodeXGlueOutput output) {
- if (c.isAbstract())
- output.token("abstract").w();
- output.token("class").w().token(nonNull(c.getName())).w();
-
- if (c.getESuperTypes().size() > 0) {
- output.token("extends").w();
- for (int i = 0, len = c.getESuperTypes().size(); i < len; i++) {
- EClass sup = c.getESuperTypes().get(i);
- output.token(nonNull(sup.getName()));
- if (i + 1 != len)
- output.token(",").w();
- }
- }
-
- output.token("{").newLine().indent();
- convertClassContents(c, output);
- output.unindent().token("}").newLine();
- }
-
- private static void convertClassContents(EClass c, CodeXGlueOutput output) {
- for (EStructuralFeature feature : c.getEStructuralFeatures()) {
- if (feature instanceof EAttribute) {
- convertAttribute((EAttribute) feature, output);
- } else {
- convertReference((EReference) feature, output);
- }
- }
- }
-
- private static void convertAttribute(EAttribute attr, CodeXGlueOutput output) {
- EDataType dt = attr.getEAttributeType();
- String type = toEmfaticType(dt);
- String card = applyMode(output.mode, toEmfaticCardinality(attr));
-
- output.token("attr").w();
- output.token(type);
- output.token(card).w();
- output.token(attr.getName());
- output.token(";");
- output.newLine();
- }
-
- private static void convertReference(EReference ref, CodeXGlueOutput output) {
- EClass referenced = ref.getEReferenceType();
- // TODO: This needs to check whether this is an imported package or a subpackage...
-
- String referencedName = nonNull(referenced.getName());
- String refType = ref.isContainment() ? "val" : "ref";
- String card = applyMode(output.mode, toEmfaticCardinality(ref));
-
- output.token(refType).w();
- output.token(referencedName);
- output.token(card).w();
- output.token(ref.getName());
- output.token(";");
- output.newLine();
- }
-
- private static String toEmfaticType(EDataType dt) {
- if (dt instanceof EEnum) {
- EEnum e = (EEnum) dt;
- // FIXME: Todo check, packages
- return e.getName();
- }
-
- if (dt == EcorePackage.Literals.EBOOLEAN)
- return "boolean";
- else if (dt == EcorePackage.Literals.EBOOLEAN_OBJECT) {
- return "Boolean";
- } else if (dt == EcorePackage.Literals.EBYTE) {
- return "byte";
- } else if (dt == EcorePackage.Literals.EBYTE_OBJECT) {
- return "Byte";
- } else if (dt == EcorePackage.Literals.ECHAR) {
- return "char";
- } else if (dt == EcorePackage.Literals.ECHARACTER_OBJECT) {
- return "Character";
- } else if (dt == EcorePackage.Literals.EDOUBLE) {
- return "double";
- } else if (dt == EcorePackage.Literals.EDOUBLE_OBJECT) {
- return "Double";
- } else if (dt == EcorePackage.Literals.EINT) {
- return "int";
- } else if (dt == EcorePackage.Literals.EINTEGER_OBJECT) {
- return "Integer";
- } else if (dt == EcorePackage.Literals.ELONG) {
- return "long";
- } else if (dt == EcorePackage.Literals.ELONG_OBJECT) {
- return "Long";
- } else if (dt == EcorePackage.Literals.ESHORT) {
- return "short";
- } else if (dt == EcorePackage.Literals.ESHORT_OBJECT) {
- return "Short";
- } else if (dt == EcorePackage.Literals.EDATE) {
- return "Date";
- } else if (dt == EcorePackage.Literals.ESTRING) {
- return "String";
- } else if (dt == EcorePackage.Literals.EJAVA_OBJECT) {
- return "Object";
- } else if (dt == EcorePackage.Literals.EJAVA_CLASS) {
- return "Class";
- } else if (dt == EcorePackage.Literals.EOBJECT) {
- // This doesn't look correct, because EObject is an EClass
- return "EObject";
- } else if (dt == EcorePackage.Literals.ECLASS) {
- // This doesn't look correct, because EClass is an EClass
- return "EClass";
- }
-
- String typeName = dt.getInstanceTypeName();
- if (typeName != null) {
- if ("org.eclipse.emf.ecore.EObject".equals(typeName))
- return "EObject";
- if ("org.eclipse.emf.ecore.EClass".equals(typeName))
- return "EClass";
-
- if (typeName.startsWith("org.eclipse.emf.ecore")) {
- String[] parts = typeName.split("\\.");
- return "ecore." + parts[parts.length - 1];
- }
- // FIXME: Not sure about this
- return typeName;
- } else {
- if (dt.eIsProxy())
- throw new InvalidModelException();
-
- throw new UnsupportedOperationException(dt.toString());
- }
-
- }
-
- private static String toEmfaticCardinality(ETypedElement t) {
- if (t.getLowerBound() == 0 && t.getUpperBound() == 1)
- return "[ ? ]"; // Could be empty string
- else if (t.getLowerBound() == 0 && t.getUpperBound() == -1)
- return "[ * ]";
- else if (t.getLowerBound() == 1 && t.getUpperBound() == -1)
- return "[ + ]";
- else if (t.getLowerBound() == 1 && t.getUpperBound() == 1)
- return "[ 1 ]";
- else if (t.getLowerBound() >= 0 && t.getUpperBound() == -1)
- return "[ " + t.getLowerBound() + " .. * ]";
- else if (t.getLowerBound() >= 0 && t.getUpperBound() == -2)
- return "[ " + t.getLowerBound() + " .. ? ]";
- else if (t.getLowerBound() >= 0 && t.getLowerBound() == t.getUpperBound())
- return "[ " + t.getLowerBound() + " ]";
- else if (t.getLowerBound() >= 0 && t.getUpperBound() > 0)
- return "[ " + t.getLowerBound() + " .. " + t.getUpperBound() + " ]";
- throw new UnsupportedOperationException(t.toString());
- }
-
- private static String applyMode(Mode mode, String cardinalityString) {
- return mode == Mode.FULL ? cardinalityString.replace(" ", "") : cardinalityString;
- }
-
- private static T nonNull(T obj) {
- if (obj == null)
- throw new InvalidModelException();
- return obj;
- }
-
-
- // https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/CodeCompletion-token
- private static class CodeXGlueOutput {
-
- private final StringBuilder builder = new StringBuilder();
- private final Mode mode;
- int indent = 0;
-
- public CodeXGlueOutput(Mode mode) {
- this.mode = mode;
- }
-
- public CodeXGlueOutput w() {
- if (mode == Mode.FULL)
- builder.append(" ");
- return this;
- }
-
- public void merge(CodeXGlueOutput output) {
- builder.append(output.builder);
- }
-
- public CodeXGlueOutput newLine() {
- if (mode == Mode.LINE) {
- builder.append(" ");
- } else if (mode == Mode.FULL) {
- builder.append("\n");
- }
- return this;
- }
-
- public CodeXGlueOutput indent() {
- indent++;
- return this;
- }
-
- public CodeXGlueOutput unindent() {
- indent--;
- return this;
- }
- public PieceOfCode start() {
- if (mode != Mode.FULL)
- builder.append(" ");
- return new PieceOfCode(this);
- }
-
- public CodeXGlueOutput token(String string) {
- if (string.isEmpty())
- return this;
-
- doIndentIfNeeded();
-
- if (mode != Mode.FULL)
- builder.append(" ");
- builder.append(string);
- return this;
- }
-
- public CodeXGlueOutput stringToken(String str) {
- doIndentIfNeeded();
-
- if (mode != Mode.FULL)
- builder.append(" ");
- builder.append("\"");
- builder.append(str);
- builder.append("\"");
- return this;
- }
-
- private void doIndentIfNeeded() {
- int size = builder.length();
- if (mode == Mode.FULL && size > 0) {
- char last = builder.charAt(size - 1);
- if (last == '\n') {
- for(int i = 0; i < indent; i++)
- builder.append("\t");
- }
- }
- }
- }
-
- private static class PieceOfCode implements AutoCloseable {
-
- private CodeXGlueOutput output;
-
- public PieceOfCode(CodeXGlueOutput output) {
- this.output = output;
- }
-
- @Override
- public void close() {
- if (output.mode != Mode.FULL) {
- output.builder.append(" ");
- output.builder.append("\n");
- }
- }
- }
-
- private static class InvalidModelException extends RuntimeException {
- private static final long serialVersionUID = 5490556461546321329L;
-
- }
-
- public static class JsonDatasetModel {
-
- }
-}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeGraph.java b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeGraph.java
index 3cc4ecd..ed2feea 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeGraph.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeGraph.java
@@ -13,7 +13,9 @@
import mar.indexer.common.configuration.ModelLoader;
import mar.validation.AnalyserRegistry;
import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
import mar.validation.ResourceAnalyser.Factory;
+import modelset.database.ModelSetFileProvider;
import modelset.graph.Model2Graph;
public class ComputeGraph {
@@ -24,20 +26,21 @@ public static void main(String[] args) throws Exception {
File outputFolder = new File("../../graph/repo-ecore-all");
ModelLoader loader = ModelLoader.DEFAULT;
- generateGraph(repoFolder, db, outputFolder, "ecore", loader);
+ ModelSetFileProvider provider = new ModelSetFileProvider(db, repoFolder);
+ generateGraph(provider, outputFolder, "ecore", loader);
}
- private static void generateGraph(File repoFolder, File db, File outputFolder, String modelType, ModelLoader loader) throws SQLException, IOException {
+ public static void generateGraph(IFileProvider provider, File outputFolder, String modelType, ModelLoader loader) throws SQLException, IOException {
Factory factory = AnalyserRegistry.INSTANCE.getFactory(modelType);
factory.configureEnvironment();
- ModelSetFileProvider provider = new ModelSetFileProvider(db, repoFolder);
+
for (IFileInfo f : provider.getLocalFiles()) {
System.out.println(f.getRelativePath());
+
Resource r = loader.load(f.getFullFile());
-
- Model2Graph converter = new Model2Graph();
- String content = converter.serializeGraphAsJson(r, new SimpleFilter());
-
+ String content = generateSingleGraph(r);
+ r.unload();
+
java.nio.file.Path graphFolder = Paths.get(outputFolder.getAbsolutePath(), f.getRelativePath());
graphFolder.toFile().mkdirs();
String fname = graphFolder.getName(graphFolder.getNameCount() - 1).toString();
@@ -45,6 +48,12 @@ private static void generateGraph(File repoFolder, File db, File outputFolder, S
IOUtils.write(content.getBytes(), new FileOutputStream(outputFile.toFile()));
}
+ }
+
+ static String generateSingleGraph(Resource r) {
+ Model2Graph converter = new Model2Graph();
+ String content = converter.serializeGraphAsJson(r, new SimpleFilter());
+ return content;
}
}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeHF.java b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeHF.java
new file mode 100644
index 0000000..c208fbf
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeHF.java
@@ -0,0 +1,113 @@
+package modelset.process;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.annotation.CheckForNull;
+
+import org.eclipse.emf.ecore.resource.Resource;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import mar.modelling.loader.ILoader;
+import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
+import modelset.database.ModelSetFileInfo;
+
+public class ComputeHF {
+
+ public void generateJsonFormat(IFileProvider provider, @CheckForNull DuplicationDatabase dupDb, ILoader loader, String type, File output) throws IOException {
+ List lines = new ArrayList<>();
+ for (IFileInfo f : provider.getLocalFiles()) {
+ System.out.println(f.getRelativePath());
+
+ try {
+ HFLine line = generateLine(f, dupDb, loader, type);
+ lines.add(line);
+ } catch (IOException | SQLException e) {
+				// Log and skip this file; dataset generation continues with the remaining models
+ e.printStackTrace();
+ } catch (Exception e) {
+ if (e.getClass().getName().contains("DiagnosticWrappedException")) {
+ System.out.println("Invalid file " + f.getAbsolutePath());
+ continue;
+ }
+				// Some graphs are not properly serialized; for the moment we just skip them
+ if (e.getCause() instanceof JsonProcessingException) {
+ e.printStackTrace();
+ System.out.println("Can't generate graph " + f.getAbsolutePath());
+ continue;
+ }
+ throw new RuntimeException(e);
+ }
+ }
+
+ ObjectMapper mapper = new ObjectMapper();
+ mapper.writer().writeValue(output, lines);
+
+ }
+
+ private HFLine generateLine(IFileInfo f, @CheckForNull DuplicationDatabase dupDb, ILoader loader, String type) throws IOException, SQLException {
+ Resource r = loader.toEMF(f.getFullFile());
+ String id = f.getModelId();
+ Path path = f.getFullFile().toPath();
+ String label = null;
+ if (f instanceof ModelSetFileInfo) {
+ label = ((ModelSetFileInfo) f).getCategory();
+ }
+
+
+ String txt = ComputeTxt.generateSingleText(r);
+ String graph = ComputeGraph.generateSingleGraph(r);
+ String xmi = Files.readString(path);
+ boolean isDuplicated = dupDb == null ? false : ! dupDb.isGroupRepresentive(id);
+
+ r.unload();
+
+ return new HFLine(label, id, txt, xmi, isDuplicated, graph, type);
+ }
+
+ private static class HFLine {
+ @JsonProperty
+ private String labels;
+
+ @JsonProperty
+ private String ids;
+
+ @JsonProperty
+ private String txt;
+
+ @JsonProperty
+ private String xmi;
+
+ @JsonProperty
+ private boolean is_duplicated;
+
+ @JsonProperty
+ private String graph;
+
+ @JsonProperty
+ private String model_type;
+
+ public HFLine(String labels, String ids, String txt, String xmi, boolean is_duplicated, String graph,
+ String model_type) {
+ super();
+ this.labels = labels;
+ this.ids = ids;
+ this.txt = txt;
+ this.xmi = xmi;
+ this.is_duplicated = is_duplicated;
+ this.graph = graph;
+ this.model_type = model_type;
+ }
+
+ }
+
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeTxt.java b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeTxt.java
index 45b4b51..786da9b 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeTxt.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeTxt.java
@@ -15,7 +15,9 @@
import mar.model2text.Model2TextUtils;
import mar.validation.AnalyserRegistry;
import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
import mar.validation.ResourceAnalyser.Factory;
+import modelset.database.ModelSetFileProvider;
public class ComputeTxt {
@@ -25,25 +27,27 @@ public static void main(String[] args) throws Exception {
File outputFolder = new File("../../txt/repo-ecore-all");
ModelLoader loader = ModelLoader.DEFAULT;
- generateTxt(repoFolder, db, outputFolder, "ecore", loader);
+ ModelSetFileProvider provider = new ModelSetFileProvider(db, repoFolder);
+ generateTxt(provider, outputFolder, "ecore", loader);
repoFolder = new File("../../raw-data/repo-genmymodel-uml");
db = new File("../../datasets/dataset.genmymodel/data/genmymodel.db");
outputFolder = new File("../../txt/repo-genmymodel-uml");
loader = ModelLoader.UML;
- generateTxt(repoFolder, db, outputFolder, "uml", loader);
+ provider = new ModelSetFileProvider(db, repoFolder);
+ generateTxt(provider, outputFolder, "uml", loader);
}
- private static void generateTxt(File repoFolder, File db, File outputFolder, String modelType, ModelLoader loader)
+ public static void generateTxt(IFileProvider provider, File outputFolder, String modelType, ModelLoader loader)
throws SQLException, IOException, FileNotFoundException {
Factory factory = AnalyserRegistry.INSTANCE.getFactory(modelType);
factory.configureEnvironment();
- ModelSetFileProvider provider = new ModelSetFileProvider(db, repoFolder);
for (IFileInfo f : provider.getLocalFiles()) {
System.out.println(f.getRelativePath());
Resource r = loader.load(f.getFullFile());
- String content = Model2TextUtils.model2document(r, new SimpleFilter(), new SimplePathFactory());
+ String content = generateSingleText(r);
+ r.unload();
java.nio.file.Path txtFolder = Paths.get(outputFolder.getAbsolutePath(), f.getRelativePath());
txtFolder.toFile().mkdirs();
@@ -54,4 +58,9 @@ private static void generateTxt(File repoFolder, File db, File outputFolder, Str
IOUtils.write(content.getBytes(), new FileOutputStream(outputFile.toFile()));
}
}
+
+ public static String generateSingleText(Resource r) {
+ String content = Model2TextUtils.model2document(r, new SimpleFilter(), new SimplePathFactory());
+ return content;
+ }
}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeUMLGraph.java b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeUMLGraph.java
index 1bacd4c..ba994f7 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeUMLGraph.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeUMLGraph.java
@@ -14,6 +14,7 @@
import mar.validation.AnalyserRegistry;
import mar.validation.IFileInfo;
import mar.validation.ResourceAnalyser.Factory;
+import modelset.database.ModelSetFileProvider;
import modelset.graph.Model2Graph;
public class ComputeUMLGraph {
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeUMLStats.java b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeUMLStats.java
index 6f8b43a..81af4ce 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeUMLStats.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeUMLStats.java
@@ -13,6 +13,7 @@
import mar.validation.ISingleFileAnalyser;
import mar.validation.ResourceAnalyser;
import mar.validation.ResourceAnalyser.Factory;
+import modelset.database.ModelSetFileProvider;
public class ComputeUMLStats {
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDatabase.java b/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDatabase.java
index 6d58bbe..90b4847 100644
--- a/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDatabase.java
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDatabase.java
@@ -97,7 +97,7 @@ public void addGroup(String runId, DuplicationGroup duplicationGroup)
}
public boolean isGroupRepresentive(String modelId) throws SQLException {
- try(PreparedStatement preparedStatement = connection.prepareStatement("SELECT model_id FROM duplicates WHERE model_id = ?")) {
+ try(PreparedStatement preparedStatement = connection.prepareStatement("SELECT model_id FROM duplicates WHERE group_id = ?")) {
preparedStatement.setString(1, modelId);
ResultSet rs = preparedStatement.executeQuery();
return rs.next();
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDbTest.java b/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDbTest.java
new file mode 100644
index 0000000..dbc7ab9
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDbTest.java
@@ -0,0 +1,60 @@
+package modelset.process;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.Collection;
+
+import mar.analysis.duplicates.DuplicateFinder.DuplicationGroup;
+import mar.analysis.duplicates.EcoreDuplicateFinder;
+import mar.modelling.loader.ILoader;
+import mar.validation.AnalyserRegistry;
+import mar.validation.IFileInfo;
+import mar.validation.ResourceAnalyser.Factory;
+import modelset.database.PlainFileInfo;
+
+public class DuplicationDbTest {
+
+ public static void main(String[] args) throws IOException, SQLException {
+
+
+ String m = "/home/jesus/projects/mde-ml/model-mate/experiments/data/mar/repo-genmymodel-ecore/data/_DmvCgAwjEeWATac42w0kFQ.ecore";
+
+ String m2 = "/home/jesus/projects/mde-ml/model-mate/experiments/data/mar/repo-genmymodel-ecore/data/_Q_PZgLWgEem8I_zdXKdSGw.ecore";
+ String m3 = "/home/jesus/projects/mde-ml/model-mate/experiments/data/mar/repo-genmymodel-ecore/data/_4DA1oPokEeSE6sazzutmkA.ecore";
+
+ String m4 = "/home/jesus/projects/mde-ml/model-mate/experiments/data/mar/repo-genmymodel-ecore/data/_u_vNsOQjEeiyGtLb2crGgA.ecore";
+
+
+ EcoreDuplicateFinder finder = new EcoreDuplicateFinder<>();
+
+
+ Factory factory = AnalyserRegistry.INSTANCE.getFactory("ecore");
+ factory.configureEnvironment();
+ ILoader loader = factory.newLoader();
+
+
+ finder.addResource(toFI("m1"), loader.toEMF(new File(m)));
+ finder.addResource(toFI("m2"), loader.toEMF(new File(m2)));
+ finder.addResource(toFI("m3"), loader.toEMF(new File(m3)));
+ finder.addResource(toFI("m4"), loader.toEMF(new File(m4)));
+
+
+ Collection> result = finder.getDuplicates(0.7, 0.8);
+ System.out.println("Groups: " + result.size());
+ for (DuplicationGroup duplicationGroup : result) {
+ System.out.println(duplicationGroup);
+ }
+
+ DuplicationDatabase db = new DuplicationDatabase(new File("/tmp/dup.db"));
+ for (DuplicationGroup duplicationGroup : result) {
+ db.addGroup("id", duplicationGroup);
+ }
+
+ db.close();
+ }
+
+ private static IFileInfo toFI(String string) {
+ return new PlainFileInfo(string, null, null);
+ }
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ToDataset.java b/java-lib/modelset-lib/src/main/java/modelset/process/ToDataset.java
new file mode 100644
index 0000000..b675725
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/ToDataset.java
@@ -0,0 +1,104 @@
+package modelset.process;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.concurrent.Callable;
+
+import mar.indexer.common.configuration.ModelLoader;
+import mar.modelling.loader.ILoader;
+import mar.validation.AnalyserRegistry;
+import mar.validation.IFileProvider;
+import mar.validation.ResourceAnalyser.Factory;
+import modelset.database.DuplicationAdapterProvider;
+import modelset.database.MarAnalysisFileProvider;
+import modelset.database.MarCrawlerFileProvider;
+import modelset.database.MinSizeAdapterProvider;
+import modelset.database.ModelSetFileProvider;
+import modelset.process.util.Utils;
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+
+@Command(name = "to-dataset", mixinStandardHelpOptions = true, description = "Generates a dataset in different formats")
+public class ToDataset implements Callable {
+
+ @Option(required = true, names = { "-r", "--repo-data" }, description = "Data repository folder")
+ private File repoRoot;
+
+ @Option(required = true, names = { "--type" }, description = "The type of the models")
+ private String type;
+
+ @Option(required = true, names = { "--db" }, description = "Database: either in ModelSet or MAR analysis or crawler style")
+ private File dbFile;
+
+ @Option(required = true, names = { "-o", "--output" }, description = "Output file or folder for the dataset")
+ private File output;
+
+ @Option(required = false, names = { "--dup" }, description = "Duplication database")
+ private File duplicationDbFile = null;
+
+ @Option(required = true, names = { "--target" }, description = "Target dataset: txt, graph, hf")
+ private String target;
+
+	@Option(required = false, names = { "--min_size" }, description = "Minimum size of the model in number of elements")
+ private int minSize = -1;
+
+ public static void main(String[] args) {
+ int exitCode = new CommandLine(new ToDataset()).execute(args);
+ System.exit(exitCode);
+ }
+
+ @Override
+ public Integer call() throws Exception {
+ Target tgt = Target.valueOf(target.toUpperCase());
+ if (tgt == null)
+ throw new IllegalArgumentException("Not allowed target");
+
+ IFileProvider provider = Utils.loadDatabase(dbFile, repoRoot, type);
+ IFileProvider originalProvider = provider;
+
+ DuplicationDatabase dupDb = null;
+ if (duplicationDbFile != null) {
+ if (! duplicationDbFile.exists()) {
+ System.out.println("Duplication db doesn't exist: " + duplicationDbFile);
+ return -1;
+ }
+ dupDb = new DuplicationDatabase(duplicationDbFile);
+ provider = new DuplicationAdapterProvider(provider, dupDb);
+ }
+
+
+ Factory factory = AnalyserRegistry.INSTANCE.getFactory(type);
+ factory.configureEnvironment();
+ ILoader loader = factory.newLoader();
+
+ if (minSize > 0) {
+ System.out.println("Num files: " + provider.getLocalFiles().size());
+ provider = new MinSizeAdapterProvider(provider, minSize, loader);
+			originalProvider = new MinSizeAdapterProvider(originalProvider, minSize, loader);
+ }
+
+ switch (tgt) {
+ case TXT:
+ ComputeTxt.generateTxt(provider, output, type, Utils.getLoader(type));
+ break;
+ case GRAPH:
+ ComputeGraph.generateGraph(provider, output, type, Utils.getLoader(type));
+ break;
+ case HF:
+ ComputeHF hf = new ComputeHF();
+ hf.generateJsonFormat(originalProvider, dupDb, loader, type, output);
+ break;
+ }
+
+ return 0;
+ }
+
+
+ public static enum Target {
+ TXT,
+ GRAPH,
+ HF
+ }
+}
\ No newline at end of file
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ToDuplicatesDB.java b/java-lib/modelset-lib/src/main/java/modelset/process/ToDuplicatesDB.java
new file mode 100644
index 0000000..36f3a5a
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/ToDuplicatesDB.java
@@ -0,0 +1,99 @@
+package modelset.process;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.concurrent.Callable;
+
+import org.eclipse.emf.ecore.resource.Resource;
+
+import mar.analysis.duplicates.EcoreDuplicateFinder;
+import mar.analysis.duplicates.UMLDuplicateFinder;
+import mar.analysis.duplicates.XtextDuplicateFinder;
+import mar.indexer.common.configuration.ModelLoader;
+import mar.modelling.loader.ILoader;
+import mar.validation.AnalyserRegistry;
+import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
+import mar.validation.ResourceAnalyser.Factory;
+import modelset.process.util.Utils;
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+
+@Command(name = "compute-duplicates", mixinStandardHelpOptions = true, description = "Compute duplicates of a crawled dataset")
+public class ToDuplicatesDB implements Callable {
+
+ @Option(required = true, names = { "-r", "--repo-data" }, description = "Data repository folder")
+ private File repoFolder;
+
+ @Option(required = true, names = { "-d", "--db" }, description = "Crawler database folder")
+ private File crawlerDb;
+
+ @Option(required = true, names = { "-o", "--output" }, description = "Output file for the dataset")
+ private File outputFile;
+
+ @Option(required = true, names = { "-t", "--type" }, description = "Artefact type")
+ private String type;
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = new CommandLine(new ToDuplicatesDB()).execute(args);
+ System.exit(exitCode);
+ }
+
+ @Override
+ public Integer call() throws Exception {
+ if (outputFile.exists())
+ outputFile.delete();
+ if (! outputFile.getParentFile().exists())
+ outputFile.getParentFile().mkdirs();
+
+ if (! crawlerDb.exists()) {
+ System.out.println("No database file: " + crawlerDb);
+ return -1;
+ }
+
+
+ IFileProvider provider = Utils.loadDatabase(crawlerDb, repoFolder, type);
+
+ switch (type) {
+ case "xtext":
+ XtextDuplicateFinder xtextFinder = new XtextDuplicateFinder();
+ FileBasedDuplicateComputation xdup = new FileBasedDuplicateComputation<>();
+ xdup.dumpToDatabase(type, provider, xtextFinder, f -> f , null, outputFile);
+ break;
+ case "ecore":
+ Factory factory = AnalyserRegistry.INSTANCE.getFactory("ecore");
+ factory.configureEnvironment();
+
+ EcoreDuplicateFinder finder = new EcoreDuplicateFinder();
+ FileBasedDuplicateComputation edup = new FileBasedDuplicateComputation<>();
+ edup.dumpToDatabase(type, provider, finder, f -> ModelLoader.DEFAULT.load(f) , r -> r.unload(), outputFile);
+ break;
+ case "uml":
+ Factory factoryUML = AnalyserRegistry.INSTANCE.getFactory("uml");
+ factoryUML.configureEnvironment();
+ ILoader umlLoader = factoryUML.newLoader();
+
+
+ IFileProvider umlProvider = Utils.loadDatabase(crawlerDb, repoFolder, type);
+ FileBasedDuplicateComputation umlDup = new FileBasedDuplicateComputation<>();
+ UMLDuplicateFinder umlFinder = new UMLDuplicateFinder();
+ umlDup.dumpToDatabase(type, umlProvider, umlFinder, f -> umlLoader.toEMF(f) , r -> r.unload(), outputFile);
+ break;
+ default:
+ break;
+ }
+ System.out.println("Finished");
+
+ return 0;
+ }
+
+ protected static class FileBasedDuplicateComputation extends AbstractDuplicateComputation {
+ @Override
+ public void dumpToDatabase(String modelType, mar.validation.IFileProvider provider, mar.analysis.duplicates.DuplicateFinder finder, AbstractDuplicateComputation.LoaderFunction loader, java.util.function.Consumer unloader, File outputFile) throws SQLException, IOException {
+ super.dumpToDatabase(modelType, provider, finder, loader, unloader, outputFile);
+ }
+ }
+
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/textual/AbstractSyntaxPrinter.java b/java-lib/modelset-lib/src/main/java/modelset/process/textual/AbstractSyntaxPrinter.java
new file mode 100644
index 0000000..1c47193
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/textual/AbstractSyntaxPrinter.java
@@ -0,0 +1,176 @@
+package modelset.process.textual;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.fasterxml.jackson.core.exc.StreamWriteException;
+import com.fasterxml.jackson.databind.DatabindException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.io.Files;
+
+import mar.validation.IFileInfo;
+import mar.validation.IFileProvider;
+import modelset.database.ModelSetFileProvider;
+import modelset.process.DuplicationDatabase;
+import modelset.process.textual.CodeXGlueOutput.Mode;
+
+public abstract class AbstractSyntaxPrinter {
+
+ protected static enum Dataset {
+ ECORE,
+ UML
+ }
+
+ protected void runMain(String name, Dataset dataset, String[] args) throws Exception {
+ if (args.length < 3) {
+ System.out.println("./" + name + " mode dups/no-dups output-file");
+ return;
+ }
+
+ Mode mode = getMode(args[0]);
+ boolean filterDuplicates = getFilterDuplicates(args[1]);
+
+ File outputFile = new File(args[2]);
+
+
+ File repoFolder;
+ File db;
+ File dupDbFile;
+
+ if (dataset == Dataset.ECORE) {
+ repoFolder = new File("../../raw-data/repo-ecore-all");
+ db = new File("../../datasets/dataset.ecore/data/ecore.db");
+ dupDbFile = new File("../../dups/ecore-dups.db");
+ } else if (dataset == Dataset.UML) {
+ repoFolder = new File("../../raw-data/repo-genmymodel-uml");
+ db = new File("../../datasets/dataset.genmymodel/data/genmymodel.db");
+ dupDbFile = new File("../../dups/uml-dups.db");
+ } else {
+ throw new IllegalArgumentException("Invalid dataset");
+ }
+
+ DuplicationDatabase dupDb = null;
+ if (filterDuplicates) {
+ dupDb = new DuplicationDatabase(dupDbFile);
+ }
+
+ ModelSetFileProvider provider = new ModelSetFileProvider(db, repoFolder);
+ generateTokenization(provider, outputFile, mode, dupDb);
+
+ if (dupDb != null)
+ dupDb.close();
+
+ }
+
+ protected boolean getFilterDuplicates(String dupsStr) {
+ boolean filterDuplicates = false;
+ if (dupsStr.contains("no-dups")) {
+ filterDuplicates = true;
+ }
+ return filterDuplicates;
+ }
+
+ protected Mode getMode(String modeStr) {
+ Mode mode;
+ if (modeStr.contains("line")) {
+ mode = Mode.LINE;
+ } else if (modeStr.contains("token")) {
+ mode = Mode.TOKEN;
+ } else {
+ mode = Mode.FULL;
+ }
+ return mode;
+ }
+
+ protected void generateTokenization(IFileProvider provider, File outputFile, Mode mode, DuplicationDatabase dupDb) throws SQLException, IOException {
+ Map result = new HashMap<>();
+
+ CodeXGlueOutput all = new CodeXGlueOutput(mode);
+ int processed = 0;
+ for (IFileInfo f : provider.getLocalFiles()) {
+ if (dupDb != null) {
+ if (! dupDb.isGroupRepresentive(f.getModelId()))
+ continue;
+ }
+
+ System.out.println(f.getRelativePath());
+
+ try {
+ List printableElements = getElements(f.getFullFile());
+
+ int i = 0;
+ for (T t : printableElements) {
+ CodeXGlueOutput output = new CodeXGlueOutput(mode);
+ convertToTokens(t, output);
+
+ if (mode == Mode.FULL) {
+ Map model = new HashMap<>();
+ model.put("raw", output.builder.toString());
+
+ String id = f.getModelId() + "#" + i;
+ result.put(id, model);
+ } else {
+ all.merge(output);
+ }
+
+ unload(t);
+ }
+
+ processed++;
+ if (processed % 10 == 0) {
+ System.out.println("Writing intermediate results with " + processed + " files");
+ writeResult(outputFile, mode, result, all);
+ }
+ } catch (InvalidModelException e) {
+ System.out.println("Invalid model: " + f.getAbsolutePath());
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+
+ writeResult(outputFile, mode, result, all);
+ }
+
+ private void writeResult(File outputFile, Mode mode, Map result, CodeXGlueOutput all)
+ throws IOException, StreamWriteException, DatabindException {
+ if (mode == Mode.FULL) {
+ ObjectMapper mapper = new ObjectMapper();
+ mapper.writer().writeValue(outputFile, result);
+ } else {
+ System.out.println("Writing to: " + outputFile);
+ Files.write(all.builder.toString().getBytes(), outputFile);
+ }
+ }
+
+ protected void unload(T t) {
+ // To be implemented by subclasses
+ }
+
+ protected abstract List getElements(File f) throws IOException;
+
+ protected abstract void convertToTokens(T r, CodeXGlueOutput output);
+
+ @SuppressWarnings("unused")
+ protected
+ static T nonNull(T obj) {
+ if (obj == null)
+ throw new InvalidModelException("Null value");
+ return obj;
+ }
+
+ public static class InvalidModelException extends RuntimeException {
+ public InvalidModelException(String string) {
+ super(string);
+ }
+
+ private static final long serialVersionUID = 5490556461546321329L;
+
+ }
+
+
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/textual/CodeXGlueOutput.java b/java-lib/modelset-lib/src/main/java/modelset/process/textual/CodeXGlueOutput.java
new file mode 100644
index 0000000..8c6e540
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/textual/CodeXGlueOutput.java
@@ -0,0 +1,114 @@
+package modelset.process.textual;
+
+// https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/CodeCompletion-token
+public class CodeXGlueOutput {
+
+ public static enum Mode {
+ TOKEN,
+ LINE,
+ FULL
+ }
+
+ final StringBuilder builder = new StringBuilder();
+ final Mode mode;
+ int indent = 0;
+
+ public CodeXGlueOutput(Mode mode) {
+ this.mode = mode;
+ }
+
+ public Mode getMode() {
+ return mode;
+ }
+
+ public CodeXGlueOutput w() {
+ if (mode == Mode.FULL)
+ builder.append(" ");
+ return this;
+ }
+
+ public void merge(CodeXGlueOutput output) {
+ builder.append(output.builder);
+ }
+
+ public CodeXGlueOutput newLine() {
+ if (mode == Mode.LINE) {
+ builder.append(" ");
+ } else if (mode == Mode.FULL) {
+ builder.append("\n");
+ }
+ return this;
+ }
+
+ public CodeXGlueOutput indent() {
+ indent++;
+ return this;
+ }
+
+ public CodeXGlueOutput unindent() {
+ indent--;
+ return this;
+ }
+ public PieceOfCode start() {
+ if (mode != Mode.FULL)
+ builder.append("");
+ return new PieceOfCode(this);
+ }
+
+ public CodeXGlueOutput token(String string) {
+ if (string.isEmpty())
+ return this;
+
+ doIndentIfNeeded();
+
+ if (mode != Mode.FULL)
+ builder.append(" ");
+ builder.append(string);
+ return this;
+ }
+
+ public CodeXGlueOutput stringToken(String str) {
+ doIndentIfNeeded();
+
+ if (mode != Mode.FULL)
+ builder.append(" ");
+ builder.append("\"");
+ builder.append(str);
+ builder.append("\"");
+ return this;
+ }
+
+ private void doIndentIfNeeded() {
+ int size = builder.length();
+ if (mode == Mode.FULL && size > 0) {
+ char last = builder.charAt(size - 1);
+ if (last == '\n') {
+ for(int i = 0; i < indent; i++)
+ builder.append("\t");
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ return builder.toString();
+ }
+
+
+ public static class PieceOfCode implements AutoCloseable {
+
+ private CodeXGlueOutput output;
+
+ public PieceOfCode(CodeXGlueOutput output) {
+ this.output = output;
+ }
+
+ @Override
+ public void close() {
+ if (output.mode != Mode.FULL) {
+ output.builder.append(" ");
+ output.builder.append("\n");
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/textual/ComputeEmfatic.java b/java-lib/modelset-lib/src/main/java/modelset/process/textual/ComputeEmfatic.java
new file mode 100644
index 0000000..26a196f
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/textual/ComputeEmfatic.java
@@ -0,0 +1,194 @@
+package modelset.process.textual;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+import org.eclipse.emf.ecore.EAttribute;
+import org.eclipse.emf.ecore.EClass;
+import org.eclipse.emf.ecore.EClassifier;
+import org.eclipse.emf.ecore.EDataType;
+import org.eclipse.emf.ecore.EEnum;
+import org.eclipse.emf.ecore.EEnumLiteral;
+import org.eclipse.emf.ecore.EObject;
+import org.eclipse.emf.ecore.EPackage;
+import org.eclipse.emf.ecore.EReference;
+import org.eclipse.emf.ecore.EStructuralFeature;
+import org.eclipse.emf.ecore.ETypedElement;
+import org.eclipse.emf.ecore.EcorePackage;
+import org.eclipse.emf.ecore.resource.Resource;
+
+import mar.indexer.common.configuration.ModelLoader;
+import mar.validation.AnalyserRegistry;
+import mar.validation.ResourceAnalyser.Factory;
+import modelset.process.textual.CodeXGlueOutput.Mode;
+import modelset.process.textual.CodeXGlueOutput.PieceOfCode;
+
+public class ComputeEmfatic extends AbstractSyntaxPrinter {
+
+ public static void main(String[] args) throws Exception {
+ Factory factory = AnalyserRegistry.INSTANCE.getFactory("ecore");
+ factory.configureEnvironment();
+
+ new ComputeEmfatic().runMain("ComputeEmfatic", Dataset.ECORE, args);
+ }
+
+
+ @Override
+ protected List getElements(File f) throws IOException {
+ Resource r = ModelLoader.DEFAULT.load(f);
+ return Collections.singletonList(r);
+ }
+
+ @Override
+ protected void convertToTokens(Resource r, CodeXGlueOutput output) {
+ try(PieceOfCode c = output.start()) {
+ for (EObject obj : r.getContents()) {
+ if (obj instanceof EPackage) {
+ convertRootPackage((EPackage) obj, output);
+ }
+ }
+ }
+ }
+
+ /**
+	 * This is intended to be called by clients that want to generate Emfatic files outside ModelSet
+ */
+ public static void toTokens(Resource r, CodeXGlueOutput output) {
+ new ComputeEmfatic().convertToTokens(r, output);
+ }
+
+ protected void convertRootPackage(EPackage obj, CodeXGlueOutput output) {
+ //@namespace(uri="AnURI", prefix="uri-name")
+ //package ecore;
+ convertNamespace(obj, output);
+ output.token("package").w().token(obj.getName()).token(";").newLine();
+ convertPackageContents(obj, output);
+ }
+
+ protected void convertPackageContents(EPackage obj, CodeXGlueOutput output) {
+ for (EPackage pkg : obj.getESubpackages()) {
+ convertPackage(pkg, output);
+ }
+
+ for (EClassifier classifier : obj.getEClassifiers()) {
+ if (classifier instanceof EClass) {
+ convertClass((EClass) classifier, output);
+ } else if (classifier instanceof EEnum) {
+ convertEnum((EEnum) classifier, output);
+ } else if (classifier instanceof EDataType) {
+ convertDataType((EDataType) classifier, output);
+ }
+ }
+ }
+
+ protected void convertEnum(EEnum c, CodeXGlueOutput output) {
+ output.token("enum").w().token(nonNull(c.getName())).w();
+ output.token("{").newLine().indent();
+ for (EEnumLiteral l : c.getELiterals()) {
+ output.token(nonNull(l.getName())).w().token("=").w().token(Integer.toString(l.getValue())).token(";");
+ }
+ output.unindent().token("}").newLine();
+ }
+
+
+ protected void convertDataType(EDataType c, CodeXGlueOutput output) {
+ if (c.getInstanceTypeName() == null)
+ return;
+
+ output.
+ token("datatype").w().token(nonNull(c.getName())).w().
+ token(":").w().
+ token(c.getInstanceTypeName()).
+ token(";").newLine();
+ }
+
+
+ protected void convertNamespace(EPackage obj, CodeXGlueOutput output) {
+ output.token("@").token("namespace").token("(").
+ token("uri").token("=").stringToken(obj.getNsURI()).token(",").w().
+ token("prefix").token("=").stringToken(obj.getNsPrefix()).token(")").
+ newLine();
+ }
+
+ protected void convertPackage(EPackage pkg, CodeXGlueOutput output) {
+ convertNamespace(pkg, output);
+ output.token("package").w().token(pkg.getName()).w().token("{").newLine().indent();
+ convertPackageContents(pkg, output);
+ output.unindent().token("}").newLine();
+ }
+
+ // (abstract?) class A { }
+ protected void convertClass(EClass c, CodeXGlueOutput output) {
+ convertClassHeader(c, output);
+ output.token("{").newLine().indent();
+ convertClassContents(c, output);
+ output.unindent().token("}").newLine();
+ }
+
+
+ protected void convertClassHeader(EClass c, CodeXGlueOutput output) {
+ if (c.isAbstract())
+ output.token("abstract").w();
+ output.token("class").w().token(nonNull(c.getName())).w();
+
+ if (c.getESuperTypes().size() > 0) {
+ output.token("extends").w();
+ for (int i = 0, len = c.getESuperTypes().size(); i < len; i++) {
+ EClass sup = c.getESuperTypes().get(i);
+ output.token(nonNull(sup.getName()));
+ if (i + 1 != len)
+ output.token(",").w();
+ }
+ }
+ }
+
+ protected void convertClassContents(EClass c, CodeXGlueOutput output) {
+ for (EStructuralFeature feature : c.getEStructuralFeatures()) {
+ if (feature instanceof EAttribute) {
+ convertAttribute((EAttribute) feature, output);
+ } else {
+ convertReference((EReference) feature, output);
+ }
+ }
+ }
+
+ protected void convertAttribute(EAttribute attr, CodeXGlueOutput output) {
+ EDataType dt = attr.getEAttributeType();
+ String type = toEmfaticType(dt);
+ String card = toEmfaticCardinality(attr, output.mode);
+
+ output.token("attr").w();
+ output.token(type);
+ output.token(card).w();
+ output.token(attr.getName());
+ output.token(";");
+ output.newLine();
+ }
+
+ protected void convertReference(EReference ref, CodeXGlueOutput output) {
+ EClass referenced = ref.getEReferenceType();
+ // TODO: This needs to check whether this is an imported package or a subpackage...
+
+ String referencedName = nonNull(referenced.getName());
+ String refType = ref.isContainment() ? "val" : "ref";
+ String card = toEmfaticCardinality(ref, output.mode);
+
+ output.token(refType).w();
+ output.token(referencedName);
+ output.token(card).w();
+ output.token(ref.getName());
+ output.token(";");
+ output.newLine();
+ }
+
+ protected String toEmfaticType(EDataType dt) {
+ return TextUtils.toEmfaticType(dt);
+ }
+
+ protected String toEmfaticCardinality(ETypedElement t, Mode mode) {
+ return TextUtils.toEmfaticCardinality(t, mode);
+ }
+
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/textual/ComputePlantUML.java b/java-lib/modelset-lib/src/main/java/modelset/process/textual/ComputePlantUML.java
new file mode 100644
index 0000000..67ec446
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/textual/ComputePlantUML.java
@@ -0,0 +1,224 @@
+package modelset.process.textual;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.eclipse.emf.common.util.TreeIterator;
+import org.eclipse.emf.ecore.EObject;
+import org.eclipse.emf.ecore.resource.Resource;
+import org.eclipse.uml2.uml.Constraint;
+import org.eclipse.uml2.uml.FinalState;
+import org.eclipse.uml2.uml.OpaqueExpression;
+import org.eclipse.uml2.uml.Pseudostate;
+import org.eclipse.uml2.uml.Region;
+import org.eclipse.uml2.uml.State;
+import org.eclipse.uml2.uml.StateMachine;
+import org.eclipse.uml2.uml.Transition;
+import org.eclipse.uml2.uml.ValueSpecification;
+import org.eclipse.uml2.uml.Vertex;
+
+import mar.indexer.common.configuration.ModelLoader;
+import mar.validation.AnalyserRegistry;
+import mar.validation.ResourceAnalyser.Factory;
+import modelset.process.textual.CodeXGlueOutput.PieceOfCode;
+
+/**
+ * Docs: https://plantuml.com/en/state-diagram
+ *
+ */
+public class ComputePlantUML extends AbstractSyntaxPrinter {
+
+ /** CLI entry point: configures the UML analyser environment and runs the printer. */
+ public static void main(String[] args) throws Exception {
+ AnalyserRegistry.INSTANCE.getFactory("uml").configureEnvironment();
+ new ComputePlantUML().runMain("ComputePlantUML", Dataset.UML, args);
+ }
+
+ @Override
+ protected List getElements(File f) throws IOException {
+ Resource r = ModelLoader.UML.load(f);
+
+ List elements = new ArrayList<>();
+ TreeIterator it = r.getAllContents();
+ while (it.hasNext()) {
+ EObject obj = it.next();
+ if (obj instanceof StateMachine) {
+ if (! isEmpty((StateMachine) obj))
+ elements.add((StateMachine) obj);
+ }
+ }
+ return elements;
+ }
+
+ /** A state machine with no regions contributes nothing to the diagram. */
+ private boolean isEmpty(StateMachine sm) {
+ return sm.getRegions().isEmpty();
+ }
+
+ @Override
+ protected void convertToTokens(StateMachine sm, CodeXGlueOutput output) {
+ try(PieceOfCode c = output.start()) {
+ generate(sm, output);
+ System.out.println("---");
+ System.out.println(output.builder.toString());
+ }
+ }
+
+ public void generate(StateMachine sm, CodeXGlueOutput output) {
+
+ List regions = sm.getRegions();
+
+ output.token("@startuml").newLine();
+
+ String sep = "";
+ for (Region region : regions) {
+ if (! sep.isEmpty())
+ output.token(sep).newLine();
+
+ mapRegion(region, output);
+ sep = "---";
+ }
+
+ output.token("@enduml").newLine();
+ }
+
+ // repo-uml-pruned/sm/lituss/LobbyServer/LobbyServer/master/sim.uml
+
+ // repo-uml-pruned/sm/lituss/LobbyServer/LobbyServer/master/sim.uml
+
+ /**
+  * Emits the PlantUML lines for one region: first its transitions, then its
+  * sub-vertices (recursing into composite states) and pseudostates.
+  */
+ public void mapRegion(Region r, CodeXGlueOutput output) {
+ for (Transition transition : r.getTransitions()) {
+ if (transition.getSource() == null || transition.getTarget() == null) {
+ System.out.println("Malformed model, transition without source or target: " + transition);
+ continue;
+ }
+
+ String src = toStateId(transition.getSource());
+ String tgt = toStateId(transition.getTarget());
+
+ // Label is the transition name plus (at most) the first owned guard
+ // constraint, e.g. ": EvNewValue"
+ String name = transition.getName();
+ String label = name == null ? "" : name + " ";
+ List<Constraint> rules = transition.getOwnedRules();
+ if (! rules.isEmpty()) {
+ // Pick one just to show something.
+ // Maybe the rest could go in a comment?
+ ValueSpecification spec = rules.get(0).getSpecification();
+ if (spec instanceof OpaqueExpression) {
+ label += String.join("&&", ((OpaqueExpression) spec).getBodies());
+ }
+ }
+
+ output.token(src).w().token("-->").w().token(tgt);
+
+ // label is never null here; only skip when it carries no text.
+ if (! label.trim().isEmpty()) {
+ output.w().token(":").w().token("\"" + label + "\"");
+ }
+
+ output.newLine();
+ }
+
+ for (Vertex v : r.getSubvertices()) {
+ if (v instanceof State && !(v instanceof FinalState)) {
+ // Composite state: recurse into its regions, separated by "---".
+ output.token("state").w().token(toStateId(v)).w().token("{").newLine().indent();
+ String sep = "";
+ for (Region region : ((State) v).getRegions()) {
+ if (! sep.isEmpty())
+ output.token(sep).newLine();
+ mapRegion(region, output);
+ sep = "---";
+ }
+ output.unindent().token("}").newLine();
+ } else if (v instanceof Pseudostate) {
+ // e.g. state join_state <<join>>
+ // FIX(review): the original switch had no breaks, so a FORK also
+ // emitted the JOIN and JUNCTION lines. The stereotype strings also
+ // looked mangled ("<>"); restored to PlantUML's <<fork>>/<<join>>
+ // syntax -- TODO confirm against the intended output.
+ switch(((Pseudostate) v).getKind()) {
+ case FORK_LITERAL:
+ output.token("state").w().token(toStateId(v)).w().token("<<fork>>").newLine();
+ break;
+ case JOIN_LITERAL:
+ output.token("state").w().token(toStateId(v)).w().token("<<join>>").newLine();
+ break;
+ case JUNCTION_LITERAL:
+ // TODO: Change color
+ output.token("state").w().token(toStateId(v)).newLine();
+ break;
+ default:
+ // Other pseudostate kinds only appear as transition endpoints.
+ break;
+ }
+ }
+ }
+ }
+
+ // Stable vertex -> PlantUML state-id cache; its size doubles as a
+ // uniqueness index for synthesized names.
+ // NOTE(review): type arguments restored -- a raw Map would not compile
+ // against the String return below.
+ private Map<Vertex, String> names = new HashMap<>();
+
+ /**
+  * Returns a stable PlantUML identifier for the vertex, creating and
+  * caching one on first use.
+  */
+ public String toStateId(Vertex vertex) {
+ if (names.containsKey(vertex))
+ return names.get(vertex);
+
+ String name = toStateId_(vertex, names.size());
+ names.put(vertex, name);
+ return name;
+ }
+
+ /**
+  * Computes the PlantUML identifier for a vertex. Initial, final and
+  * terminate vertices all map to the "[*]" marker; pseudostates get
+  * synthesized names suffixed with {@code idx} to keep them unique.
+  *
+  * NOTE(review): CHOICE is grouped with the history kinds and therefore
+  * gets an "H"-hinted name -- confirm this is intended.
+  * NOTE(review): State#getName() may be null for anonymous states, which
+  * would make normalizeName throw an NPE -- verify against the models used.
+  *
+  * @throws UnsupportedOperationException for vertex kinds not handled here
+  */
+ public String toStateId_(Vertex vertex, int idx) {
+ if (vertex instanceof FinalState) {
+ return "[*]";
+ } else if (vertex instanceof State) {
+ State state = (State) vertex;
+ return normalizeName(state.getName());
+ } else if (vertex instanceof Pseudostate) {
+ Pseudostate state = (Pseudostate) vertex;
+
+ switch(state.getKind()) {
+ case INITIAL_LITERAL:
+ return "[*]";
+ case CHOICE_LITERAL:
+ case DEEP_HISTORY_LITERAL:
+ case SHALLOW_HISTORY_LITERAL:
+ return toSynthesizedName(state.getName(), "H", idx);
+ case ENTRY_POINT_LITERAL:
+ case EXIT_POINT_LITERAL:
+ case FORK_LITERAL:
+ if (vertex.getName() == null)
+ return "fork_" + idx;
+ return vertex.getName() + "_" + idx;
+ case JOIN_LITERAL:
+ if (vertex.getName() == null)
+ return "join_" + idx;
+ return vertex.getName() + "_" + idx;
+ case JUNCTION_LITERAL:
+ return "junction_" + idx;
+ case TERMINATE_LITERAL:
+ // TODO: Mark this specially, with a comment?
+ return "[*]";
+ default:
+ break;
+
+ }
+ }
+ throw new UnsupportedOperationException("Vertex not supported: " + vertex);
+ }
+
+ /**
+  * Replaces every character PlantUML cannot use in a bare identifier with
+  * an underscore. The commented-out alternative quoted the name instead.
+  *
+  * NOTE(review): throws NPE when {@code name} is null -- callers must
+  * check for null first.
+  */
+ private String normalizeName(String name) {
+ //if (!name.matches("^[A-Za-z0-9_]+$")) {
+ // return "\"" + name + "\"";
+ //}
+ return name.replaceAll("[^A-Za-z0-9_]", "_");
+ }
+
+ /**
+  * Builds an identifier for a pseudostate: a null name yields
+  * "&lt;hint&gt;_&lt;idx&gt;", otherwise the normalized name suffixed with the hint.
+  *
+  * FIX(review): the original normalized first and checked for null
+  * afterwards, so a null name threw an NPE inside normalizeName and the
+  * null branch was unreachable. Check null before normalizing.
+  */
+ private String toSynthesizedName(String name, String hint, int idx) {
+ if (name == null)
+ return hint + "_" + idx;
+ return normalizeName(name) + "_" + hint;
+ }
+
+
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/textual/TextUtils.java b/java-lib/modelset-lib/src/main/java/modelset/process/textual/TextUtils.java
new file mode 100644
index 0000000..6e81337
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/textual/TextUtils.java
@@ -0,0 +1,118 @@
+package modelset.process.textual;
+
+import org.eclipse.emf.ecore.EDataType;
+import org.eclipse.emf.ecore.EEnum;
+import org.eclipse.emf.ecore.ETypedElement;
+import org.eclipse.emf.ecore.EcorePackage;
+
+import modelset.process.textual.AbstractSyntaxPrinter.InvalidModelException;
+import modelset.process.textual.CodeXGlueOutput.Mode;
+
+public class TextUtils {
+
+ /**
+  * Maps an Ecore data type to its Emfatic type name.
+  *
+  * Resolution order matters: enums first, then identity comparisons against
+  * the EcorePackage literals, then the instance type name, then a few
+  * well-known simple names as a last resort.
+  *
+  * @throws InvalidModelException if the type is an unresolved proxy
+  * @throws UnsupportedOperationException if no mapping is known
+  */
+ public static String toEmfaticType(EDataType dt) {
+ if (dt instanceof EEnum) {
+ EEnum e = (EEnum) dt;
+ // FIXME: Todo check, packages
+ return AbstractSyntaxPrinter.nonNull(e.getName());
+ }
+
+ // Identity comparison against the singleton Ecore literals.
+ if (dt == EcorePackage.Literals.EBOOLEAN)
+ return "boolean";
+ else if (dt == EcorePackage.Literals.EBOOLEAN_OBJECT) {
+ return "Boolean";
+ } else if (dt == EcorePackage.Literals.EBYTE) {
+ return "byte";
+ } else if (dt == EcorePackage.Literals.EBYTE_OBJECT) {
+ return "Byte";
+ } else if (dt == EcorePackage.Literals.ECHAR) {
+ return "char";
+ } else if (dt == EcorePackage.Literals.ECHARACTER_OBJECT) {
+ return "Character";
+ } else if (dt == EcorePackage.Literals.EDOUBLE) {
+ return "double";
+ } else if (dt == EcorePackage.Literals.EDOUBLE_OBJECT) {
+ return "Double";
+ } else if (dt == EcorePackage.Literals.EINT) {
+ return "int";
+ } else if (dt == EcorePackage.Literals.EINTEGER_OBJECT) {
+ return "Integer";
+ } else if (dt == EcorePackage.Literals.ELONG) {
+ return "long";
+ } else if (dt == EcorePackage.Literals.ELONG_OBJECT) {
+ return "Long";
+ } else if (dt == EcorePackage.Literals.ESHORT) {
+ return "short";
+ } else if (dt == EcorePackage.Literals.ESHORT_OBJECT) {
+ return "Short";
+ } else if (dt == EcorePackage.Literals.EDATE) {
+ return "Date";
+ } else if (dt == EcorePackage.Literals.ESTRING) {
+ return "String";
+ } else if (dt == EcorePackage.Literals.EJAVA_OBJECT) {
+ return "Object";
+ } else if (dt == EcorePackage.Literals.EJAVA_CLASS) {
+ return "Class";
+ } else if (dt == EcorePackage.Literals.EOBJECT) {
+ // This doesn't look correct, because EObject is an EClass
+ return "EObject";
+ } else if (dt == EcorePackage.Literals.ECLASS) {
+ // This doesn't look correct, because EClass is an EClass
+ return "EClass";
+ }
+
+ // Fallback: derive a name from the Java instance type, or, failing
+ // that, from a handful of conventional simple names.
+ String typeName = dt.getInstanceTypeName();
+ String name = dt.getName();
+ if (typeName != null) {
+ if ("org.eclipse.emf.ecore.EObject".equals(typeName))
+ return "EObject";
+ if ("org.eclipse.emf.ecore.EClass".equals(typeName))
+ return "EClass";
+
+ if (typeName.startsWith("org.eclipse.emf.ecore")) {
+ String[] parts = typeName.split("\\.");
+ return "ecore." + parts[parts.length - 1];
+ }
+ // FIXME: Not sure about this
+ return typeName;
+ } else if ("String".equals(name)) {
+ return "String";
+ } else if ("Integer".equals(name)) {
+ return "Integer";
+ } else if ("Double".equals(name)) {
+ return "Double";
+ } else if ("Boolean".equals(name)) {
+ return "Boolean";
+ } else {
+ if (dt.eIsProxy())
+ throw new InvalidModelException("Proxy");
+
+ throw new UnsupportedOperationException(dt.toString());
+ }
+
+ }
+
+ /**
+  * Renders the multiplicity of a typed element in Emfatic bracket syntax.
+  * Specific well-known bounds ([?], [*], [+], [1]) are matched before the
+  * general ranged forms; the match order is significant.
+  *
+  * @throws UnsupportedOperationException for bound combinations not handled
+  */
+ public static String toEmfaticCardinality(ETypedElement t, Mode mode) {
+ int lo = t.getLowerBound();
+ int hi = t.getUpperBound();
+
+ String card;
+ if (lo == 0 && hi == 1)
+ card = "[ ? ]"; // could be the empty string
+ else if (lo == 0 && hi == -1)
+ card = "[ * ]";
+ else if (lo == 1 && hi == -1)
+ card = "[ + ]";
+ else if (lo == 1 && hi == 1)
+ card = "[ 1 ]";
+ else if (lo >= 0 && hi == -1)
+ card = "[ " + lo + " .. * ]";
+ else if (lo >= 0 && hi == -2)
+ card = "[ " + lo + " .. ? ]";
+ else if (lo >= 0 && lo == hi)
+ card = "[ " + lo + " ]";
+ else if (lo >= 0 && hi > 0)
+ card = "[ " + lo + " .. " + hi + " ]";
+ else
+ throw new UnsupportedOperationException(t.toString());
+
+ return applyMode(card, mode);
+ }
+
+ /** In FULL mode cardinalities are compacted by stripping every space. */
+ protected static String applyMode(String cardinalityString, Mode mode) {
+ if (mode == Mode.FULL) {
+ return cardinalityString.replace(" ", "");
+ }
+ return cardinalityString;
+ }
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/util/Utils.java b/java-lib/modelset-lib/src/main/java/modelset/process/util/Utils.java
new file mode 100644
index 0000000..c5c04e6
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/util/Utils.java
@@ -0,0 +1,38 @@
+package modelset.process.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.SQLException;
+
+import mar.indexer.common.configuration.ModelLoader;
+import mar.validation.IFileProvider;
+import modelset.database.MarAnalysisFileProvider;
+import modelset.database.MarCrawlerFileProvider;
+import modelset.database.ModelSetFileProvider;
+
+public class Utils {
+
+ /**
+  * Returns the model loader for the given model type.
+  *
+  * @param type "ecore" or "uml"
+  * @throws IllegalArgumentException if the type is not recognised
+  */
+ public static ModelLoader getLoader(String type) {
+ // Constant-first equals is null-safe; include the offending value in the
+ // error so the caller can diagnose it.
+ if ("ecore".equals(type))
+ return ModelLoader.DEFAULT;
+ else if ("uml".equals(type))
+ return ModelLoader.UML;
+ throw new IllegalArgumentException("Unknown type: " + type);
+ }
+
+ /**
+  * Detects the flavour of the given database file and wraps it in the
+  * matching {@link IFileProvider}.
+  *
+  * @throws IllegalArgumentException if the file matches no known flavour
+  * @throws RuntimeException wrapping any SQL/IO failure during detection
+  */
+ public static IFileProvider loadDatabase(File db, File repoRoot, String type) {
+ try {
+ if (ModelSetFileProvider.isModelSetDb(db)) {
+ return new ModelSetFileProvider(db, repoRoot);
+ } else if (MarCrawlerFileProvider.isCrawlerDb(db)) {
+ return new MarCrawlerFileProvider(db, repoRoot, type);
+ } else if (MarAnalysisFileProvider.isMarDb(db)) {
+ return new MarAnalysisFileProvider(db, repoRoot, type);
+ }
+ } catch (SQLException | IOException e) {
+ // Wrap with context; the cause preserves the stack trace, so the
+ // original printStackTrace() double-reporting is unnecessary.
+ throw new RuntimeException("Could not open database: " + db, e);
+ }
+ throw new IllegalArgumentException("Invalid database: " + db);
+ }
+}