diff --git a/bin/generate.sh b/bin/generate.sh
index 1882113..2e8333f 100755
--- a/bin/generate.sh
+++ b/bin/generate.sh
@@ -11,4 +11,5 @@ mvn test exec:java -Dexec.mainClass=modelset.process.ComputeUMLStats
mvn test exec:java -Dexec.mainClass=modelset.process.ComputeTxt
mvn test exec:java -Dexec.mainClass=modelset.process.ComputeGraph
mvn test exec:java -Dexec.mainClass=modelset.process.ComputeUMLGraph
+mvn test exec:java -Dexec.mainClass=modelset.process.ComputeDuplicates
popd
diff --git a/java-lib/modelset-lib/pom.xml b/java-lib/modelset-lib/pom.xml
index c3d539e..eaef3c2 100644
--- a/java-lib/modelset-lib/pom.xml
+++ b/java-lib/modelset-lib/pom.xml
@@ -1,6 +1,4 @@
-
+
4.0.0
io.github.models-lab.modelset
modelset-lib
@@ -54,6 +52,24 @@
mar-modelling
1.0-SNAPSHOT
+
+
+ io.github.models-lab.mar
+ mar-modelling-transformations
+ 1.0-SNAPSHOT
+
+
+
+
+ org.agrona
+ agrona
+ 1.15.0
+
+
@@ -109,4 +125,4 @@
-
+
\ No newline at end of file
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeDuplicates.java b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeDuplicates.java
new file mode 100644
index 0000000..db56617
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeDuplicates.java
@@ -0,0 +1,67 @@
+package modelset.process;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.sql.SQLException;
+import java.util.Collection;
+
+import org.eclipse.emf.ecore.resource.Resource;
+
+import mar.analysis.duplicates.DuplicateFinder.DuplicationGroup;
+import mar.analysis.duplicates.EcoreDuplicateFinder;
+import mar.indexer.common.configuration.ModelLoader;
+import mar.validation.AnalyserRegistry;
+import mar.validation.IFileInfo;
+import mar.validation.ResourceAnalyser.Factory;
+
+/** Finds duplicated Ecore models and stores the resulting groups in {@code ../../dups/ecore-dups.db}. */
+public class ComputeDuplicates {
+    // Thresholds handed to the duplicate finder and recorded together with the run.
+    private static final double T0 = 0.8;
+    private static final double T1 = 0.7;
+
+    /** Entry point; every path is resolved relative to the working directory. */
+    public static void main(String[] args) throws Exception {
+        File repoFolder = new File("../../raw-data/repo-ecore-all");
+        File db = new File("../../datasets/dataset.ecore/data/ecore.db");
+        Collection<DuplicationGroup<IFileInfo>> dups = generateDuplicates(repoFolder, db, "ecore", ModelLoader.DEFAULT);
+
+        File outputFile = new File("../../dups/ecore-dups.db");
+        outputFile.getParentFile().mkdirs();
+        // Always start from an empty database; fail loudly rather than append to stale data.
+        if (outputFile.exists() && !outputFile.delete())
+            throw new IOException("Cannot delete previous output: " + outputFile);
+
+        String groupId = "ecore_" + T0 + "_" + T1;
+        // try-with-resources: the database is closed even when an insert fails.
+        try (DuplicationDatabase ddb = new DuplicationDatabase(outputFile)) {
+            ddb.addDuplicationRun(groupId, T0, T1);
+            for (DuplicationGroup<IFileInfo> duplicationGroup : dups) {
+                ddb.addGroup(groupId, duplicationGroup);
+            }
+        }
+        System.out.println("Finished");
+    }
+
+    /** Feeds every local model file of the provider to the finder and returns the duplicate groups. */
+    private static Collection<DuplicationGroup<IFileInfo>> generateDuplicates(File repoFolder, File db, String modelType, ModelLoader loader)
+            throws SQLException, IOException, FileNotFoundException {
+        EcoreDuplicateFinder<IFileInfo> finder = new EcoreDuplicateFinder<>();
+        Factory factory = AnalyserRegistry.INSTANCE.getFactory(modelType);
+        factory.configureEnvironment();
+
+        System.out.println("Loading files...");
+        ModelSetFileProvider provider = new ModelSetFileProvider(db, repoFolder);
+        for (IFileInfo f : provider.getLocalFiles()) {
+            Resource r = loader.load(f.getFullFile());
+            try {
+                finder.addResource(f, r);
+            } finally {
+                r.unload(); // unload even when addResource throws
+            }
+        }
+        System.out.println("Computing duplicates...");
+        return finder.getDuplicates(T0, T1);
+    }
+}
diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDatabase.java b/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDatabase.java
new file mode 100644
index 0000000..d86f1de
--- /dev/null
+++ b/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDatabase.java
@@ -0,0 +1,98 @@
+package modelset.process;
+
+import java.io.File;
+import java.sql.Connection;
+import java.sql.DatabaseMetaData;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Statement;
+
+import javax.annotation.Nonnull;
+
+import mar.analysis.duplicates.DuplicateFinder.DuplicationGroup;
+import mar.validation.IFileInfo;
+
+/**
+ * A wrapper to handle the duplication database.
+ *
+ * @author jesus
+ *
+ */
+public class DuplicationDatabase implements AutoCloseable {
+ private Connection connection;
+
+ @Nonnull
+ public DuplicationDatabase(File file) {
+ String url = getConnectionString(file);
+
+ try {
+ Connection conn = DriverManager.getConnection(url);
+ if (conn != null) {
+ if (! file.exists()) {
+ DatabaseMetaData meta = conn.getMetaData();
+ System.out.println("The driver name is " + meta.getDriverName());
+ System.out.println("A new database has been created.");
+ }
+
+ String duplication_run = "CREATE TABLE IF NOT EXISTS duplication_run (\n"
+ + " id varchar(255) NOT NULL,\n"
+ + " t0 float NOT NULL,\n"
+ + " t1 float NOT NULL\n"
+ + ");";
+
+ String duplicates = "CREATE TABLE IF NOT EXISTS duplicates (\n"
+ + " model_id varchar(255) NOT NULL,\n"
+ + " group_id varchar (255) NOT NULL,\n"
+ + " duplication_run varchar (255) NOT NULL\n"
+ + ");";
+
+ Statement stmt = conn.createStatement();
+ stmt.execute(duplication_run);
+
+ stmt = conn.createStatement();
+ stmt.execute(duplicates);
+ }
+
+ this.connection = conn;
+ this.connection.setAutoCommit(false);
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public void close() throws SQLException {
+ this.connection.commit();
+ this.connection.close();
+ }
+
+ @Nonnull
+ public static String getConnectionString(File file) {
+ return "jdbc:sqlite:" + file.getAbsolutePath();
+ }
+
+
+ public void addDuplicationRun(String id, double t0, double t1) throws SQLException {
+ PreparedStatement preparedStatement = connection.prepareStatement("INSERT INTO duplication_run(id, t0, t1) VALUES (?, ?, ?)");
+ preparedStatement.setString(1, id);
+ preparedStatement.setDouble(2, t0);
+ preparedStatement.setDouble(3, t1);
+ preparedStatement.execute();
+ preparedStatement.close();
+ }
+
+ public void addGroup(String runId, DuplicationGroup duplicationGroup) throws SQLException {
+ try(PreparedStatement preparedStatement = connection.prepareStatement("INSERT INTO duplicates(model_id, group_id, duplication_run) VALUES (?, ?, ?)")) {
+ String groupId = duplicationGroup.getRepresentative().getModelId();
+ for (IFileInfo f : duplicationGroup) {
+ String id = f.getModelId();
+ preparedStatement.setString(1, id);
+ preparedStatement.setString(2, groupId);
+ preparedStatement.setString(3, runId);
+ preparedStatement.execute();
+ }
+ }
+ }
+
+}