diff --git a/bin/generate.sh b/bin/generate.sh index 1882113..2e8333f 100755 --- a/bin/generate.sh +++ b/bin/generate.sh @@ -11,4 +11,5 @@ mvn test exec:java -Dexec.mainClass=modelset.process.ComputeUMLStats mvn test exec:java -Dexec.mainClass=modelset.process.ComputeTxt mvn test exec:java -Dexec.mainClass=modelset.process.ComputeGraph mvn test exec:java -Dexec.mainClass=modelset.process.ComputeUMLGraph +mvn test exec:java -Dexec.mainClass=modelset.process.ComputeDuplicates popd diff --git a/java-lib/modelset-lib/pom.xml b/java-lib/modelset-lib/pom.xml index c3d539e..eaef3c2 100644 --- a/java-lib/modelset-lib/pom.xml +++ b/java-lib/modelset-lib/pom.xml @@ -1,6 +1,4 @@ - + 4.0.0 io.github.models-lab.modelset modelset-lib @@ -54,6 +52,24 @@ mar-modelling 1.0-SNAPSHOT + + + io.github.models-lab.mar + mar-modelling-transformations + 1.0-SNAPSHOT + + + + + org.agrona + agrona + 1.15.0 + + @@ -109,4 +125,4 @@ - + \ No newline at end of file diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/ComputeDuplicates.java b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeDuplicates.java new file mode 100644 index 0000000..db56617 --- /dev/null +++ b/java-lib/modelset-lib/src/main/java/modelset/process/ComputeDuplicates.java @@ -0,0 +1,67 @@ +package modelset.process; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.sql.SQLException; +import java.util.Collection; + +import org.eclipse.emf.ecore.resource.Resource; + +import mar.analysis.duplicates.DuplicateFinder.DuplicationGroup; +import mar.analysis.duplicates.EcoreDuplicateFinder; +import mar.indexer.common.configuration.ModelLoader; +import mar.validation.AnalyserRegistry; +import mar.validation.IFileInfo; +import mar.validation.ResourceAnalyser.Factory; + +public class ComputeDuplicates { + + private static final double T0 = 0.8; + private static final double T1 = 0.7; + + public static void main(String[] args) throws Exception { + File repoFolder = new File("../../raw-data/repo-ecore-all"); + File db = new File("../../datasets/dataset.ecore/data/ecore.db"); + + ModelLoader loader = ModelLoader.DEFAULT; + Collection> dups = generateDuplicates(repoFolder, db, "ecore", loader); + + new File("../../dups/").mkdirs(); + + File outputFile = new File("../../dups/ecore-dups.db"); + if (outputFile.exists()) + outputFile.delete(); + + DuplicationDatabase ddb = new DuplicationDatabase(outputFile); + String groupId = "ecore_" + T0 + "_" + T1; + ddb.addDuplicationRun(groupId, T0, T1); + for (DuplicationGroup duplicationGroup : dups) { + ddb.addGroup(groupId, duplicationGroup); + } + + ddb.close(); + + System.out.println("Finished"); + } + + private static Collection> generateDuplicates(File repoFolder, File db, String modelType, ModelLoader loader) + throws SQLException, IOException, FileNotFoundException { + + EcoreDuplicateFinder finder = new EcoreDuplicateFinder<>(); + + Factory factory = AnalyserRegistry.INSTANCE.getFactory(modelType); + factory.configureEnvironment(); + + System.out.println("Loading files..."); + ModelSetFileProvider provider = new ModelSetFileProvider(db, repoFolder); + for (IFileInfo f : provider.getLocalFiles()) { + Resource r = loader.load(f.getFullFile()); + finder.addResource(f, r); + r.unload(); + } + + System.out.println("Computing duplicates..."); + return finder.getDuplicates(T0, T1); + } +} diff --git a/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDatabase.java b/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDatabase.java new file mode 100644 index 0000000..d86f1de --- /dev/null +++ b/java-lib/modelset-lib/src/main/java/modelset/process/DuplicationDatabase.java @@ -0,0 +1,98 @@ +package modelset.process; + +import java.io.File; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; + +import javax.annotation.Nonnull; + +import mar.analysis.duplicates.DuplicateFinder.DuplicationGroup; +import mar.validation.IFileInfo; + +/** + * A wrapper to handle the duplication database. + * + * @author jesus + * + */ +public class DuplicationDatabase implements AutoCloseable { + private Connection connection; + + @Nonnull + public DuplicationDatabase(File file) { + String url = getConnectionString(file); + + try { + Connection conn = DriverManager.getConnection(url); + if (conn != null) { + if (! file.exists()) { + DatabaseMetaData meta = conn.getMetaData(); + System.out.println("The driver name is " + meta.getDriverName()); + System.out.println("A new database has been created."); + } + + String duplication_run = "CREATE TABLE IF NOT EXISTS duplication_run (\n" + + " id varchar(255) NOT NULL,\n" + + " t0 float NOT NULL,\n" + + " t1 float NOT NULL\n" + + ");"; + + String duplicates = "CREATE TABLE IF NOT EXISTS duplicates (\n" + + " model_id varchar(255) NOT NULL,\n" + + " group_id varchar (255) NOT NULL,\n" + + " duplication_run varchar (255) NOT NULL\n" + + ");"; + + Statement stmt = conn.createStatement(); + stmt.execute(duplication_run); + + stmt = conn.createStatement(); + stmt.execute(duplicates); + } + + this.connection = conn; + this.connection.setAutoCommit(false); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + @Override + public void close() throws SQLException { + this.connection.commit(); + this.connection.close(); + } + + @Nonnull + public static String getConnectionString(File file) { + return "jdbc:sqlite:" + file.getAbsolutePath(); + } + + + public void addDuplicationRun(String id, double t0, double t1) throws SQLException { + PreparedStatement preparedStatement = connection.prepareStatement("INSERT INTO duplication_run(id, t0, t1) VALUES (?, ?, ?)"); + preparedStatement.setString(1, id); + preparedStatement.setDouble(2, t0); + preparedStatement.setDouble(3, t1); + preparedStatement.execute(); + preparedStatement.close(); + } + + public void addGroup(String runId, DuplicationGroup duplicationGroup) throws SQLException { + try(PreparedStatement preparedStatement = connection.prepareStatement("INSERT INTO duplicates(model_id, group_id, duplication_run) VALUES (?, ?, ?)")) { + String groupId = duplicationGroup.getRepresentative().getModelId(); + for (IFileInfo f : duplicationGroup) { + String id = f.getModelId(); + preparedStatement.setString(1, id); + preparedStatement.setString(2, groupId); + preparedStatement.setString(3, runId); + preparedStatement.execute(); + } + } + } + +}