Skip to content

Commit

Permalink
Merge pull request #19 from europeana/migration-fix
Browse files Browse the repository at this point in the history
Several general improvement including migration
  • Loading branch information
nshweta90 authored Jul 4, 2024
2 parents 3f011e3 + 056067a commit bda0b41
Show file tree
Hide file tree
Showing 75 changed files with 2,834 additions and 866 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ public interface CC

public static final String License = "License";
public static final String deprecatedOn = "deprecatedOn";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public interface FormatWriter<T> {

/**
* Method for serialising list of values
* @param value list of value to be formatted
* @param value list of objects to be serialised
* @param out output stream
* @throws IOException
*/
Expand Down
5 changes: 5 additions & 0 deletions record-api-common/src/main/resources/mediacategories.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
<format mediaType="application/dash+xml" label="MPEG-DASH" type="Video" support="Browser"/>
<format mediaType="video/x-flv" label="FLV" type="Video" support="Rendered"/>
<format mediaType="audio/x-ms-wma" label="WMA" type="Sound" support="Rendered"/>
<format mediaType="audio/amr" label="AMR" type="Sound" support="Rendered"/> <!-- new -->
<format mediaType="audio/x-aiff" label="AIFF" type="Sound" support="Browser"/> <!-- new (the browser should be able to play it) -->
<format mediaType="video/x-matroska" label="MKV" type="Video" support="Rendered"/> <!-- new (browsers cannot play it natively, but it is possible with a plugin) -->
<format mediaType="video/x-ms-wmv" label="WMV" type="Video" support="Rendered"/>
<format mediaType="video/x-msvideo" label="AVI" type="Video" support="Rendered"/>
<format mediaType="image/jp2" label="JP2000" type="Image" support="Rendered"/>
Expand All @@ -42,4 +45,6 @@
<format mediaType="application/epub+zip" label="EPUB" type="Text" support="Rendered"/>
<format type="Video" support="EUScreen"/>
<format type="Sound" support="EUScreen"/>
<format mediaType="application/json+oembed" label="oEmbed" support="Rendered"/>
<format mediaType="application/xml+oembed" label="oEmbed" support="Rendered"/>
</config>
53 changes: 53 additions & 0 deletions record-api-migration/dependency-reduced-pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>record-api</artifactId>
<groupId>eu.europeana.api</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>record-api-migration</artifactId>
<description>This is a temporary module to migrate all the data from the DB</description>
<build>
<plugins>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>3.6.0</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
<configuration>
<shadedArtifactAttached>false</shadedArtifactAttached>
<transformers>
<transformer>
<mainClass>eu.europeana.api.record.migration.MigrationCommand</mainClass>
</transformer>
<transformer />
<transformer />
<transformer>
<addHeader>false</addHeader>
</transformer>
</transformers>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>**/Log4j2Plugins.dat</exclude>
</excludes>
</filter>
</filters>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<maven.compiler.target>17</maven.compiler.target>
<maven.compiler.source>17</maven.compiler.source>
<spring.boot.mainclass>eu.europeana.api.record.migration.RunMigration</spring.boot.mainclass>
</properties>
</project>
81 changes: 79 additions & 2 deletions record-api-migration/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,89 @@
<spring.boot.mainclass>eu.europeana.api.record.migration.RunMigration</spring.boot.mainclass>
</properties>

<build>
<plugins>

<!--
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>eu.europeana.api.record.migration.MigrationCommand</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
-->

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.6.0</version>
<configuration>
<shadedArtifactAttached>false</shadedArtifactAttached>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>eu.europeana.api.record.migration.MigrationCommand</mainClass>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer" />
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
<addHeader>false</addHeader>
</transformer>
</transformers>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>**/Log4j2Plugins.dat</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>

</plugins>

</build>

<!--
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>**/Log4j2Plugins.dat</exclude>
</excludes>
</filter>
</filters>
-->

<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<artifactId>spring-boot-starter</artifactId>
<exclusions>
<!-- use log4j2 instead -->
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/**
*
*/
package eu.europeana.api.record.migration;

import java.time.OffsetDateTime;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;

import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.sparql.vocabulary.FOAF;
import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.OWL;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.SKOS;

import eu.europeana.jena.edm.EDM;
import eu.europeana.jena.edm.ORE;

import static eu.europeana.api.record.migration.JenaUtils.*;

/**
 * Rewrites a record held in a Jena {@link Model} so that the properties
 * found on its proxies are attached directly to the provided cultural
 * heritage object (CHO), removes redundant values and links the CHO to a
 * freshly created aggregation resource.
 *
 * @author Hugo
 * @since 18 Jun 2024
 */
public class EdmExternalGenerator
{
    private static final String EUROPEANA_BASE   = "http://data.europeana.eu/";
    private static final String ITEM_BASE        = "http://data.europeana.eu/item/";
    private static final String AGGREGATION_BASE = "http://data.europeana.eu/aggregation/";

    /** Properties for which at most one statement is kept on the CHO. */
    private static final List<Property> SINGLE_VALUE
        = Arrays.asList(EDM.currentLocation, EDM.isRepresentationOf);

    /** Proxy properties that are never copied onto the CHO. */
    private static final List<Property> DISMISS
        = Arrays.asList(ORE.proxyFor, ORE.lineage, ORE.proxyIn, RDF.type);

    /**
     * Merges the statements of every proxy of {@code cho} into {@code cho}
     * itself, removes duplicated values, enforces the single valued
     * properties and attaches an aggregation resource.
     * <p>
     * The model owning {@code cho} is modified in place. The proxy resources
     * themselves are not removed by this method.
     *
     * @param cho the resource to enrich; expected to be a URI resource since
     *            its URI is used to derive the aggregation URI
     * @return the same {@code cho} instance that was passed in
     */
    public Resource generateExternal(Resource cho) {
        Model m = cho.getModel();

        Collection<Property> props = copyProxyStatements(m, cho);
        removeDuplicateValues(m, cho, props);
        keepSingleValues(m, cho);
        addAggregation(m, cho);

        return cho;
    }

    /**
     * Copies onto {@code cho} every statement of the resources pointing to it
     * through ore:proxyFor, skipping the predicates listed in {@link #DISMISS}.
     *
     * @return the set of predicates that were copied
     */
    private Collection<Property> copyProxyStatements(Model m, Resource cho) {
        Collection<Property> props = new HashSet<>();

        List<Resource> proxies = asList(m.listResourcesWithProperty(ORE.proxyFor, cho));
        for ( Resource proxy : proxies ) {
            // Snapshot the statements first: the loop body adds statements to
            // the very model the iterator is reading from.
            List<Statement> stmts = asList(proxy.listProperties());
            for ( Statement stmt : stmts ) {
                Property prop = stmt.getPredicate();
                if ( DISMISS.contains(prop) ) { continue; }

                props.add(prop);
                cho.addProperty(prop, stmt.getObject());
            }
        }
        return props;
    }

    /**
     * For each of the given properties, removes from the model the statements
     * of {@code cho} whose object is considered a duplicate
     * (see {@link #isDuplicate(RDFNode, Collection, List)}).
     */
    private void removeDuplicateValues(Model m, Resource cho
                                     , Collection<Property> props) {
        for ( Property p : props ) {
            List<Statement> stmts = asList(cho.listProperties(p));
            Collection<Literal> labels = collectLabels(stmts, new HashSet<>());

            for ( Statement stmt : stmts ) {
                if ( isDuplicate(stmt.getObject(), labels, stmts) ) {
                    m.remove(stmt);
                }
            }
        }
    }

    /**
     * Keeps only the first statement of each {@link #SINGLE_VALUE} property.
     * Example: /293/item_2D7OVZT2VJRFZH5CXHZMHWCAXZESTNU5
     */
    private void keepSingleValues(Model m, Resource cho) {
        for ( Property p : SINGLE_VALUE ) {
            List<Statement> stmts = asList(cho.listProperties(p));
            // index 0 is the value that is kept, everything after it is removed
            for ( int i = stmts.size() - 1; i > 0; i-- ) {
                m.remove(stmts.get(i));
            }
        }
    }

    /**
     * Creates (or reuses) the aggregation resource whose URI is the CHO URI
     * with the item prefix replaced by the aggregation prefix, types it as
     * ore:Aggregation, stamps it with the current date and time as both
     * dcterms:created and dcterms:modified, and links the CHO to it through
     * ore:isAggregatedBy.
     */
    private void addAggregation(Model m, Resource cho) {
        String aggrURI = cho.getURI().replace(ITEM_BASE, AGGREGATION_BASE);
        Resource aggr = m.getResource(aggrURI);
        aggr.addProperty(RDF.type, ORE.Aggregation);

        String dt = RecordJenaProcessor.DATETIME_FORMAT.format(OffsetDateTime.now());
        // stored as a plain literal, i.e. without the xsd:dateTime datatype
        Literal now = m.createLiteral(dt);
        aggr.addProperty(DCTerms.created, now);
        aggr.addProperty(DCTerms.modified, now);
        cho.addProperty(ORE.isAggregatedBy, aggr);
    }

    /**
     * Gathers into {@code values} the skos:prefLabel, skos:altLabel and
     * skos:hiddenLabel literals of every resource that is the object of one
     * of the given statements.
     * NOTE(review): which literals are actually retained is decided by
     * JenaUtils.getLanguageTaggedLiterals (presumably only language tagged
     * ones) - confirm against that helper.
     *
     * @return the {@code values} collection that was passed in
     */
    private Collection<Literal> collectLabels(Collection<Statement> stmts
                                            , Collection<Literal> values) {
        for ( Statement stmt : stmts ) {
            RDFNode node = stmt.getObject();
            if ( !node.isResource() ) { continue; }

            Resource r = node.asResource();
            getLanguageTaggedLiterals(values, r.listProperties(SKOS.prefLabel));
            getLanguageTaggedLiterals(values, r.listProperties(SKOS.altLabel));
            getLanguageTaggedLiterals(values, r.listProperties(SKOS.hiddenLabel));
        }
        return values;
    }

    /**
     * Tells whether a property value is redundant: either a literal matching
     * one of the collected labels, or a URI resource that another value of
     * the same property declares as equivalent.
     *
     * @param node   the value being checked
     * @param values labels of the resources referenced by the same property
     * @param stmts  all the statements of the property being processed
     */
    private boolean isDuplicate(RDFNode node, Collection<Literal> values
                              , List<Statement> stmts) {
        if ( node.isLiteral() ) {
            return isDuplicateLiteral(node.asLiteral(), values);
        }
        return ( node.isURIResource() && isDuplicate(node.asResource(), stmts) );
    }

    /**
     * Literals for which JenaUtils.hasDatatype is true are never duplicates.
     * Otherwise the text is compared ignoring case, and the language tag must
     * also match when the literal has one.
     */
    private boolean isDuplicateLiteral(Literal l, Collection<Literal> values) {
        if ( hasDatatype(l) ) { return false; }

        return ( hasLanguage(l) ? isDuplicate(l, values)
                                : isDupLangIgnore(l, values) );
    }

    //Language agnostic duplicate check
    private boolean isDupLangIgnore(Literal l1, Collection<Literal> list) {
        return list.stream().anyMatch(l2 -> isDupLangIgnore(l1, l2));
    }

    //Language agnostic duplicate check
    private boolean isDupLangIgnore(Literal l1, Literal l2) {
        return l1.getString().equalsIgnoreCase(l2.getString());
    }

    //Language aware duplicate check
    private boolean isDuplicate(Literal l1, Collection<Literal> list) {
        return list.stream().anyMatch(l2 -> isDuplicate(l1, l2));
    }

    //Language aware duplicate check
    private boolean isDuplicate(Literal l1, Literal l2) {
        return l1.getLanguage().equals(l2.getLanguage())
            && l1.getString().equalsIgnoreCase(l2.getString());
    }

    /**
     * A URI resource is a duplicate when another URI value of the same
     * property points to it through owl:sameAs or skos:exactMatch.
     * Resources under http://data.europeana.eu/ are never duplicates.
     *
     * @param r     the URI resource being checked
     * @param stmts all the statements of the property being processed
     */
    private boolean isDuplicate(Resource r, List<Statement> stmts) {
        if ( r.getURI().startsWith(EUROPEANA_BASE) ) { return false; }

        for ( Statement stmt : stmts ) {
            RDFNode node = stmt.getObject();
            if ( !node.isURIResource() ) { continue; }

            Resource other = node.asResource();
            if ( other.hasProperty(OWL.sameAs, r)
              || other.hasProperty(SKOS.exactMatch, r) ) { return true; }
        }
        return false;
    }

    /**
     * @param prop the property to check
     * @return true if at most one value of the property is kept on the CHO
     */
    public boolean isSingleValue(Property prop) {
        return SINGLE_VALUE.contains(prop);
    }
}
Loading

0 comments on commit bda0b41

Please sign in to comment.