Skip to content

Commit d384e53

Browse files
committed
Merge branch 'develop'
2 parents de458bb + 3c5b351 commit d384e53

27 files changed

+7345
-360
lines changed

goobi-viewer-indexer/pom.xml

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
<modelVersion>4.0.0</modelVersion>
77
<groupId>io.goobi.viewer</groupId>
88
<artifactId>viewer-indexer</artifactId>
9-
<version>24.02</version>
9+
<version>24.03-SNAPSHOT</version>
1010

1111

1212
<name>Goobi viewer - Indexer</name>
@@ -41,14 +41,14 @@
4141

4242
<!-- intranda libraries -->
4343
<alto.version>1.5.13</alto.version>
44-
<iiif-api-model.version>2.6.2</iiif-api-model.version>
44+
<iiif-api-model.version>2.6.3</iiif-api-model.version>
4545
<normdataimporter.version>1.10.9</normdataimporter.version>
4646

4747
<!-- other libraries -->
4848
<angus-mail.version>2.0.3</angus-mail.version>
4949
<commons-beanutils.version>1.9.4</commons-beanutils.version>
50-
<commons-configuration2.version>2.10.0</commons-configuration2.version>
51-
<commons-io.version>2.15.1</commons-io.version>
50+
<commons-configuration2.version>2.10.1</commons-configuration2.version>
51+
<commons-io.version>2.16.0</commons-io.version>
5252
<commons-jxpath.version>1.3</commons-jxpath.version>
5353
<commons-lang3.version>3.14.0</commons-lang3.version>
5454
<commons-text.version>1.11.0</commons-text.version>
@@ -67,7 +67,7 @@
6767
<junit.version>5.10.2</junit.version>
6868
<metadata-extractor.version>2.19.0</metadata-extractor.version>
6969
<solr.version>9.5.0</solr.version>
70-
<sf-geojson.version>3.3.2</sf-geojson.version>
70+
<sf-geojson.version>3.3.3</sf-geojson.version>
7171
</properties>
7272

7373

@@ -300,7 +300,7 @@
300300
</plugin>
301301
<plugin>
302302
<artifactId>maven-compiler-plugin</artifactId>
303-
<version>3.12.1</version>
303+
<version>3.13.0</version>
304304
<configuration>
305305
<compilerArgument>-Xlint:deprecation</compilerArgument>
306306
<compilerArgument>-Xlint:unchecked</compilerArgument>
@@ -390,7 +390,7 @@
390390
</plugin>
391391
<plugin>
392392
<artifactId>maven-assembly-plugin</artifactId>
393-
<version>3.7.0</version>
393+
<version>3.7.1</version>
394394
<executions>
395395
<execution>
396396
<phase>package</phase>
@@ -422,7 +422,7 @@
422422
<plugin>
423423
<groupId>org.jacoco</groupId>
424424
<artifactId>jacoco-maven-plugin</artifactId>
425-
<version>0.8.11</version>
425+
<version>0.8.12</version>
426426
<executions>
427427
<execution>
428428
<goals>
@@ -477,7 +477,7 @@
477477
<dependency>
478478
<groupId>com.puppycrawl.tools</groupId>
479479
<artifactId>checkstyle</artifactId>
480-
<version>10.14.1</version>
480+
<version>10.15.0</version>
481481
</dependency>
482482
</dependencies>
483483
<executions>

goobi-viewer-indexer/src/main/java/io/goobi/viewer/indexer/CmsPageIndexer.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -185,6 +185,8 @@ public String[] index(Path cmsFile, Map<String, Path> dataFolders, final ISolrWr
185185
IndexObject indexObj = new IndexObject(getNextIddoc(SolrIndexerDaemon.getInstance().getSearchIndex()));
186186
logger.debug("IDDOC: {}", indexObj.getIddoc());
187187

188+
indexObj.setSourceDocFormat(FileFormat.CMS);
189+
188190
// LOGID
189191
indexObj.setLogId("LOG0000");
190192

@@ -224,9 +226,7 @@ public String[] index(Path cmsFile, Map<String, Path> dataFolders, final ISolrWr
224226
ret[1] = "PI not found.";
225227
throw new IndexerException(ret[1]);
226228
}
227-
228-
// Set source doc format
229-
indexObj.addToLucene(SolrConstants.SOURCEDOCFORMAT, FileFormat.CMS.name());
229+
230230
prepareUpdate(indexObj);
231231

232232
// Set title

goobi-viewer-indexer/src/main/java/io/goobi/viewer/indexer/DenkXwebIndexer.java

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,6 @@
2929
import java.util.List;
3030
import java.util.Map;
3131
import java.util.Set;
32-
import java.util.regex.Matcher;
33-
import java.util.regex.Pattern;
3432

3533
import org.apache.commons.io.FilenameUtils;
3634
import org.apache.commons.lang3.StringUtils;
@@ -241,9 +239,7 @@ public String[] index(Document doc, Map<String, Path> dataFolders, ISolrWriteStr
241239
}
242240
pi = MetadataHelper.applyIdentifierModifications(pi);
243241
// Do not allow identifiers with illegal characters
244-
Pattern p = Pattern.compile("[^\\w|-]");
245-
Matcher m = p.matcher(pi);
246-
if (m.find()) {
242+
if (!Utils.validatePi(pi)) {
247243
ret[1] = "PI contains illegal characters: " + pi;
248244
throw new IndexerException(ret[1]);
249245
}
@@ -282,9 +278,6 @@ public String[] index(Document doc, Map<String, Path> dataFolders, ISolrWriteStr
282278
logger.info("Solr write strategy injected by caller: {}", writeStrategy.getClass().getName());
283279
}
284280

285-
// Set source doc format
286-
indexObj.addToLucene(SolrConstants.SOURCEDOCFORMAT, FileFormat.DENKXWEB.name());
287-
288281
prepareUpdate(indexObj);
289282

290283
// Process TEI files
@@ -592,6 +585,8 @@ private static List<LuceneField> mapPagesToDocstruct(IndexObject indexObj, ISolr
592585
* @throws FatalIndexerException
593586
*/
594587
private static void setSimpleData(IndexObject indexObj) throws FatalIndexerException {
588+
indexObj.setSourceDocFormat(FileFormat.DENKXWEB);
589+
595590
Element structNode = indexObj.getRootStructNode();
596591

597592
// Set type

goobi-viewer-indexer/src/main/java/io/goobi/viewer/indexer/DublinCoreIndexer.java

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,6 @@
3232
import java.util.List;
3333
import java.util.Map;
3434
import java.util.Set;
35-
import java.util.regex.Matcher;
36-
import java.util.regex.Pattern;
3735

3836
import org.apache.commons.io.FilenameUtils;
3937
import org.apache.commons.lang3.StringUtils;
@@ -238,9 +236,7 @@ public String[] index(Path dcFile, Map<String, Path> dataFolders, final ISolrWri
238236
logger.info("Record PI: {}", pi);
239237

240238
// Do not allow identifiers with characters that cannot be used in file names
241-
Pattern p = Pattern.compile("[^\\w|-]");
242-
Matcher m = p.matcher(pi);
243-
if (m.find()) {
239+
if (!Utils.validatePi(pi)) {
244240
ret[1] = new StringBuilder("PI contains illegal characters: ").append(pi).toString();
245241
throw new IndexerException(ret[1]);
246242
}
@@ -278,9 +274,7 @@ public String[] index(Path dcFile, Map<String, Path> dataFolders, final ISolrWri
278274
checkOldDataFolder(dataFolders, DataRepository.PARAM_CMS, pi);
279275
checkOldDataFolder(dataFolders, DataRepository.PARAM_TEIMETADATA, pi);
280276
checkOldDataFolder(dataFolders, DataRepository.PARAM_ANNOTATIONS, pi);
281-
282-
// Set source doc format
283-
indexObj.addToLucene(SolrConstants.SOURCEDOCFORMAT, FileFormat.DUBLINCORE.name());
277+
284278
prepareUpdate(indexObj);
285279

286280
// Process TEI files
@@ -717,11 +711,12 @@ void generatePageDocument(Element eleImage, String iddoc, String pi, Integer ord
717711
* Sets DMDID, ID, TYPE and LABEL from the METS document.
718712
*
719713
* @param indexObj {@link IndexObject}
720-
* @throws FatalIndexerException
721714
*/
722-
private static void setSimpleData(IndexObject indexObj) throws FatalIndexerException {
715+
private static void setSimpleData(IndexObject indexObj) {
723716
logger.trace("setSimpleData(IndexObject) - start");
724717

718+
indexObj.setSourceDocFormat(FileFormat.DUBLINCORE);
719+
725720
// LOGID
726721
indexObj.setLogId("LOD_0000");
727722
logger.trace("LOGID: {}", indexObj.getLogId());

0 commit comments

Comments
 (0)