Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MCR-3198 event handler for merging duplicate categories #2267

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1025,7 +1025,7 @@ public Source resolve(String href, String base) {
}
} catch (Exception ex) {
LOGGER.info("MCRNotNullResolver caught exception: {}", ex.getLocalizedMessage());
LOGGER.debug(ex.getStackTrace());
LOGGER.debug(ex);
LOGGER.debug("MCRNotNullResolver returning empty xml");
return new JDOMSource(new Element("null"));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
package org.mycore.mods.merger;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Element;
import org.mycore.common.MCRConstants;
import org.mycore.common.events.MCREvent;
import org.mycore.common.events.MCREventHandlerBase;
import org.mycore.datamodel.classifications2.MCRCategoryID;
import org.mycore.datamodel.metadata.MCRObject;
import org.mycore.mods.MCRMODSWrapper;
import org.mycore.mods.classification.MCRClassMapper;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;

/**
 * Checks for and removes redundant classifications in MODS documents. If a classification category and
 * one of its child categories are both present in the document, the element carrying the parent category
 * is detached. <br><br>
 * Concrete event handlers extend this class by implementing the following abstract methods:
 * <ol>
 *     <li>{@link MCRAbstractRedundantModsEventHandler#isConsistent(Element, Element)}: custom checks for
 *     the two classifications' consistency.</li>
 *     <li>{@link MCRAbstractRedundantModsEventHandler#getClassificationElementName()}: the name of
 *     the MODS element that the event handler is checking duplicates for.</li>
 * </ol>
 */
public abstract class MCRAbstractRedundantModsEventHandler extends MCREventHandlerBase {

    /** Instance logger, named after the concrete subclass so that log output identifies the handler. */
    private final Logger logger = LogManager.getLogger(getClass());

    @Override
    protected void handleObjectCreated(MCREvent evt, MCRObject obj) {
        mergeCategories(obj);
    }

    @Override
    protected void handleObjectUpdated(MCREvent evt, MCRObject obj) {
        mergeCategories(obj);
    }

    @Override
    protected void handleObjectRepaired(MCREvent evt, MCRObject obj) {
        mergeCategories(obj);
    }

    /**
     * Merges classifications by detaching parent categories inside an {@link MCRObject}.
     * The MODS element is traversed for classification elements; every top-level relatedItem without an
     * xlink:href attribute is processed separately from the rest of the document, so categories of the
     * document are never compared with categories of a related item.
     * Objects without a MODS element are ignored.
     * @param obj the handled object
     */
    protected void mergeCategories(MCRObject obj) {
        Element mods = new MCRMODSWrapper(obj).getMODS();
        if (mods == null) {
            return;
        }
        logger.info("merge redundant {} categories for {}", getClassificationElementName(), obj.getId());

        dropRedundantCategories(getSupportedElements(mods));

        for (Element relatedItem : getAllRelatedItems(mods)) {
            if (relatedItem.getAttribute("href", MCRConstants.XLINK_NAMESPACE) == null) {
                // same filter as for the main document: only touch the element type this handler is for
                dropRedundantCategories(getSupportedElements(relatedItem));
            }
        }
    }

    /**
     * Collects the descendants of the given element that this handler is responsible for: elements named
     * {@link #getClassificationElementName()} for which a category ID can be determined.
     * @param root the element whose descendants are searched (nested relatedItems are not entered)
     * @return the matching elements in document order
     */
    private List<Element> getSupportedElements(Element root) {
        String elementName = getClassificationElementName();
        return getAllDescendants(root).stream()
            .filter(element -> element.getName().equals(elementName))
            .filter(element -> MCRClassMapper.getCategoryID(element) != null)
            .toList();
    }

    /**
     * Recursively writes all child elements of a given element into a list and returns the list once completed.
     * Children named "relatedItem" are skipped together with their whole subtree.
     * @param element the parent element for which all children should be listed
     * @return a list with all descendant elements outside of relatedItems
     */
    protected static List<Element> getAllDescendants(Element element) {
        List<Element> descendants = new ArrayList<>();

        for (Element child : element.getChildren()) {
            if (!child.getName().equals("relatedItem")) {
                descendants.add(child);
                descendants.addAll(getAllDescendants(child));
            }
        }
        return descendants;
    }

    /**
     * Returns all relatedItem elements from a MODS element. Assumes that relatedItems are only used at top-level,
     * so only direct children are inspected.
     * @param mods the MODS element to be searched for relatedItems
     * @return a list of all direct children with the name "relatedItem"
     */
    protected static List<Element> getAllRelatedItems(Element mods) {
        return mods.getChildren().stream().filter(child -> "relatedItem".equals(child.getName())).toList();
    }

    /**
     * Iterates through a list of classification elements and checks for each pair of elements whether one
     * of them carries a parent category of the other. The parent element is only detached if
     * {@link MCRAbstractRedundantModsEventHandler#isConsistent(Element, Element)} returns true for the pair.
     * @param elements a list of classification elements that are all compared to each other in pairs
     */
    protected void dropRedundantCategories(List<Element> elements) {
        for (int i = 0; i < elements.size(); i++) {
            Element element1 = elements.get(i);
            for (int j = i + 1; j < elements.size(); j++) {
                Element element2 = elements.get(j);
                Element parentElement = MCRCategoryMerger.getElementWithParentCategory(element1, element2);
                if (parentElement != null && isConsistent(element1, element2)) {
                    parentElement.detach();
                }
            }
        }
    }

    /**
     * Reads the authority of an element, preferring the authorityURI attribute over the authority attribute.
     * @param element the element using an authority
     * @return the value of authorityURI if present, otherwise the value of authority (may be null)
     */
    protected String getAuthority(Element element) {
        String authorityUri = element.getAttributeValue("authorityURI");
        return authorityUri != null ? authorityUri : element.getAttributeValue("authority");
    }

    /**
     * Compares two classification elements for the same authority.
     * @param el1 first element to be compared
     * @param el2 second element to be compared
     * @return true if both the authorityURI and the authority attributes are equal (a missing attribute
     *     only equals another missing attribute)
     */
    protected boolean hasSameAuthority(Element el1, Element el2) {
        return Objects.equals(el1.getAttributeValue("authorityURI"), el2.getAttributeValue("authorityURI"))
            && Objects.equals(el1.getAttributeValue("authority"), el2.getAttributeValue("authority"));
    }

    /**
     * Gets the name of an element's classification category or returns the value "unknown".
     * @param element the element that contains the classification
     * @return the string form of the element's category ID, or the string "unknown" if none can be determined
     */
    protected String getClassificationName(Element element) {
        return Optional.ofNullable(MCRClassMapper.getCategoryID(element)).map(MCRCategoryID::toString)
            .orElse("unknown");
    }

    /**
     * Implemented by subclasses to check two categories' consistency regarding their attributes.
     * The parent of the two elements is only detached if this method returns true.
     * @param element1 the first element to be compared
     * @param element2 the second element to be compared
     * @return true if the two elements are consistent and the parent may be removed, false otherwise
     */
    protected abstract boolean isConsistent(Element element1, Element element2);

    /**
     * Returns the name of the classification element that the specific event handler is handling.
     * @return name of the classification element
     */
    protected abstract String getClassificationElementName();
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@
package org.mycore.mods.merger;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

import org.jdom2.Element;
import org.mycore.common.config.MCRConfiguration2;
import org.mycore.datamodel.classifications2.MCRCategory;
import org.mycore.datamodel.classifications2.MCRCategoryDAO;
Expand Down Expand Up @@ -88,12 +91,39 @@ static boolean oneIsDescendantOfTheOther(MCRCategoryID idThis, MCRCategoryID idO
}

/**
 * Builds the list of categories consisting of the parents reported by the DAO for the given category
 * (without the root category of the classification) followed by the category itself.
 * A {@code null} parent list from the DAO is treated as an empty list.
 * @param categoryID the ID of the category to start from
 * @return a new mutable list with the non-root ancestors and, as last entry, the category itself
 */
private static List<MCRCategory> getAncestorsAndSelf(MCRCategoryID categoryID) {
    // single, null-safe declaration; a second declaration of the same local would not compile
    List<MCRCategory> parents = Optional.ofNullable(DAO.getParents(categoryID)).orElse(Collections.emptyList());
    List<MCRCategory> ancestorsAndSelf = new ArrayList<>(parents);
    ancestorsAndSelf.remove(DAO.getRootCategory(categoryID, 0));
    ancestorsAndSelf.add(DAO.getCategory(categoryID, 0));
    return ancestorsAndSelf;
}

/**
 * Determines which of two {@link Element Elements}, both assumed to carry a category, holds the
 * parent category of the other one.
 * <p>
 * No element is returned (i.e. the result is null) when a category ID cannot be determined for either
 * element, when both IDs share the same root ID and the boolean property {@code CONFIG_PREFIX + rootID}
 * is set to false (it defaults to true), when both IDs are equal, or when neither category is a
 * descendant of the other.
 * @param element1 first Element to compare
 * @param element2 second Element to compare
 * @return the Element carrying the parent category, or null
 */
public static Element getElementWithParentCategory(Element element1, Element element2) {
    MCRCategoryID firstID = MCRClassMapper.getCategoryID(element1);
    MCRCategoryID secondID = MCRClassMapper.getCategoryID(element2);
    if (firstID == null || secondID == null) {
        return null;
    }

    boolean sameRoot = firstID.getRootID().equals(secondID.getRootID());
    if (sameRoot) {
        // the property is only consulted when both categories belong to the same classification
        boolean enabled = MCRConfiguration2.getBoolean(CONFIG_PREFIX + firstID.getRootID()).orElse(true);
        if (!enabled) {
            return null;
        }
    }

    boolean hierarchicallyRelated = !firstID.equals(secondID) && oneIsDescendantOfTheOther(firstID, secondID);
    if (!hierarchicallyRelated) {
        return null;
    }

    // if the first category's ancestor chain covers the second one's, the second is the parent
    if (getAncestorsAndSelf(firstID).containsAll(getAncestorsAndSelf(secondID))) {
        return element2;
    }
    return element1;
}

@Override
public void mergeFrom(MCRMerger other) {
MCRCategoryMerger cmo = (MCRCategoryMerger) other;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.mycore.mods.merger;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Element;

import java.util.Objects;

/**
 * Checks for and removes redundant classifications in MODS documents. If a classification category and
 * the classification's child category are both present in the document, the parent classification will
 * be removed.
 */
public class MCRRedundantModsClassificationEventHandler extends MCRAbstractRedundantModsEventHandler {

    private static final Logger LOGGER = LogManager.getLogger(MCRRedundantModsClassificationEventHandler.class);

    /** Name of the MODS element this handler checks for redundant categories. */
    protected static final String CLASSIFICATION_ELEMENT_NAME = "classification";

    @Override
    protected String getClassificationElementName() {
        return CLASSIFICATION_ELEMENT_NAME;
    }

    /**
     * Returns false if the authorities of the two classification elements are the same, but
     * the displayLabels differ from each other.
     * @param el1 the first element to be compared
     * @param el2 the second element to be compared
     * @return false if inconsistent
     */
    @Override
    protected boolean isConsistent(Element el1, Element el2) {
        return !hasSameAuthority(el1, el2) || checkDisplayLabelConsistence(el1, el2);
    }

    /**
     * Checks if both elements have the same displayLabel. Logs a warning if not.
     * @param el1 first element to check
     * @param el2 second element to check
     * @return true, if both elements have the same displayLabel (or both have none)
     */
    private boolean checkDisplayLabelConsistence(Element el1, Element el2) {
        final String displayLabel1 = el1.getAttributeValue("displayLabel");
        final String displayLabel2 = el2.getAttributeValue("displayLabel");

        if (Objects.equals(displayLabel1, displayLabel2)) {
            return true;
        }

        // the classification names are only needed for the warning, so they are resolved here
        // instead of on every comparison
        final String classificationName1 = getClassificationName(el1);
        final String classificationName2 = getClassificationName(el2);

        String logMessage = """
            There are inconsistencies found between the classifications {} and {}.
            They have the same authority "{}" but {} has the displayLabel "{}" and {} has the displayLabel "{}".""";

        LOGGER.warn(logMessage, classificationName1, classificationName2, getAuthority(el1),
            classificationName1, displayLabel1, classificationName2, displayLabel2);
        return false;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package org.mycore.mods.merger;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Element;

import java.util.Objects;

/**
 * Checks for and removes redundant genres in MODS documents. If a genre category and
 * the genre's child category are both present in the document, the parent genre will
 * be removed.
 */
public class MCRRedundantModsGenreEventHandler extends MCRAbstractRedundantModsEventHandler {

    private static final Logger LOGGER = LogManager.getLogger(MCRRedundantModsGenreEventHandler.class);

    /** Name of the MODS element this handler checks for redundant categories. */
    protected static final String CLASSIFICATION_ELEMENT_NAME = "genre";

    @Override
    protected String getClassificationElementName() {
        return CLASSIFICATION_ELEMENT_NAME;
    }

    /**
     * Returns false if the authorities of the two genre elements are the same, but
     * the displayLabels or the types differ from each other. The type is only compared
     * when the displayLabels already match.
     * @param el1 the first element to be compared
     * @param el2 the second element to be compared
     * @return false if inconsistent
     */
    @Override
    protected boolean isConsistent(Element el1, Element el2) {
        return !hasSameAuthority(el1, el2)
            || (checkDisplayLabelConsistence(el1, el2) && checkTypeConsistence(el1, el2));
    }

    /**
     * Checks if both elements have the same displayLabel. Logs a warning if not.
     * @param el1 first element to check
     * @param el2 second element to check
     * @return true, if both elements have the same displayLabel (or both have none)
     */
    private boolean checkDisplayLabelConsistence(Element el1, Element el2) {
        final String displayLabel1 = el1.getAttributeValue("displayLabel");
        final String displayLabel2 = el2.getAttributeValue("displayLabel");

        if (Objects.equals(displayLabel1, displayLabel2)) {
            return true;
        }

        // the classification names are only needed for the warning, so they are resolved here
        // instead of on every comparison
        final String classificationName1 = getClassificationName(el1);
        final String classificationName2 = getClassificationName(el2);

        String logMessage = """
            There are inconsistencies found between the classifications {} and {}.
            They have the same authority "{}" but {} has the displayLabel "{}" and {} has the displayLabel "{}".""";

        LOGGER.warn(logMessage, classificationName1, classificationName2, getAuthority(el1),
            classificationName1, displayLabel1, classificationName2, displayLabel2);
        return false;
    }

    /**
     * Checks if both elements have the same type. Logs a warning if not.
     * @param el1 first element to check
     * @param el2 second element to check
     * @return true, if both elements have the same type (or both have none)
     */
    private boolean checkTypeConsistence(Element el1, Element el2) {
        final String type1 = el1.getAttributeValue("type");
        final String type2 = el2.getAttributeValue("type");

        if (Objects.equals(type1, type2)) {
            return true;
        }

        // resolved lazily for the same reason as in the displayLabel check
        final String classificationName1 = getClassificationName(el1);
        final String classificationName2 = getClassificationName(el2);

        String logMessage = """
            There are inconsistencies found between the classifications {} and {}.
            They have the same authority "{}" but {} has the type "{}" and {} has the type "{}".""";

        LOGGER.warn(logMessage, classificationName1, classificationName2, getAuthority(el1),
            classificationName1, type1, classificationName2, type2);
        return false;
    }

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
MCR.Metadata.Type.mods=true
MCR.Metadata.ShareAgent.mods=org.mycore.mods.MCRMODSMetadataShareAgent
MCR.EventHandler.MCRObject.040.Class=org.mycore.mods.MCRMODSLinksEventHandler
# Event handlers that remove a parent category from a MODS document when one of its child
# categories is also present (016a: mods:classification, 016b: mods:genre).
# Both entries are commented out, so this file does not register them;
# presumably they are meant to be enabled per application - TODO confirm.
# MCR.EventHandler.MCRObject.016a.Class=org.mycore.mods.merger.MCRRedundantModsClassificationEventHandler
# MCR.EventHandler.MCRObject.016b.Class=org.mycore.mods.merger.MCRRedundantModsGenreEventHandler
MCR.MODS.NewObjectType=mods
MCR.MODS.Types=mods

Expand Down
Loading