Skip to content

Commit

Permalink
Fix : Ajout gestion des approximations dans requête DAT + suppression…
Browse files Browse the repository at this point in the history
…s de la casse
  • Loading branch information
pierre-maraval committed Jan 9, 2025
1 parent 7ac8657 commit 5aa3377
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public List<String> datToPpn(Integer date, String auteur, String titre) throws U
if (titre == null || titre.isEmpty()) {
throw new IllegalArgumentException("Le titre ne peut pas être null");
}
log.debug("params : date : " + date + " auteur : " + auteur + " titre : " + titre);
StringBuilder request = new StringBuilder("SELECT DISTINCT a.PPN");
if (date != null) {
request.append(" FROM AUTORITES.biblio_table_generale a");
Expand Down
21 changes: 15 additions & 6 deletions src/main/java/fr/abes/sudoc/utils/Utilitaire.java
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public static String replaceDiacritics(String src) {

public static String formatString(String chaine) {
if (chaine != null && !chaine.isEmpty()) {
return suppReservedWords(suppCaracters(chaine));
return ajoutNearBetweenWords(banalisationReservedWords(suppCaracters(chaine)));
}
return chaine;
}
Expand All @@ -112,12 +112,21 @@ private static String suppCaracters(String chaine) {
return chaine.replaceAll(regex, "");
}

private static String suppReservedWords(String chaine) {
List<String> wordsToRemove = Arrays.asList("ABOUT", "ACCUM", "AND", "BT", "BTG", "BTI", "BTP", "EQUIV", "FUZZY", "HASPATH", "INPATH", "MDATA", "MINUS", "NEAR", "NOT", "NT", "NTG", "NTI", "NTP", "OR", "PATTERN", "PT", "RT", "SQE", "SYN", "TR", "TRSYN", "TT", "WITHIN");
// Construire une expression régulière avec les caractères à supprimer
String regex = "\\b(" + String.join("|", wordsToRemove) + ")\\b";
return chaine.replaceAll("(?i)" + regex, "").replaceAll("\\s+", " ").trim();
/**
 * Matches any reserved word of the full-text query language as a whole word.
 * NOTE(review): the list looks like the Oracle Text operator/reserved-word list — confirm.
 * Compiled once because the pattern never changes between calls.
 */
private static final java.util.regex.Pattern RESERVED_WORDS_PATTERN = java.util.regex.Pattern.compile(
        "\\b(" + String.join("|",
                "ABOUT", "ACCUM", "AND", "BT", "BTG", "BTI", "BTP", "EQUIV", "FUZZY", "HASPATH",
                "INPATH", "MDATA", "MINUS", "NEAR", "NOT", "NT", "NTG", "NTI", "NTP", "OR",
                "PATTERN", "PT", "RT", "SQE", "SYN", "TR", "TRSYN", "TT", "WITHIN")
                + ")\\b");

/**
 * Neutralises reserved words in a search string.
 *
 * <p>The input is upper-cased, then every whole-word occurrence of a reserved word is
 * wrapped in curly braces (for example {@code AND} becomes {@code {AND}}), so that it is
 * kept in the string instead of being dropped.
 *
 * @param chaine the string to process; must not be {@code null}
 * @return the upper-cased string with each reserved word surrounded by braces
 */
private static String banalisationReservedWords(String chaine) {
    // Locale.ROOT keeps the case mapping independent of the JVM default locale:
    // with a Turkish locale "within" would become "WİTHİN" and escape the pattern.
    String upperCased = chaine.toUpperCase(java.util.Locale.ROOT);

    // Replace each reserved word by itself surrounded by braces.
    return RESERVED_WORDS_PATTERN.matcher(upperCased).replaceAll("{$1}");
}

/**
 * Links the words of a string with the {@code NEAR} keyword.
 *
 * <p>Leading and trailing whitespace is dropped, and every run of whitespace between two
 * words is replaced by {@code " NEAR "}. A string holding a single word (or nothing) is
 * returned trimmed and otherwise unchanged.
 *
 * @param chaine the string whose words must be linked; must not be {@code null}
 * @return the words of {@code chaine} separated by {@code " NEAR "}
 */
private static String ajoutNearBetweenWords(String chaine) {
    // Once trimmed, the string cannot start or end with whitespace, so only the
    // separators between words remain to be rewritten.
    String trimmed = chaine.trim();

    // Each run of whitespace, whatever its length, becomes a single " NEAR " separator.
    return trimmed.replaceAll("\\s+", " NEAR ");
}


Expand Down
6 changes: 3 additions & 3 deletions src/test/java/fr/abes/sudoc/utils/UtilitaireTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ void testReplaceDiacritics() {
@Test
@DisplayName("test reformatage chaine DAT")
void testReformatageChaineDat() {
Assertions.assertEquals("Assez de ces fichus TU", Utilitaire.formatString("Assez ! de _ ces ~fichus *TU"));
Assertions.assertEquals("Je suis content", Utilitaire.formatString("Je AND suis! WITHIN content"));
Assertions.assertEquals("Je suis content", Utilitaire.formatString("Je AND suis! 'WITHIN' content"));
Assertions.assertEquals("Assez NEAR de NEAR ces NEAR fichus NEAR TU".toUpperCase(), Utilitaire.formatString("Assez ! de _ ces ~fichus *TU"));
Assertions.assertEquals("Je NEAR {AND} NEAR suis NEAR {WITHIN} NEAR content".toUpperCase(), Utilitaire.formatString("Je AND suis! within content"));
Assertions.assertEquals("Je NEAR {AND} NEAR suis NEAR {WITHIN} NEAR content".toUpperCase(), Utilitaire.formatString("Je AND suis! 'WITHIN' content"));
}
}

0 comments on commit 5aa3377

Please sign in to comment.