Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/updateTestsAfterEndOfWave3'
Browse files Browse the repository at this point in the history
  • Loading branch information
dr0i committed Sep 19, 2023
2 parents 2f45f28 + 2b55bcb commit 1c9c561
Show file tree
Hide file tree
Showing 52 changed files with 1,171 additions and 368 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ public void run() {
fixVariables.put("createEndTime", "1"); // 1 <=> true
fixVariables.put("institution-code", "DE-605");
// the './' is mandatory to get play to use the "conf" directory. Base is the root directory of the fix, which is "alma":
fixVariables.put("deweyLabels", "./../deweyLabels.tsv");
fixVariables.put("deweyLabels", "./maps/deweyLabels.tsv");
fixVariables.put("dnbSachgruppen", "./maps/dnbSachgruppen.tsv");
fixVariables.put("nwbib-spatial", "./../nwbib-spatial.tsv");
fixVariables.put("wd_itemLabelTypesCoordinates", "./../wd_itemLabelTypesCoordinates.tsv");
fixVariables.put("maps-institutions.tsv", "./maps/institutions.tsv");
Expand Down
54 changes: 54 additions & 0 deletions src/main/resources/alma/fix/macros.fix
Original file line number Diff line number Diff line change
Expand Up @@ -324,4 +324,58 @@ do put_macro("subjectLabel")
copy_field("$i.z","subject[].$last.label.$append")
copy_field("$i.v","subject[].$last.label.$append")
join_field("subject[].$last.label"," / ")
end

# Macro "publication": creates one publication[] entry for every field that
# matches the path handed in as the "field" parameter, e.g.
#   call_macro("publication",field:"2643[ 1]")
# Per matching field:
#   subfield c -> startDate / endDate
#   subfield a -> location[]
#   subfield b -> publishedBy[]
# If the field has a linkage subfield 6, place and publisher of the linked 880
# field are added to alternateGraphicRepresentation[].*.record.publication[].
do put_macro("publication")
  do list(path:"$[field]", "var":"$i")
    # Placeholder key: makes sure a new publication object exists so that
    # "$last" addresses it in all following statements.
    add_field("publication[].$append.test","")
    do list(path: "$i.c", "var":"$j")
      # Strip square brackets and the prefixes "ca. " / "c " from the date statement.
      # NOTE(review): the "." in "ca. " is not escaped, so as a regex it presumably
      # matches any character - confirm that this is intended.
      replace_all("$j", "\\[|\\]|ca. |c ", "")
      # startDate: set once per publication entry, from the first subfield c that
      # contains a four-digit year (0000-1999 or 2000-2099).
      unless exists("publication[].$last.startDate")
        if any_match("$j",".*?([01]\\d{3}|20\\d{2}).*")
          paste("publication[].$last.startDate", "$j")
        end
      end
      # endDate: set once per publication entry, only if the value ends with
      # "-" (optionally followed by one space) and a four-digit year.
      unless exists("publication[].$last.endDate")
        if any_match("$j",".*-[ ]?([01]\\d{3}|20\\d{2})$")
          paste("publication[].$last.endDate", "$j")
        end
      end
    end
    set_array("publication[].$last.type[]","PublicationEvent")
    set_array("publication[].$last.location[]")
    copy_field("$i.a", "publication[].$last.location[].$append")
    set_array("publication[].$last.publishedBy[]")
    copy_field("$i.b", "publication[].$last.publishedBy[].$append")
    # Linked 880 fields (alternate graphic representation).
    if exists("$i.6")
      # Compare the linkage value of this field with the one of each 880 field.
      # NOTE(review): "$880.linkageTest" is not created in this macro; presumably it
      # is prepared elsewhere before the macro is called - confirm.
      copy_field("$i.6","$i.linkageTest")
      do list(path:"880??","var":"$880")
        if in ("$i.linkageTest","$880.linkageTest")
          if in ("$880.@script.id","alternateGraphicRepresentation[].*.script.id")
            # An entry for this script already exists: append to its record.publication[].
            do list(path:"alternateGraphicRepresentation[]","var":"$AGR")
              if in ("$880.@script.id","$AGR.script.id")
                unless exists("$AGR.record.publication[]")
                  set_array("$AGR.record.publication[]")
                end
                # Placeholder key so that "$last" addresses the new publication object.
                add_field ("$AGR.record.publication[].$append.dummi","")
                set_array("$AGR.record.publication[].$last.location[]")
                copy_field("$880.a", "$AGR.record.publication[].$last.location[].$append")
                set_array("$AGR.record.publication[].$last.publishedBy[]")
                copy_field("$880.b", "$AGR.record.publication[].$last.publishedBy[].$append")
              end
            end
          else
            # No entry for this script yet: create it. The publication data goes below
            # "record", exactly as in the branch above; without "record" the output
            # shape would depend on whether the script entry existed before.
            copy_field("$880.@script.id","alternateGraphicRepresentation[].$append.script.id")
            copy_field("$880.@script.label","alternateGraphicRepresentation[].$last.script.label")
            set_array("alternateGraphicRepresentation[].$last.record.publication[]")
            add_field ("alternateGraphicRepresentation[].$last.record.publication[].$append.dummi","")
            set_array("alternateGraphicRepresentation[].$last.record.publication[].$last.location[]")
            copy_field("$880.a", "alternateGraphicRepresentation[].$last.record.publication[].$last.location[].$append")
            set_array("alternateGraphicRepresentation[].$last.record.publication[].$last.publishedBy[]")
            copy_field("$880.b", "alternateGraphicRepresentation[].$last.record.publication[].$last.publishedBy[].$append")
          end
        end
      end
    end
  end
end
4 changes: 4 additions & 0 deletions src/main/resources/alma/fix/maps.fix
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ put_filemap("$[nwbibWikidataLabelTypeCoords.tsv]","nwbibWikidataLabel", sep_char
put_filemap("$[nwbibWikidataLabelTypeCoords.tsv]","nwbibWikidataTypes", sep_char:"\t",key_column:"0",value_column:"3",expected_columns:"-1")
put_filemap("$[nwbibWikidataLabelTypeCoords.tsv]","nwbibWikidataGeodata", sep_char:"\t",key_column:"0",value_column:"4",expected_columns:"-1")

# DDC & DNB-Sachgruppen
# Registers two tab-separated file maps under the names "deweyLabels" and
# "dnbSachgruppen". The file locations are taken from the Fix variables of the
# same names ($[deweyLabels], $[dnbSachgruppen]).
put_filemap("$[deweyLabels]","deweyLabels",sep_char:"\t")
put_filemap("$[dnbSachgruppen]","dnbSachgruppen",sep_char:"\t")

# Library Classifications
put_filemap("$[classification.tsv]","classificationCode2Label", sep_char:"\t",key_column:"0",value_column:"1",expected_columns:"-1")
put_filemap("$[classification.tsv]","classificationCode2Uri", sep_char:"\t",key_column:"0",value_column:"2",expected_columns:"-1")
Expand Down
40 changes: 31 additions & 9 deletions src/main/resources/alma/fix/subjects.fix
Original file line number Diff line number Diff line change
Expand Up @@ -236,17 +236,15 @@ end
# 082 - Dewey Decimal Classification Number (R) - Subfield: $a (R), $2 (NR)

do list(path:"0820 |08200", "var":"$i")
set_array("subject[].$append.type[]","Concept")
add_field("subject[].$last.source.label","Dewey-Dezimalklassifikation")
add_field("subject[].$last.source.id","https://d-nb.info/gnd/4149423-4")
do list(path:"$i.a", "var": "$j")
unless exists("subject[].$last.label")
copy_field("$i.a","subject[].$last.label")
lookup("subject[].$last.label", "$[deweyLabels]","sep_char":"\t")
copy_field("$i.a","subject[].$last.notation")
end
set_array("subject[].$append.type[]","Concept")
add_field("subject[].$last.source.label","Dewey-Dezimalklassifikation")
add_field("subject[].$last.source.id","https://d-nb.info/gnd/4149423-4")
copy_field("$j","subject[].$last.label")
lookup("subject[].$last.label", "deweyLabels", delete:"true")
copy_field("$j","subject[].$last.notation")
copy_field("$i.2","subject[].$last.version")
end
copy_field("$i.2","subject[].$last.version")
end

# 084 - Other Classification Number (R) - Subfield: $a (R), $2 (NR)
Expand Down Expand Up @@ -292,6 +290,30 @@ do list(path:"084??", "var":"$i")
add_field("subject[].$last.source.label", "RPB-Sachsystematik")
replace_all("subject[].$last.notation","rpb(.*)","$1")
end
elsif any_match("$i.2", "sdnb|zdbs")
if any_match("$j","\\d{3}(\\.\\d{1,3})?|[BKS]")
if any_equal("$i.q","DE-600")
add_field("subject[].$last.source.label","DDC-Sachgruppen der ZDB")
elsif any_equal("$i.2","zdbs")
add_field("subject[].$last.source.label","DDC-Sachgruppen der ZDB")
else
add_field("subject[].$last.source.label","Sachgruppen der DNB")
add_field("subject[].$last.source.id","https://bartoc.org/en/node/20049")
end
copy_field("$j","subject[].$last.label")
lookup("subject[].$last.label", "dnbSachgruppen")
# Additionally emit every DNB and ZDB Sachgruppe as a DDC subject.
set_array("subject[].$append.type[]","Concept")
add_field("subject[].$last.source.label","Dewey-Dezimalklassifikation")
add_field("subject[].$last.source.id","https://d-nb.info/gnd/4149423-4")
copy_field("$j","subject[].$last.label")
lookup("subject[].$last.label", "deweyLabels")
copy_field("$j","subject[].$last.notation")
copy_field("$i.2","subject[].$last.version")
elsif any_match("$j","\\d{2}")
add_field("subject[].$last.source.label","Systematik der DNB (bis 2003)")
add_field("subject[].$last.source.id","https://bartoc.org/en/node/18497")
end
else
copy_field("$i.2", "subject[].$last.source.label")
lookup("subject[].$last.source.label","classificationCode2Label")
Expand Down
135 changes: 67 additions & 68 deletions src/main/resources/alma/fix/titleRelatedFields.fix
Original file line number Diff line number Diff line change
Expand Up @@ -221,86 +221,85 @@ end
# 264 - Production, Publication, Distribution, Manufacture, and Copyright Notice (R) - Subfield: $a (R), $b (R), $c (R)
# 008,18 for frequency
# 515 - Numbering Peculiarities Note (R)
# Prefer 264 over 260: using both can create duplicate info, and 260 is discontinued in RDA.

set_array("publication[]")
do list(path:"260[ 23] |264[ 23][ 1]", "var":"$i")
add_field("publication[].$append.test","")
copy_field("362??.a","publication[].$last.publicationHistory")
do list(path: "$i.c", "var":"$j")
replace_all("$j", "\\[|\\]|ca. |c ", "")
unless exists("publication[].$last.startDate")
if any_match("$j",".*?([01]\\d{3}|20\\d{2}).*")
paste("publication[].$last.startDate", "$j")
if exists("264[ 23][ 1]")
call_macro("publication",field:"2643[ 1]") # 3 - Current/Latest
call_macro("publication",field:"2642[ 1]") # 2 - Intervening
call_macro("publication",field:"264 [ 1]") # # - Not applicable/No information provided/Earliest
else
call_macro("publication",field:"2603[ 1]") # 3 - Current/Latest
call_macro("publication",field:"2602[ 1]") # 2 - Intervening
call_macro("publication",field:"260 [ 1]") # # - Not applicable/No information provided/Earliest
end

# Only add additional publication info to the first publication-object since it is the latest.

if exists("publication[].$first")
copy_field("362??.a","publication[].$first.publicationHistory")
set_array("publication[].$first.frequency[]")
if any_match("leader","^.{6}(a[bis]|m[bis]).*$") # checks if continuous resource
unless any_match("008","^.{18}[#\\| u].*$") # filters out not matching values and also the value unknown
copy_field("008","publication[].$first.frequency[].$append.id")
replace_all("publication[].$first.frequency[].$last.id", "^.{18}(.).*$", "http://marc21rdf.info/terms/continuingfre#$1")
end
end
unless exists("publication[].$last.endDate")
if any_match("$j",".*-[ ]?([01]\\d{3}|20\\d{2})$")
paste("publication[].$last.endDate", "$j")
elsif any_match("006","^s.*$")
do list(path: "006", "var":"$z")
if any_match("$z","^s.*$")
unless any_match("$z","^.[#\\| u].*$")
copy_field("$z","publication[].$first.frequency[].$append.id")
replace_all("publication[].$first.frequency[].$last.id", "^.(.).*$", "http://marc21rdf.info/terms/continuingfre#$1")
end
end
end
end
end
# TODO is there a way to distinguish PublicationEvent and SecondaryPublicationEvent?
set_array("publication[].$last.type[]","PublicationEvent")
set_array("publication[].$last.location[]")
copy_field("$i.a", "publication[].$last.location[].$append")
set_array("publication[].$last.publishedBy[]")
copy_field("$i.b", "publication[].$last.publishedBy[].$append")
set_array("publication[].$last.frequency[]")
if any_match("leader","^.{6}(a[bis]|m[bis]).*$") # checks if continuous resource
unless any_match("008","^.{18}[#\\| u].*$") # filters out not matching values and also the value unknown
copy_field("008","publication[].$last.frequency[].$append.id")
replace_all("publication[].$last.frequency[].$last.id", "^.{18}(.).*$", "http://marc21rdf.info/terms/continuingfre#$1")
set_array("publication[].$first.note[]")
copy_field("515??.a","publication[].$first.note[].$append")
do list(path:"500 ", "var":"$i")
if any_match("$i.a", "^.*saṃ. \\d{4}=(\\d{4}).*Chr.*")
remove_field("publication[].$first.startDate")
copy_field("$i.a","publication[].$first.startDate")
replace_all("publication[].$first.startDate","^.*saṃ. \\d{4}=(\\d{4}).*Chr.*","$1")
end
elsif any_match("006","^s.*$")
do list(path: "006", "var":"$z")
if any_match("$z","^s.*$")
unless any_match("$z","^.[#\\| u].*$")
copy_field("$z","publication[].$last.frequency[].$append.id")
replace_all("publication[].$last.frequency[].$last.id", "^.(.).*$", "http://marc21rdf.info/terms/continuingfre#$1")
end
# Add fallbacks for missing publication dates and other publication info.
do list(path:"260[ 3][ 1]", "var":"$i")
do list(path: "$i.c", "var":"$j")
replace_all("$j", "\\[|\\]|ca. |c ", "")
unless exists("publication[].$first.startDate")
if any_match("$j",".*?([01]\\d{3}|20\\d{2}).*")
paste("publication[].$first.startDate", "$j")
end
end
end
end
set_array("publication[].$last.note[]")
copy_field("515??.a","publication[].$last.note[].$append")
if exists("$i.6")
copy_field("$i.6","$i.linkageTest")
do list(path:"880??","var":"$880")
if in ("$i.linkageTest","$880.linkageTest")
if in ("$880.@script.id","alternateGraphicRepresentation[].*.script.id")
do list(path:"alternateGraphicRepresentation[]","var":"$AGR")
if in ("$880.@script.id","$AGR.script.id")
unless exists("$AGR.record.publication[]")
set_array("$AGR.record.publication[]")
end
add_field ("$AGR.record.publication[].$append.dummi","")
set_array("$AGR.record.publication[].$last.location[]")
copy_field("$880.a", "$AGR.record.publication[].$last.location[].$append")
set_array("$AGR.record.publication[].$last.publishedBy[]")
copy_field("$880.b", "$AGR.record.publication[].$last.publishedBy[].$append")
end
end
else
copy_field("$880.@script.id","alternateGraphicRepresentation[].$append.script.id")
copy_field("$880.@script.label","alternateGraphicRepresentation[].$last.script.label")
set_array("alternateGraphicRepresentation[].$last.publication[]")
add_field ("alternateGraphicRepresentation[].$last.publication[].$append.dummi","")
set_array("alternateGraphicRepresentation[].$last.publication[].$last.location[]")
copy_field("$880.a", "alternateGraphicRepresentation[].$last.publication[].$last.location[].$append")
set_array("alternateGraphicRepresentation[].$last.publication[].$last.publishedBy[]")
copy_field("$880.b", "alternateGraphicRepresentation[].$last.publication[].$last.publishedBy[].$append")
unless exists("publication[].$first.endDate")
if any_match("$j",".*-[ ]?([01]\\d{3}|20\\d{2})$")
paste("publication[].$last.endDate", "$j")
end
end
unless exists("publication[].$first.location[].1")
copy_field("$i.a", "publication[].$first.location[].$append")
end
unless exists("publication[].$first.publishedBy[].1")
copy_field("$i.b", "publication[].$first.publishedBy[].$append")
end
end
end
end


do list(path:"500 ", "var":"$i")
if any_match("$i.a", "^.*saṃ. \\d{4}=(\\d{4}).*Chr.*")
remove_field("publication[].$last.startDate")
copy_field("$i.a","publication[].$last.startDate")
replace_all("publication[].$last.startDate","^.*saṃ. \\d{4}=(\\d{4}).*Chr.*","$1")
unless exists("publication[].$first.startDate[]")
if any_match("008","^.{6}[brestikm](\\d{4}).*$")
copy_field("008","@008startDate")
replace_all("@008startDate","^.{7}(\\d{4}).*$","$1")
copy_field("@008startDate","publication[].$first.startDate")
end
end
unless exists("publication[].$first.endDate[]")
if any_match("008","^.{6}[km]\\d{4}(\\d{4}).*$")
copy_field("008","@008endDate")
replace_all("@008endDate","^.{11}(\\d{4}).*$","$1")
unless any_equal("@008endDate","9999")
copy_field("@008endDate","publication[].$first.endDate")
end
end
end
end

Expand Down
4 changes: 2 additions & 2 deletions src/main/resources/alma/maps/classification.tsv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
udc UDC (Universal Decimal Classification) https://d-nb.info/gnd/4114037-0
sdnb DNB (Systematik der Deutschen Nationalbibliographie) http://bartoc.org/en/node/18497
sdnb Sachgruppen der DNB https://bartoc.org/en/node/20049
methepp Methode Eppelsheimer
bkl BK (Basisklassifikation) http://bartoc.org/en/node/18785
rvk RVK (Regensburger Verbundklassifikation) https://d-nb.info/gnd/4449787-8
Expand All @@ -17,4 +17,4 @@ stub Systematik der TUB München http://bartoc.org/en/node/495
dopaed DOPAED der UB Erlangen
ifzs IFZ-Systematik http://bartoc.org/en/node/1245
sbb Systematik der Bayerischen Bibliographie http://bartoc.org/en/node/1983
zdbs ZDB-Systematik http://bartoc.org/en/node/18915
zdbs DDC-Sachgruppen der ZDB https://zeitschriftendatenbank.de/fileadmin/user_upload/ZDB/pdf/zdbformat/5080.pdf
Loading

0 comments on commit 1c9c561

Please sign in to comment.