-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Oliver Schihin
committed
Jun 23, 2015
1 parent
6a8519b
commit f80e867
Showing
6 changed files
with
171 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Created by .ignore support plugin (hsz.mobi) | ||
### JetBrains template | ||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion | ||
|
||
*.iml | ||
|
||
## Directory-based project format: | ||
.idea/ | ||
# if you remove the above rule, at least ignore the following: | ||
|
||
# User-specific stuff: | ||
# .idea/workspace.xml | ||
# .idea/tasks.xml | ||
# .idea/dictionaries | ||
|
||
# Sensitive or high-churn files: | ||
# .idea/dataSources.ids | ||
# .idea/dataSources.xml | ||
# .idea/sqlDataSources.xml | ||
# .idea/dynamic.xml | ||
# .idea/uiDesigner.xml | ||
|
||
# Gradle: | ||
# .idea/gradle.xml | ||
# .idea/libraries | ||
|
||
# Mongo Explorer plugin: | ||
# .idea/mongoSettings.xml | ||
|
||
## File-based project format: | ||
*.ipr | ||
*.iws | ||
|
||
## Plugin-specific files: | ||
|
||
# IntelliJ | ||
/out/ | ||
|
||
# mpeltonen/sbt-idea plugin | ||
.idea_modules/ | ||
|
||
# JIRA plugin | ||
atlassian-ide-plugin.xml | ||
|
||
# Crashlytics plugin (for Android Studio and IntelliJ) | ||
com_crashlytics_export_strings.xml | ||
crashlytics.properties | ||
crashlytics-build.properties | ||
|
||
# Repository specifics | ||
*.jar | ||
*.lic | ||
*.xml | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/bin/sh
# Script to process files with HAN data (HAN-Marc) with an xslt-transformation to get MARC21 records
#
# Usage: transform.han2sbmarc.sh <basedir> [institutioncode]
#   <basedir>          directory that holds raw.hanmarc/ (input), out.swissbib-MARC/ (output),
#                      xslt/ and libs/saxon9.jar
#   [institutioncode]  passed unchanged to the stylesheet as the parameter "institutioncode"
#
# Every *.xml file in <basedir>/raw.hanmarc is transformed with Saxon; the results are
# written as HAN.marc21.nr<N>.xml, where <N> counts the processed files starting at 1.

if [ -z "$1" ]; then
    # Without a base directory every path below would silently resolve against "/".
    echo "usage: $0 <basedir> [institutioncode]" >&2
    exit 1
fi

basedir=$1

inputdir=$basedir/raw.hanmarc
outputdir=$basedir/out.swissbib-MARC
# NOTE(review): this value is a directory but is handed to Saxon as -xsl: below;
# presumably a stylesheet file name is missing here -- confirm against the xslt/ folder.
xslt=$basedir/xslt/
output=HAN.marc21.nr
cp=$basedir/libs/saxon9.jar
institutioncode=$2

nr=1

echo "start HAN-Marc -> Marc21 transformation"

for datei in "$inputdir"/*.xml
do
    # When nothing matches, the shell leaves the pattern as a literal string; skip it.
    [ -e "$datei" ] || continue

    echo "file: $datei"
    # All arguments are quoted so that paths containing whitespace stay single words.
    java -Xms2024m -Xmx2024m -cp "$cp" net.sf.saxon.Transform \
        "-s:$datei" "-xsl:$xslt" "-o:$outputdir/${output}${nr}.xml" \
        "institutioncode=$institutioncode"
    nr=$((nr + 1))
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#!/usr/bin/perl | ||
|
||
|
||
use strict; | ||
|
||
sub ltrim($); | ||
|
||
# Patterns used to classify each input line. They are interpolated into
# regular expressions below, so regex metacharacters are significant.
my $first_line      = '<collection>';
my $start_record    = '<record';
my $end_record      = '</record>';
my $leerzeile       = '^$';             # "Leerzeile" = empty line
my $last_line       = '</collection>';
my $xml_declaration = '<\?xml';

# Buffer holding the left-trimmed pieces of the record currently being joined.
my $line_to_write = "";
# True between a line matching $start_record and the line matching $end_record.
my $in_record_section = 0;

# Read all input (files named on the command line, or STDIN) line by line and
# print it again with every record collapsed onto one single line.
while (<>) {

    if (/$leerzeile/) {
        # Empty lines are dropped.
        next;
    }
    elsif (/($xml_declaration|$first_line|$last_line)/) {
        # XML declaration and the collection wrapper are passed through unchanged.
        writeline($_);
        next;
    }
    elsif (/$start_record/) {
        chomp;
        $line_to_write     = ltrim($_);
        $in_record_section = 1;

        # A record that opens and closes on the same physical line is already
        # complete. Emit it right away; otherwise it would never be written
        # and would be overwritten by the next record start.
        if (/$end_record/) {
            $line_to_write .= "\n";
            $in_record_section = 0;
            writeline($line_to_write);
            $line_to_write = "";
        }
    }
    elsif (/$end_record/) {
        chomp;
        $line_to_write .= ltrim($_);
        $line_to_write .= "\n";
        $in_record_section = 0;
        writeline($line_to_write);
        $line_to_write = "";
        next;
    }
    else {
        # Any other line is appended to the buffer without its leading
        # whitespace and without its line ending.
        chomp;
        $line_to_write .= ltrim($_);
    }
}

# Input that ends in the middle of a record is not printed; report it on
# STDERR so the truncation does not go unnoticed.
warn "unterminated record at end of input was not written\n"
    if $in_record_section;
|
||
|
||
# Print the first argument to the currently selected output handle.
# Returns whatever print returns.
sub writeline {
    my ($text) = @_;
    print $text;
}
|
||
# Return a copy of the argument with all leading whitespace removed.
# Trailing whitespace and the caller's variable are left untouched.
sub ltrim($)
{
    (my $trimmed = $_[0]) =~ s/^\s+//;
    return $trimmed;
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/bin/sh
# Script purpose: call perl script to flatten the lines
#
# Usage: transform.into.1.line.sh <basedir>
#   <basedir>  directory that holds out.swissbib-MARC/ (input) and transform.into.1.line.pl;
#              results are written to <basedir>/out.swissbib-MARC-1line/

if [ -z "$1" ]; then
    # Without a base directory every path below would silently resolve against "/".
    echo "usage: $0 <basedir>" >&2
    exit 1
fi

basedir=$1

inputdir=$basedir/out.swissbib-MARC
outputdir=$basedir/out.swissbib-MARC-1line

plfile=$basedir/transform.into.1.line.pl

# The output redirection below fails if the target directory is missing.
mkdir -p "$outputdir"

echo "start perl transformation to flatten records into one single line"

for datei in "$inputdir"/*.xml
do
    # When nothing matches, the shell leaves the pattern as a literal string; skip it.
    [ -e "$datei" ] || continue

    filename=$(basename "$datei" .xml)

    # suffix format.xml for the file name is necessary for the next steps
    echo "transformation of $datei into $filename.format.xml"
    perl "$plfile" "$datei" > "$outputdir/$filename.format.xml"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/bin/sh

# Script that controls the main workflow.
# Must be started from the directory that contains the step scripts, because
# the current working directory is used as the base directory for every step.

basedir=$PWD

# Paths are quoted so that a working directory containing whitespace still works.
# Step 1: HAN-Marc -> MARC21 via XSLT, with institution code "HAN".
"$basedir/transform.han2sbmarc.sh" "$basedir" HAN
#"$basedir/remove.marc.namespaces.sh" "$basedir"
# Step 2: flatten every record onto a single line.
"$basedir/transform.into.1.line.sh" "$basedir"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Information on xslt-scripts | ||
|