Skip to content

Commit

Permalink
Basic setup
Browse files Browse the repository at this point in the history
  • Loading branch information
Oliver Schihin committed Jun 23, 2015
1 parent 6a8519b commit f80e867
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 0 deletions.
55 changes: 55 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Created by .ignore support plugin (hsz.mobi)
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion

*.iml

## Directory-based project format:
.idea/
# if you remove the above rule, at least ignore the following:

# User-specific stuff:
# .idea/workspace.xml
# .idea/tasks.xml
# .idea/dictionaries

# Sensitive or high-churn files:
# .idea/dataSources.ids
# .idea/dataSources.xml
# .idea/sqlDataSources.xml
# .idea/dynamic.xml
# .idea/uiDesigner.xml

# Gradle:
# .idea/gradle.xml
# .idea/libraries

# Mongo Explorer plugin:
# .idea/mongoSettings.xml

## File-based project format:
*.ipr
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties

# Repository specifics
*.jar
*.lic
*.xml


23 changes: 23 additions & 0 deletions transform.han2sbmarc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/sh
# Script to process files with HAN data (HAN-Marc) with an XSLT transformation to get MARC21 records.
#
# Usage: transform.han2sbmarc.sh <basedir> <institutioncode>
#   <basedir>          directory that holds raw.hanmarc/, out.swissbib-MARC/, xslt/ and libs/saxon9.jar
#   <institutioncode>  value handed to the stylesheet as the parameter "institutioncode"

basedir=$1

# Without a base directory every path below would be resolved against "/".
if [ -z "$basedir" ]; then
    echo "usage: $0 <basedir> <institutioncode>" >&2
    exit 1
fi

inputdir=$basedir/raw.hanmarc
outputdir=$basedir/out.swissbib-MARC
# NOTE(review): this names a directory, yet it is passed to Saxon's -xsl: option below;
# confirm which stylesheet file is meant to be used.
xslt=$basedir/xslt/
output=HAN.marc21.nr
cp=$basedir/libs/saxon9.jar
institutioncode=$2

# Running number that is appended to the name of every output file.
nr=1

# The output directory may be missing (e.g. in a fresh checkout).
mkdir -p "$outputdir" || exit 1

echo "start HAN-Marc -> Marc21 transformation"

for datei in "$inputdir"/*.xml
do
    # If no file matches, the shell leaves the pattern itself in $datei; skip it.
    [ -e "$datei" ] || continue

    echo "file: $datei"
    # All expansions are quoted so that paths containing whitespace stay one argument.
    java -Xms2024m -Xmx2024m -cp "$cp" net.sf.saxon.Transform "-s:$datei" "-xsl:$xslt" "-o:$outputdir/$output$nr.xml" "institutioncode=$institutioncode"
    nr=$((nr + 1))

done
61 changes: 61 additions & 0 deletions transform.into.1.line.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/perl


use strict;
use warnings;

# Forward declaration so that ltrim() can be called with its ($) prototype
# before the definition further down in the file has been seen.
sub ltrim($);

# Patterns (interpolated into regular expressions) that classify an input line.
my $first_line      = '<collection>';     # opening tag of the collection
my $start_record    = '<record';          # start tag of a record, attributes allowed
my $end_record      = '</record>';        # end tag of a record
my $leerzeile       = '^$';               # empty line (German: "Leerzeile")
my $last_line       = '</collection>';    # closing tag of the collection
my $xml_declaration = '<\?xml';           # XML declaration

# Buffer in which the lines of the current record are joined together.
my $line_to_write = "";

# Read the input (files named on the command line, or STDIN) line by line and
# print every record on one single line.  The XML declaration and the
# collection tags are passed through unchanged; empty lines are dropped.
while (<>) {

    if (/$leerzeile/) {
        next;
    }
    elsif (/($xml_declaration|$first_line|$last_line)/) {
        writeline($_);
        next;
    }
    elsif (/$start_record/) {
        chomp;
        $line_to_write = ltrim($_);

        # A record that is already complete on this one line never reaches
        # the end-of-record branch below, so it has to be written right here;
        # otherwise the next record start would overwrite it and it would be
        # missing from the output.
        if (/$start_record.*$end_record/) {
            writeline($line_to_write . "\n");
            $line_to_write = "";
        }
    }
    elsif (/$end_record/) {
        chomp;
        $line_to_write .= ltrim($_);
        $line_to_write .= "\n";
        writeline($line_to_write);

        # Start from an empty buffer so nothing of this record lingers.
        $line_to_write = "";
        next;
    }
    else {
        # Any other line is part of the current record: drop the line break
        # and the indentation, then append it to the buffer.
        chomp;
        $line_to_write .= ltrim($_);
    }
}


# Print the first argument unchanged to the currently selected output handle
# (STDOUT unless another handle has been selected).  No line break is added;
# the caller supplies it where one is wanted.
sub writeline {
    my ($text) = @_;
    print $text;
}

# Return a copy of the given string with all leading whitespace removed.
# The caller's variable is not modified.
sub ltrim($)
{
    (my $trimmed = $_[0]) =~ s/^\s+//;
    return $trimmed;
}

21 changes: 21 additions & 0 deletions transform.into.1.line.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/sh
# Script purpose: call the perl script that flattens every record onto one line
#
# Usage: transform.into.1.line.sh <basedir>
#   <basedir>  directory that holds out.swissbib-MARC/ and transform.into.1.line.pl

basedir=$1

# Without a base directory every path below would be resolved against "/".
if [ -z "$basedir" ]; then
    echo "usage: $0 <basedir>" >&2
    exit 1
fi

inputdir=$basedir/out.swissbib-MARC
outputdir=$basedir/out.swissbib-MARC-1line

plfile=$basedir/transform.into.1.line.pl

# The redirection below fails if the output directory does not exist yet.
mkdir -p "$outputdir" || exit 1

echo "start perl transformation to flatten records into one single line"

for datei in "$inputdir"/*.xml
do
    # If no file matches, the shell leaves the pattern itself in $datei; skip it.
    [ -e "$datei" ] || continue

    filename=$(basename "$datei" .xml)

    # suffix format.xml for the file name is necessary for the next steps
    echo "transformation of $datei into $filename.format.xml"
    # All expansions are quoted so that paths containing whitespace stay one argument.
    perl "$plfile" "$datei" > "$outputdir/$filename.format.xml"

done
9 changes: 9 additions & 0 deletions workflow.HAN.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/sh

# Script that controls the main workflow.
# The current working directory is used as base directory, so this script has
# to be started from the directory that contains the transform.*.sh scripts.

basedir=$PWD

# $basedir is quoted everywhere so that a working directory whose path
# contains whitespace is still passed on as one single argument.
"$basedir/transform.han2sbmarc.sh" "$basedir" HAN
#"$basedir/remove.marc.namespaces.sh" "$basedir"
"$basedir/transform.into.1.line.sh" "$basedir"
2 changes: 2 additions & 0 deletions xslt/info.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Information on xslt-scripts

0 comments on commit f80e867

Please sign in to comment.