Skip to content

Commit

Permalink
Add option position_as_occurrence (see #66)
Browse files Browse the repository at this point in the history
  • Loading branch information
nichtich committed Jun 16, 2021
1 parent 4d3cb2a commit d43471d
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 42 deletions.
1 change: 1 addition & 0 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ Revision history for PICA::Data

{{$NEXT}}
- Implement occurrence ranges (#96)
- Add option position_as_occurrence (see #66)

1.24 2021-06-07T08:51:52Z
- Add method to split record
Expand Down
32 changes: 17 additions & 15 deletions lib/App/picadata.pm
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,14 @@ sub new {

delete $opt->{$_} for qw(count build help version);

my $pattern = '[012.][0-9.][0-9.][A-Z@.](\$[^|]+)?';
my $pattern = '[012.][0-9.][0-9.][A-Z@.](\$[^|]+|/[0-9.-]+)?';
while (@argv && $argv[0] =~ /^$pattern(\s*\|\s*($pattern)?)*$/) {
push @path, shift @argv;
}

if (@path) {
@path = map {
my $p = eval {PICA::Path->new($_)};
my $p = parse_path($_);
$p || die "invalid PICA Path: $_\n";
} grep {$_ ne ""} map {split /\s*\|\s*/, $_} @path;

Expand Down Expand Up @@ -311,28 +311,30 @@ sub run {
exit !!$invalid;
}

sub parse_path {
my $path = eval {PICA::Path->new($_[0], position_as_occurrence => 1)};
if ($path) {
}
return $path;
}

sub explain {
my ($schema, $path) = @_;
my $schema = $_[0];
my $path = parse_path($_[1]);

if (my $expr = eval {PICA::Path->new($path)}) {
$path = $expr;
if (!$path) {
warn "invalid PICA Path: $_[1]\n";
return;
}
else {
warn "invalid PICA Path: $path\n";
return
}

if ($path->stringify =~ /[.]/) {
elsif ($path->stringify =~ /[.]/) {
warn "Fields with wildcards cannot be explained yet!\n";
return
}

my $tag = $path->fields;

# Take positions as occurrences to allow PICA Plain syntax
my $occ = $path->occurrences // $path->positions;
my ($someocc) = grep {$_ > 0} split '-', $occ;
my $id = field_identifier($schema, [$tag, $someocc]);
my ($firstocc) = grep {$_ > 0} split '-', $path->occurrences;
my $id = field_identifier($schema, [$tag, $firstocc]);

my $def = $schema->{fields}{$id};
if (defined $path->subfields && $def) {
Expand Down
63 changes: 37 additions & 26 deletions lib/PICA/Path.pm
Original file line number Diff line number Diff line change
Expand Up @@ -10,52 +10,60 @@ use Scalar::Util qw(reftype);
use overload '""' => \&stringify;

sub new {
my ($class, $path) = @_;
my ($class, $path, %options) = @_;

confess "invalid pica path" if $path !~ /
([012.][0-9.][0-9.][A-Z@.]) # tag
(\[([0-9.]{2,3}|[0-9]+-[0-9]+)\])? # occurrence
(\$?([_A-Za-z0-9]+))? # subfields
(\/(\d+)?(-(\d+)?)?)? # position
([012.][0-9.][0-9.][A-Z@.]) # tag
(\[([0-9.]{2,3}|\d+-\d+)\])? # occurrence
(\$?([_A-Za-z0-9]+))? # subfields
(\/(\d+)?(-(\d+)?)?)? # position
/x;

my $field = $1;
my $occurrence = $3 !~ /^0+$/ ? $3 : undef;
my $occurrence = $3;
my $subfield = defined $5 ? "[$5]" : "[_A-Za-z0-9]";

my @position;
if (defined $6) { # from, to
my ($from, $dash, $to, $length) = ($7, $8, $9, 0);

if ($dash) {
confess "invalid pica path" unless defined($from // $to); # /-
if (defined $6) { # position
if (!defined $occurrence && $options{position_as_occurrence}) {
$occurrence = $7 . $8;
}
else {
my ($from, $dash, $to, $length) = ($7, $8, $9, 0);

if (defined $to) {
if (!$from and $dash) { # /-X
$from = 0;
if ($dash) {
confess "invalid pica path" unless defined($from // $to); # /-
}
$length = $to - $from + 1;
}
else {
if ($8) {
$length = undef;

if (defined $to) {
if (!$from and $dash) { # /-X
$from = 0;
}
$length = $to - $from + 1;
}
else {
$length = 1;
if ($8) {
$length = undef;
}
else {
$length = 1;
}
}
}

if (!defined $length or $length >= 1) {
unless (!$from and !defined $length) { # /0-
@position = ($from, $length);
if (!defined $length or $length >= 1) {
unless (!$from and !defined $length) { # /0-
@position = ($from, $length);
}
}
}
}

$field = qr{$field};

if (defined $occurrence) {
if ($occurrence =~ /^0+$/) {
$occurrence = undef;
}
elsif (defined $occurrence) {
if ($occurrence =~ /-/) {
my ($from, $to) = map {1 * $_} split '-', $occurrence;
if ($from eq $to) {
Expand Down Expand Up @@ -572,7 +580,7 @@ Option C<nested_arrays> creates a list for every field found:
=head1 METHODS
=head2 new( $expression )
=head2 new( $expression [, position_as_occurrence => 1 ] )
Create a PICA path by parsing the path expression. The expression consists of
Expand Down Expand Up @@ -600,6 +608,9 @@ supported.
=back
If option C<position_as_occurrence> is set, positions will be read as
occurrences, e.g. C</2-4> is read as C<[2-4]>.
=head2 match_record( $record, %options )
Returns matched fields as string or array reference.
Expand Down
3 changes: 2 additions & 1 deletion script/picadata
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ C<--schema> this will set annotations C<!> and C<?> to mark validation errors.
=head2 --path, -p

Select fields or subfield values specified by PICA Path expressions. Multiple
expressions can be separated by C<|> or by repeating the option.
expressions can be separated by C<|> or by repeating the option. Positions such
as C</3-7> are read as occurrence ranges.

=head2 --schema, -s

Expand Down
7 changes: 7 additions & 0 deletions t/15-path.t
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,11 @@ is $path->occurrences, undef;
is $path->subfields, '0';
is $path->positions, undef;

is(PICA::Path->new('123X/0-3', position_as_occurrence => 1)->stringify,
'123X[0-3]', 'position_as_occurrence');
is(PICA::Path->new('123X/9-10', position_as_occurrence => 1)->stringify,
'123X[9-10]', 'position_as_occurrence');
is(PICA::Path->new('123X/42', position_as_occurrence => 1)->stringify,
'123X[42]', 'position_as_occurrence');

done_testing;

0 comments on commit d43471d

Please sign in to comment.