forked from edf/fcrepo-reporting-utilities
-
Notifications
You must be signed in to change notification settings - Fork 0
/
reportSpaceUsedByCollection.pl
135 lines (121 loc) · 5.23 KB
/
reportSpaceUsedByCollection.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/perl
# collection space used reporting -edf 2013-1222
use strict;
use warnings;
no warnings qw(uninitialized);
use URI::Escape;
use XML::LibXSLT;
use XML::LibXML;
use Config::Tiny;
# Exactly one argument is required: the PID of the collection to report on.
# Usage and validation errors go to STDERR; the exit status stays 8.
if ( @ARGV != 1 ) {
    print STDERR "\n Usage is $0 <collection pid> \n\n";
    exit(8);
}

# TODO: add comments
# TODO: check if item in collection is a collection
# TODO: report nested collection space used
my $collectionPid = $ARGV[0];
chomp $collectionPid;

# The PID is later embedded in request URLs and split on ':' into a
# namespace and an id, so refuse anything that is not shaped like a Fedora
# PID: namespace of letters/digits/'-'/'.', a colon, then an id of
# letters/digits/'-'/'.'/'~'/'_' or %XX escapes.
if ( $collectionPid !~ m{\A[A-Za-z0-9.-]+:(?:[A-Za-z0-9.~_-]|%[0-9A-Fa-f]{2})+\z} ) {
    print STDERR "\n '$collectionPid' is not a valid collection pid (expected namespace:id) \n\n";
    exit(8);
}
# Load the connection settings from settings.config in the current working
# directory. Config::Tiny->read returns undef on failure, so stop here with
# its error text instead of continuing with empty settings.
my $config = Config::Tiny->read('settings.config')
    or die "Unable to read settings.config: " . Config::Tiny->errstr . "\n";

# Everything this script uses lives in the [settings] section.
my $settings = $config->{settings}
    or die "settings.config has no [settings] section\n";

# Without these three values no usable repository URI can be built.
for my $requiredKey (qw(ServerName ServerPort fedoraContext)) {
    die "settings.config is missing '$requiredKey' in [settings]\n"
        unless defined $settings->{$requiredKey}
        && length $settings->{$requiredKey};
}

my $ServerName    = $settings->{ServerName};
my $ServerPort    = $settings->{ServerPort};
my $fedoraContext = $settings->{fedoraContext};
my $UserName      = $settings->{UserName};
my $PassWord      = $settings->{PassWord};

# Base URI of the repository, e.g. host:port/context. No scheme is added
# here; whatever ServerName contains is used as-is.
my $fedoraURI = $ServerName . ":" . $ServerPort . "/" . $fedoraContext;
## calculate space used by collection PID
# Fetch the collection object's own FOXML. curl is started with list-form
# pipe open, so no shell is involved: credentials and the PID are passed
# verbatim as single arguments, whatever characters they contain.
open( my $collectionCurl, '-|', 'curl', '-s', '-u', "$UserName:$PassWord",
    '-X', 'GET', "$fedoraURI/objects/$collectionPid/objectXML" )
    or die "Unable to run curl: $!\n";
my $collectionFoxml = do { local $/; <$collectionCurl> };
close($collectionCurl)
    or die "curl failed fetching $collectionPid (exit status " . ( $? >> 8 ) . ")\n";
die "No FOXML returned for $collectionPid\n"
    unless defined $collectionFoxml && length $collectionFoxml;

#print $collectionFoxml;

# XSLT that reduces one FOXML document to a single CSV line:
#   pid,sum of all datastreamVersion @SIZE values,count of those attributes
# This stylesheet text is also used for the member objects further down.
my $sizeCalc = q(
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:foxml="info:fedora/fedora-system:def/foxml#"
exclude-result-prefixes="xs foxml"
version="1.0">
<xsl:output method="xml" omit-xml-declaration="yes"/>
<xsl:template match="/">
<!-- pid,sum,count -->
<xsl:value-of select="/foxml:digitalObject/@PID" /><xsl:call-template name="summary"></xsl:call-template></xsl:template><xsl:template name="summary">,<xsl:value-of select="sum(//foxml:datastreamVersion/@SIZE)" />,<xsl:value-of select="count(//foxml:datastreamVersion/@SIZE)" />
</xsl:template>
</xsl:stylesheet>
);

my $xml_parserCollection  = XML::LibXML->new;
my $xslt_parserCollection = XML::LibXSLT->new;

# curl -s exits 0 even when the server answers with an error page, so a
# response that is not XML is reported with a hint rather than a bare
# parser error.
my $xmlCollection = eval { $xml_parserCollection->parse_string($collectionFoxml) }
    or die "Response for $collectionPid is not valid XML (wrong pid or credentials?): $@";
my $xslCollection        = $xml_parserCollection->parse_string($sizeCalc);
my $stylesheetCollection = $xslt_parserCollection->parse_stylesheet($xslCollection);
my $resultsCollection    = $stylesheetCollection->transform($xmlCollection);
my $outputCollection     = $stylesheetCollection->output_string($resultsCollection);
chomp $outputCollection;

#print "$outputCollection\n"; # uncomment for verbose report

# @runningTotal collects one "pid,size,count" line per object; the
# collection object itself is the first entry.
my @runningTotal;
push( @runningTotal, $outputCollection );
my ( $nameSpace, $pidNumber ) = split( /:/, $collectionPid, 2 );

# Run curl without a shell (list-form pipe open) and return everything it
# wrote to STDOUT. Dies if curl cannot be started or exits non-zero.
my $runCurl = sub {
    my (@curlArgs) = @_;
    open( my $pipe, '-|', 'curl', '-s', @curlArgs )
        or die "Unable to run curl: $!\n";
    my $body = do { local $/; <$pipe> };
    close($pipe)
        or die "curl failed for $curlArgs[-1] (exit status " . ( $? >> 8 ) . ")\n";
    return defined $body ? $body : '';
};

## get members of collection from ITQL query
# Selects every object that isMemberOf / isMemberOfCollection the given
# collection, minus objects carrying the <ns>:<ns>BasicCollection model.
my $collectionUri = 'info:fedora/' . $nameSpace . ':' . $pidNumber;
my $memberQuery =
      'select $object from <#ri> where ($object <fedora-rels-ext:isMemberOf> <'
    . $collectionUri
    . '> or $object <fedora-rels-ext:isMemberOfCollection> <'
    . $collectionUri
    . '> ) minus $object <fedora-model:hasModel> <info:fedora/'
    . $nameSpace . ':' . $nameSpace
    . 'BasicCollection> order by $object ';
my $memberCsv = $runCurl->(
    "$fedoraURI/risearch?type=tuples&lang=itql&format=CSV&dt=on&query="
        . uri_escape($memberQuery) );

# Keep each member's full PID (namespace included). Members are not
# required to share the collection's namespace, so the namespace must not
# be stripped and re-added from the collection PID.
my @pidsInCollection;
foreach my $line ( split /\n/, $memberCsv ) {
    next if $line =~ m#^"object"#;    # CSV header row
    $line =~ s#info:fedora/##g;
    next if $line eq '';              # ignore blank lines in the response
    push( @pidsInCollection, $line );
}

# Order by namespace, then by id: all-digit ids numerically (and ahead of
# other ids), anything else as strings. For the common case of
# same-namespace numeric ids this is plain numeric order.
my $byPid = sub {
    my ( $nsA, $idA ) = split /:/, $a, 2;
    my ( $nsB, $idB ) = split /:/, $b, 2;
    $idA = '' unless defined $idA;
    $idB = '' unless defined $idB;
    return $nsA cmp $nsB if $nsA ne $nsB;
    my $numA = $idA =~ /\A\d+\z/ ? 1 : 0;
    my $numB = $idB =~ /\A\d+\z/ ? 1 : 0;
    return $idA <=> $idB   if $numA && $numB;
    return $numB <=> $numA if $numA != $numB;
    return $idA cmp $idB;
};
my @sortedPidsInCollection = sort $byPid @pidsInCollection;

# The size stylesheet does not change between members, so compile it once
# instead of once per object.
my $memberXmlParser  = XML::LibXML->new;
my $memberStylesheet = XML::LibXSLT->new->parse_stylesheet(
    $memberXmlParser->parse_string($sizeCalc) );

foreach my $pid (@sortedPidsInCollection) {

    # print "$pid\n";
    my $foxml = $runCurl->(
        '-u', "$UserName:$PassWord",
        '-X', 'GET', "$fedoraURI/objects/$pid/objectXML"
    );
    my $xml = eval { $memberXmlParser->parse_string($foxml) }
        or die "Response for $pid is not valid XML: $@";
    my $output =
        $memberStylesheet->output_string( $memberStylesheet->transform($xml) );
    chomp $output;

    # print "$output\n"; # uncomment for verbose report
    push( @runningTotal, $output );
}
# Render a byte count for the report: the largest binary unit (TB, GB, MB,
# KB) that the value reaches, with three decimal places, or the plain
# number followed by "bytes" when it is below 1024.
sub _format_size {
    my ($bytes) = @_;
    my @units = (
        [ 'TB', 1024**4 ],
        [ 'GB', 1024**3 ],
        [ 'MB', 1024**2 ],
        [ 'KB', 1024 ],
    );
    foreach my $unit (@units) {
        my ( $label, $factor ) = @{$unit};

        # >= so that an exact multiple is shown in its own unit
        # (1024 bytes is "1.000 KB", not "1024 bytes").
        return sprintf( "%.3f %s", $bytes / $factor, $label )
            if $bytes >= $factor;
    }
    return "$bytes bytes";
}

# Each @runningTotal entry is a "pid,size,count" line. Add them up into the
# number of objects, the total bytes and the total datastream count.
my ( $pidCounter, $sum, $countPid ) = ( 0, 0, 0 );
foreach my $line (@runningTotal) {
    my ( undef, $size, $count ) = split( /,/, $line );
    $pidCounter++;
    $sum      += $size  || 0;
    $countPid += $count || 0;
}

print "\nCollection $collectionPid Totals\n\n Fedora Objects: $pidCounter";
print "\n Space Used: ", _format_size($sum), "\n";
print " Number of Datastreams: $countPid\n";