forked from lmika/oaipmh
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcmdcompare.go
267 lines (221 loc) · 8.11 KB
/
cmdcompare.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
package main
import (
	"flag"
	"fmt"
	"log"
	"os"
	"sort"
)
// --------------------------------------------------------------------------------
// CompareWith command

// CompareCommand compares the set of metadata records in one catalog (the
// "expected" side, reached through Ctx) with the records of another (the
// "comparison" side, reached through OtherSession).
type CompareCommand struct {
// Ctx supplies the session, provider and configuration used for the
// expected side of the comparison.
Ctx *Context
// OtherProvider is the provider looked up from the command's single
// positional argument; it is assigned by Run.
OtherProvider *Provider
// OtherSession is the session opened against OtherProvider; it is assigned
// by Run when the provider lookup succeeds.
OtherSession *OaipmhSession
// Flag values registered by Flags (-s, -B, -A, -F, -f, -c and -C
// respectively).
setName *string
beforeDate *string
afterDate *string
fromFile *string
firstResult *int
maxResults *int
compareContent *bool
// Running totals updated by the listener methods and printed by Run.
// urnsInBoth counts every URN seen on both sides, including those whose
// content differs or could not be fetched.
urnsInBoth int
missingUrns int
redundentUrns int
urnsDiffering int
errors int
}
// Flags registers the command's startup flags on fs and returns fs. Every flag
// value lives in freshly allocated storage that the receiver keeps a pointer
// to, so the values can be read once fs has been parsed.
func (sc *CompareCommand) Flags(fs *flag.FlagSet) *flag.FlagSet {
	sc.setName = new(string)
	fs.StringVar(sc.setName, "s", "", "Select records from this set")

	sc.beforeDate = new(string)
	fs.StringVar(sc.beforeDate, "B", "", "Select records that were updated before date (YYYY-MM-DD)")

	sc.afterDate = new(string)
	fs.StringVar(sc.afterDate, "A", "", "Select records that were updated after date (YYYY-MM-DD)")

	sc.firstResult = new(int)
	fs.IntVar(sc.firstResult, "f", 0, "Index of first record to retrieve")

	sc.fromFile = new(string)
	fs.StringVar(sc.fromFile, "F", "", "Read identifiers from a file")

	sc.maxResults = new(int)
	fs.IntVar(sc.maxResults, "c", 100000, "Maximum number of records to retrieve")

	sc.compareContent = new(bool)
	fs.BoolVar(sc.compareContent, "C", false, "Compares the metadata content of common metadata records")

	return fs
}
// genListIdentifierArgsFromCommandLine builds the ListIdentifierArgs described
// by the -s, -A and -B flags. An empty -s falls back to the set configured on
// the context's provider, while a -s of "*" leaves the Set field empty.
func (sc *CompareCommand) genListIdentifierArgsFromCommandLine() ListIdentifierArgs {
	selectedSet := *sc.setName
	switch selectedSet {
	case "":
		selectedSet = sc.Ctx.Provider.Set
	case "*":
		selectedSet = ""
	}

	// -A is the lower date bound, -B the upper one.
	from := parseDateString(*sc.afterDate)
	until := parseDateString(*sc.beforeDate)

	return ListIdentifierArgs{
		Set:   selectedSet,
		From:  from,
		Until: until,
	}
}
// expectedLister returns the lister for the expected side of the comparison.
// Without -F it lists identifiers from the context's session and reports a URN
// as live when its header is not marked deleted. With -F it reads URNs from
// the named file and reports every one of them as live.
func (sc *CompareCommand) expectedLister() PresenceLister {
	if *sc.fromFile == "" {
		listArgs := sc.genListIdentifierArgsFromCommandLine()
		return func(callback func(urn string, isLive bool) bool) error {
			onHeader := func(hr *HeaderResult) bool {
				return callback(hr.Identifier(), !hr.Deleted)
			}
			return sc.Ctx.Session.ListIdentifiers(listArgs, *sc.firstResult, *sc.maxResults, onHeader)
		}
	}

	return func(callback func(urn string, isLive bool) bool) error {
		onLine := func(urn string) bool {
			return callback(urn, true)
		}
		return LinesFromFile(*sc.fromFile, *sc.firstResult, *sc.maxResults, onLine)
	}
}
// comparisonLister returns the lister for the comparison endpoint. It lists
// identifiers from OtherSession and reports a URN as live when its header is
// not marked deleted. Reading identifiers from a file (-F) is not supported
// here: Die is called and nil is returned.
func (sc *CompareCommand) comparisonLister() PresenceLister {
	// TODO: When getting it from file, simply get headers from the other session
	if *sc.fromFile != "" {
		Die("Support for file not done yet")
		return nil
	}

	listArgs := sc.genListIdentifierArgsFromCommandLine()
	return func(callback func(urn string, isLive bool) bool) error {
		onHeader := func(hr *HeaderResult) bool {
			return callback(hr.Identifier(), !hr.Deleted)
		}
		return sc.OtherSession.ListIdentifiers(listArgs, *sc.firstResult, *sc.maxResults, onHeader)
	}
}
// runPresenceComparator feeds the live URNs of the expected and comparison
// listers into an in-memory presence comparator and then reports the outcome
// back to sc, which acts as the comparison listener.
//
// A lister that returns an error is logged rather than ignored: the URNs
// gathered so far are still reported, but the log line tells the user that the
// resulting "missing"/"redundent" entries may stem from an incomplete listing.
func (sc *CompareCommand) runPresenceComparator() {
	pc := InMemoryPresenceComparator(make(map[string]byte))

	// Run the expected lister
	expectedLister := sc.expectedLister()
	err := expectedLister(func(urn string, isLive bool) bool {
		if isLive {
			pc.AddExpectedUrn(urn)
		}
		return true
	})
	if err != nil {
		log.Printf("Error listing expected identifiers (comparison may be incomplete): %v", err)
	}

	// Runs the comparison lister
	comparisonLister := sc.comparisonLister()
	err = comparisonLister(func(urn string, isLive bool) bool {
		if isLive {
			pc.AddComparisonUrn(urn)
		}
		return true
	})
	if err != nil {
		log.Printf("Error listing comparison identifiers (comparison may be incomplete): %v", err)
	}

	// Return the report
	pc.Report(sc)
}
// UrnPresentInBothProviders is called by the presence comparator with URNs
// that are present in both providers. It always counts the URN in urnsInBoth.
//
// When content comparison (-C) is enabled it additionally fetches the record
// from both sessions and compares their XML content, printing "D  <urn>" and
// counting the URN in urnsDiffering when they differ. If either fetch fails,
// "E  <urn>" is printed, one error is counted for the URN and the comparison
// is skipped: a record returned alongside an error must not be inspected.
func (sc *CompareCommand) UrnPresentInBothProviders(urn string) {
	sc.urnsInBoth++

	// Compare both records only if in comparison mode
	if !*sc.compareContent {
		return
	}

	thisRec, err := sc.Ctx.Session.GetRecord(urn)
	if err != nil {
		fmt.Println("E ", urn)
		sc.errors++
		return
	}

	otherRec, err := sc.OtherSession.GetRecord(urn)
	if err != nil {
		fmt.Println("E ", urn)
		sc.errors++
		return
	}

	if thisRec.Content.Xml != otherRec.Content.Xml {
		fmt.Println("D ", urn)
		sc.urnsDiffering++
	}
}
// MissingUrnFound is called by the presence comparator for each URN that is in
// the expected provider but missing from the comparison provider. It bumps the
// missing counter and prints the URN prefixed with "- ".
func (sc *CompareCommand) MissingUrnFound(urn string) {
	sc.missingUrns++
	fmt.Println("- ", urn)
}
// RedundentUrnFound is called by the presence comparator for each URN that is
// in the comparison provider but missing from the expected provider. It bumps
// the redundant counter and prints the URN prefixed with "+ ".
func (sc *CompareCommand) RedundentUrnFound(urn string) {
	sc.redundentUrns++
	fmt.Println("+ ", urn)
}
// Run executes the compare command. args must hold exactly one element, the
// name of the provider to compare against; otherwise a usage message is
// written to standard error and the process exits with status 1. After the
// presence comparison has run, a one-line summary of the counters is logged.
func (sc *CompareCommand) Run(args []string) {
	if len(args) != 1 {
		fmt.Fprintf(os.Stderr, "Usage: compare <provider>\n")
		os.Exit(1)
	}

	// Connect to the other OAIPMH session
	providerName := args[0]
	sc.OtherProvider = sc.Ctx.Config.LookupProvider(providerName)
	if sc.OtherProvider == nil {
		Die("Could not log into provider %s", providerName)
	} else {
		sc.OtherSession = NewOaipmhSession(sc.OtherProvider.Url, *prefix)
	}

	// Runs the presence comparator
	sc.runPresenceComparator()

	// The short summary omits the counters that only content comparison fills.
	if !*sc.compareContent {
		log.Printf("Comparison complete: %d OK, %d missing, %d redundent", sc.urnsInBoth, sc.missingUrns, sc.redundentUrns)
		return
	}
	log.Printf("Comparison complete: %d OK, %d different, %d missing, %d redundent, %d errors",
		sc.urnsInBoth, sc.urnsDiffering, sc.missingUrns, sc.redundentUrns, sc.errors)
}
// -------------------------------------------------------------------------------------
// Maintains the comparison state

// PresenceLister enumerates URNs, invoking callback once per URN together with
// a flag saying whether the record is live (the listers in this file pass
// false for headers marked deleted). It returns the error, if any, produced by
// the underlying listing call.
// NOTE(review): the callback's bool result is presumably a "keep going" signal
// interpreted by ListIdentifiers / LinesFromFile — confirm against those.
type PresenceLister func(callback func(urn string, islive bool) bool) error
// PresenceComparator collects the URNs seen on the "expected" and
// "comparison" sides and reports, per URN, on which sides it was seen.
type PresenceComparator interface {
// Adds a URN from the "expected" provider.
AddExpectedUrn(urn string)
// Adds a URN from the "comparison" provider.
AddComparisonUrn(urn string)
// Reports the results by invoking the matching listener method for every
// URN that was added.
Report(listener PresenceComparisonStateListener)
}
// PresenceComparisonStateListener is the listener used by a presence
// comparator to report differences between the two providers.
type PresenceComparisonStateListener interface {
// Called with URNs that are present in both providers.
UrnPresentInBothProviders(urn string)
// Called with URNs that are in the expected provider but missing
// from the comparison provider.
MissingUrnFound(urn string)
// Called with URNs that are in the comparison provider but missing
// from the expected provider.
RedundentUrnFound(urn string)
}
// Bitmasks for the state of a single URN
const (
// URN_IN_EXPECTED is the bit set for a URN added from the expected provider.
URN_IN_EXPECTED byte = 0x01
// URN_IN_ACTUAL is the bit set for a URN added from the comparison provider.
URN_IN_ACTUAL byte = 0x02
// URN_IN_BOTH is the value of a URN carrying both bits
// (URN_IN_EXPECTED | URN_IN_ACTUAL).
URN_IN_BOTH byte = 0x03
)
// InMemoryPresenceComparator is a presence comparator that uses an in-memory
// map. Keys are URNs; each value is the combination of URN_IN_* bits recorded
// for that URN so far.
type InMemoryPresenceComparator map[string]byte
// setBitForUrn ORs theBit into the bitmask stored for urn. A URN that has no
// entry yet reads as the zero byte, so its entry ends up holding just theBit.
func (mpc InMemoryPresenceComparator) setBitForUrn(urn string, theBit byte) {
	mpc[urn] |= theBit
}
// AddExpectedUrn records urn as seen in the expected provider by setting the
// URN_IN_EXPECTED bit on its entry.
func (mpc InMemoryPresenceComparator) AddExpectedUrn(urn string) {
mpc.setBitForUrn(urn, URN_IN_EXPECTED)
}
// AddComparisonUrn records urn as seen in the comparison provider by setting
// the URN_IN_ACTUAL bit on its entry.
func (mpc InMemoryPresenceComparator) AddComparisonUrn(urn string) {
mpc.setBitForUrn(urn, URN_IN_ACTUAL)
}
// Report walks every recorded URN and calls the listener method that matches
// its bitmask: UrnPresentInBothProviders for URN_IN_BOTH, MissingUrnFound for
// URN_IN_EXPECTED and RedundentUrnFound for URN_IN_ACTUAL.
//
// URNs are visited in ascending lexical order. Ranging over the map directly
// would visit them in an unspecified order that changes between runs, making
// the printed report impossible to diff or reproduce.
//
// Report panics if an entry holds any other bitmask; the Add* methods never
// store such a value, so that indicates a programming error.
func (mpc InMemoryPresenceComparator) Report(listener PresenceComparisonStateListener) {
	urns := make([]string, 0, len(mpc))
	for urn := range mpc {
		urns = append(urns, urn)
	}
	sort.Strings(urns)

	for _, urn := range urns {
		switch bitmask := mpc[urn]; bitmask {
		case URN_IN_BOTH:
			listener.UrnPresentInBothProviders(urn)
		case URN_IN_EXPECTED:
			listener.MissingUrnFound(urn)
		case URN_IN_ACTUAL:
			listener.RedundentUrnFound(urn)
		default:
			panic(fmt.Errorf("Invalid value of bitmask: %s, %x", urn, bitmask))
		}
	}
}