Skip to content

Commit 6923e38

Browse files
committed
fix version conflict
2 parents 4be4e73 + 2ae97a2 commit 6923e38

File tree

8 files changed

+48
-48
lines changed

8 files changed

+48
-48
lines changed

cmd/span-crossref-snapshot/main.go

Lines changed: 17 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,18 @@ import (
5656
"strings"
5757
"sync/atomic"
5858

59-
"github.com/segmentio/encoding/json"
60-
6159
"github.com/klauspost/compress/zstd"
6260
gzip "github.com/klauspost/pgzip"
6361
"github.com/miku/clam"
6462
"github.com/miku/span/formats/crossref"
6563
"github.com/miku/span/parallel"
6664
"github.com/miku/span/xio"
65+
"github.com/segmentio/encoding/json"
6766
log "github.com/sirupsen/logrus"
6867
)
6968

70-
// fallback awk script is used, if the filterline executable is not found
69+
// fallback awk script is used, if the filterline executable is not found;
70+
// compiled filterline is about 3x faster.
7171
var fallback = `
7272
#!/bin/bash
7373
LIST="$1" LC_ALL=C awk '
@@ -94,6 +94,7 @@ var (
9494
verbose = flag.Bool("verbose", false, "be verbose")
9595
pathFile = flag.String("f", "", "path to a file naming all inputs files to be considered, one file per line")
9696
errCountThreshold = flag.Int64("E", 1, "number of json unmarshal errors to tolerate")
97+
sortBufferSize = flag.String("S", "25%", "passed to sort")
9798
)
9899

99100
// writeFields writes a variable number of values separated by sep to a given
@@ -141,27 +142,8 @@ func main() {
141142
)
142143
switch {
143144
case *pathFile != "":
145+
// TODO: this feature would allow skipping the file concatenation step.
144146
log.Fatal("not yet implemented")
145-
// b, err := ioutil.ReadFile(*pathFile)
146-
// if err != nil {
147-
// log.Fatal(err)
148-
// }
149-
// s := string(b)
150-
// var readers []io.Reader
151-
// for _, line := range strings.Split(s, "\n") {
152-
// line = strings.TrimSpace(line)
153-
// if len(line) == 0 || strings.HasPrefix(line, "#") {
154-
// continue
155-
// }
156-
// f, err := os.Open(line)
157-
// if err != nil {
158-
// log.Fatal(err)
159-
// }
160-
// defer f.Close()
161-
// readers = append(readers, f)
162-
// }
163-
// log.Printf("path-list: will read from %d files", len(readers))
164-
// r = io.MultiReader(readers...)
165147
case flag.NArg() == 0:
166148
log.Fatal("input file required")
167149
case flag.NArg() == 1:
@@ -221,7 +203,7 @@ func main() {
221203
var (
222204
br = bufio.NewReader(reader)
223205
bw = bufio.NewWriter(tf)
224-
numErrs atomic.Int64
206+
numErrs atomic.Int64 // error count across threads
225207
)
226208
pp := parallel.NewProcessor(br, bw, func(lineno int64, b []byte) ([]byte, error) {
227209
var (
@@ -240,13 +222,21 @@ func main() {
240222
if n := numErrs.Load(); n > *errCountThreshold {
241223
return nil, err
242224
} else {
243-
log.Printf("skipping error (errs: %d <= max: %d): %v", n, *errCountThreshold, err)
225+
log.Printf("skipping error (#err: %d <= max: %d): %v", n, *errCountThreshold, err)
244226
}
245227
return nil, nil
246228
}
247229
date, err := doc.Indexed.Date()
248230
if err != nil {
249-
return nil, err
231+
// Encountered so far in a single document only, namely:
232+
// {"DOI":"10.15215\/aupress\/9781897425909.026","score":8.143441}
233+
numErrs.Add(1)
234+
if n := numErrs.Load(); n > *errCountThreshold {
235+
return nil, err
236+
} else {
237+
log.Printf("skipping error (#err: %d <= max: %d): %v", n, *errCountThreshold, err)
238+
}
239+
return nil, nil
250240
}
251241
if _, ok := excludes[doc.DOI]; ok {
252242
return nil, nil
@@ -273,7 +263,7 @@ func main() {
273263
// Stage 2: Identify relevant records. Sort by DOI (3), then date reversed (2);
274264
// then unique by DOI (3). Should keep the entry of the last update (filename,
275265
// document date, DOI).
276-
fastsort := `LC_ALL=C sort -S20%`
266+
fastsort := fmt.Sprintf(`LC_ALL=C sort -S%s`, *sortBufferSize)
277267
cmd := `{{ f }} -k3,3 -rk2,2 {{ input }} | {{ f }} -k3,3 -u | cut -f1 | {{ f }} -n > {{ output }}`
278268
log.WithFields(log.Fields{
279269
"prefix": "stage 2",

cmd/span-import/main.go

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ var (
5252
cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file")
5353
memProfile = flag.String("memprofile", "", "write heap profile to file (go tool pprof -png --alloc_objects program mem.pprof > mem.png)")
5454
logfile = flag.String("logfile", "", "path to logfile to append to, otherwise stderr")
55+
verbose = flag.Bool("verbose", false, "be verbose")
5556
)
5657

5758
// Factory creates things.
@@ -111,6 +112,9 @@ func processXML(r io.Reader, w io.Writer, name string) error {
111112
output, err := converter.ToIntermediateSchema()
112113
if err != nil {
113114
if _, ok := err.(span.Skip); ok {
115+
if *verbose {
116+
log.Printf("%v", err)
117+
}
114118
continue
115119
}
116120
return err
@@ -138,6 +142,9 @@ func processJSON(r io.Reader, w io.Writer, name string) error {
138142
}
139143
output, err := converter.ToIntermediateSchema()
140144
if _, ok := err.(span.Skip); ok {
145+
if *verbose {
146+
log.Printf("%v", err)
147+
}
141148
return nil, nil
142149
}
143150
if err != nil {
@@ -192,12 +199,10 @@ func processText(r io.Reader, w io.Writer, name string) error {
192199

193200
func main() {
194201
flag.Parse()
195-
196202
if *showVersion {
197203
fmt.Println(span.AppVersion)
198204
os.Exit(0)
199205
}
200-
201206
if *cpuProfile != "" {
202207
f, err := os.Create(*cpuProfile)
203208
if err != nil {
@@ -208,7 +213,6 @@ func main() {
208213
}
209214
defer pprof.StopCPUProfile()
210215
}
211-
212216
if *list {
213217
var keys []string
214218
for k := range FormatMap {
@@ -220,7 +224,6 @@ func main() {
220224
}
221225
os.Exit(0)
222226
}
223-
224227
if *logfile != "" {
225228
f, err := os.OpenFile(*logfile, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
226229
if err != nil {
@@ -232,12 +235,9 @@ func main() {
232235
logger.Out = f
233236
log.SetOutput(logger.Writer())
234237
}
235-
236238
w := bufio.NewWriter(os.Stdout)
237239
defer w.Flush()
238-
239240
var reader io.Reader = os.Stdin
240-
241241
if flag.NArg() > 0 {
242242
var files []io.Reader
243243
for _, filename := range flag.Args() {
@@ -250,7 +250,6 @@ func main() {
250250
}
251251
reader = io.MultiReader(files...)
252252
}
253-
254253
switch *name {
255254
// XXX: Configure this in one place.
256255
case

common.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ import (
2929

3030
const (
3131
// AppVersion of span package. Commandline tools will show this on -v.
32-
AppVersion = "0.1.364"
32+
AppVersion = "0.1.366"
3333
// KeyLengthLimit was a limit imposed by the memcached protocol, which
3434
// was used for blob storage until Q1 2017. We switched the key-value
3535
// store, so this limit is somewhat obsolete.

docs/span.1

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ schema and integration tools
2121
.PP
2222
\fB\fCspan\-update\-labels\fR [\fB\fC\-f\fR \fIfile\fP, \fB\fC\-s\fR \fIseparator\fP] < \fIfile\fP
2323
.PP
24-
\fB\fCspan\-crossref\-snapshot\fR [\fB\fC\-x\fR \fIfile\fP] \-o \fIfile\fP \fIfile\fP
24+
\fB\fCspan\-crossref\-snapshot\fR [\fB\fC\-x\fR \fIfile\fP] [\fB\fC\-S\fR \fISIZE\fP] \-o \fIfile\fP \fIfile\fP
2525
.PP
2626
\fB\fCspan\-local\-data\fR < \fIfile\fP
2727
.PP
@@ -37,7 +37,7 @@ schema and integration tools
3737
.PP
3838
\fB\fCspan\-crossref\-members\fR [\fB\fC\-base\fR \fIURL\fP] [\fB\fC\-offset\fR \fIN\fP] [\fB\fC\-rows\fR \fIN\fP] [\fB\fC\-q\fR] [\fB\fC\-sleep\fR \fIduration\fP]
3939
.PP
40-
\fB\fCspan\-crossref\-sync\fR [\fB\fC\-P\fR \fIprefix\fP] [\fB\fC\-i\fR \fIinterval] [\fB\fC\-p\fR \fIcompress\-program\fP] [\fB\fC\-s\fR \fIdate\fP] [\fB\fC\-e\fR *date\fP]
40+
\fB\fCspan\-crossref\-sync\fR [\fB\fC\-P\fR \fIprefix\fP] [\fB\fC\-i\fR \fIinterval\fP] [\fB\fC\-p\fR \fIcompress\-program\fP] [\fB\fC\-s\fR \fIdate\fP] [\fB\fC\-e\fR \fIdate\fP] [\fB\fC\-E\fR \fInumerrors\fP]
4141
.SH DESCRIPTION
4242
.PP
4343
The \fB\fCspan\fR tools convert to and from an intermediate schema and support
@@ -63,7 +63,7 @@ EXAMPLE for a CONFIGURATION FILE. \fB\fCspan\-review\fR details in INDEX REVIEW.
6363
List supported formats. \fB\fCspan\-import\fR, \fB\fCspan\-export\fR only.
6464
.TP
6565
\fB\fC\-verbose\fR
66-
More output. \fB\fCspan\-check\fR only.
66+
More output. \fB\fCspan\-check\fR, \fB\fCspan\-import\fR only.
6767
.TP
6868
\fB\fC\-b\fR \fIN\fP
6969
Batch size. \fB\fCspan\-tag\fR, \fB\fCspan\-check\fR, \fB\fCspan\-import\fR, \fB\fCspan\-export\fR, \fB\fCspan\-crossref\-snapshot\fR only.
@@ -154,6 +154,12 @@ Suppress logging output, \fB\fCspan\-crossref\-members\fR only.
154154
.TP
155155
\fB\fC\-h\fR
156156
Show usage.
157+
.TP
158+
\fB\fC\-E\fR \fInumerrors\fP
159+
Number of errors to tolerate during processing. \fB\fCspan\-crossref\-snapshot\fR only.
160+
.TP
161+
\fB\fC\-S\fR \fIsize\fP
162+
Buffer size, passed to sort. \fB\fCspan\-crossref\-snapshot\fR only.
157163
.SH EXAMPLES
158164
.PP
159165
List supported formats for conversion to intermediate schema:

docs/span.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ SYNOPSIS
2626

2727
`span-update-labels` [`-f` *file*, `-s` *separator*] < *file*
2828

29-
`span-crossref-snapshot` [`-x` *file*] -o *file* *file*
29+
`span-crossref-snapshot` [`-x` *file*] [`-S` *SIZE*] -o *file* *file*
3030

3131
`span-local-data` < *file*
3232

@@ -42,7 +42,7 @@ SYNOPSIS
4242

4343
`span-crossref-members` [`-base` *URL*] [`-offset` *N*] [`-rows` *N*] [`-q`] [`-sleep` *duration*]
4444

45-
`span-crossref-sync` [`-P` *prefix*] [`-i` *interval] [`-p` *compress-program*] [`-s` *date*] [`-e` *date*]
45+
`span-crossref-sync` [`-P` *prefix*] [`-i` *interval*] [`-p` *compress-program*] [`-s` *date*] [`-e` *date*] [`-E` *numerrors*]
4646

4747

4848
DESCRIPTION
@@ -73,7 +73,7 @@ This section is correct, but incomplete. Consult `-h` for further flags.
7373
List supported formats. `span-import`, `span-export` only.
7474

7575
`-verbose`
76-
More output. `span-check` only.
76+
More output. `span-check`, `span-import` only.
7777

7878
`-b` *N*
7979
Batch size. `span-tag`, `span-check`, `span-import`, `span-export`, `span-crossref-snapshot` only.
@@ -165,6 +165,12 @@ This section is correct, but incomplete. Consult `-h` for further flags.
165165
`-h`
166166
Show usage.
167167

168+
`-E` *numerrors*
169+
Number of errors to tolerate during processing. `span-crossref-snapshot` only.
170+
171+
`-S` *size*
172+
Buffer size, passed to sort. `span-crossref-snapshot` only.
173+
168174
EXAMPLES
169175
--------
170176

freeze.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"bytes"
66
"fmt"
77
"io"
8-
"io/ioutil"
98
"os"
109
"path/filepath"
1110

@@ -25,7 +24,7 @@ func UnfreezeFilterConfig(frozenfile string) (dir, blob string, err error) {
2524
mappings = make(map[string]string)
2625
b []byte
2726
)
28-
if dir, err = ioutil.TempDir("", "span-tag-unfreeze-"); err != nil {
27+
if dir, err = os.MkdirTemp("", "span-tag-unfreeze-"); err != nil {
2928
return
3029
}
3130
if r, err = zip.OpenReader(frozenfile); err != nil {
@@ -66,7 +65,7 @@ func UnfreezeFilterConfig(frozenfile string) (dir, blob string, err error) {
6665
}
6766
}
6867
blob = filepath.Join(dir, "blob")
69-
if b, err = ioutil.ReadFile(blob); err != nil {
68+
if b, err = os.ReadFile(blob); err != nil {
7069
return
7170
}
7271
for url, file := range mappings {
@@ -75,7 +74,7 @@ func UnfreezeFilterConfig(frozenfile string) (dir, blob string, err error) {
7574
replacement := []byte(fmt.Sprintf(`"file://%s"`, filepath.Join(dir, file)))
7675
b = bytes.Replace(b, value, replacement, -1)
7776
}
78-
if err = ioutil.WriteFile(blob, b, 0777); err != nil {
77+
if err = os.WriteFile(blob, b, 0777); err != nil {
7978
return
8079
}
8180
return dir, blob, nil

packaging/deb/span/DEBIAN/control

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Package: span
2-
Version: 0.1.364
2+
Version: 0.1.366
33
Section: utils
44
Priority: optional
55
Architecture: amd64

packaging/rpm/span.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Summary: Library data tools.
22
Name: span
3-
Version: 0.1.364
3+
Version: 0.1.366
44
Release: 0
55
License: GPL
66
ExclusiveArch: x86_64

0 commit comments

Comments
 (0)