-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrapy_gzip_exporters.py
46 lines (33 loc) · 1.2 KB
/
scrapy_gzip_exporters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gzip
from scrapy.exporters import (
CsvItemExporter,
JsonItemExporter,
JsonLinesItemExporter,
XmlItemExporter,
)
# Maps a feed format name to the dotted import path of its exporter class.
# The gzip variants are the classes defined in this module, so their paths
# are built from this module's own import name.
FEED_EXPORTERS = {
    # "jsonl" is a widely recognized JSON Lines file extension.
    'jsonl': 'scrapy.exporters.JsonLinesItemExporter',
    'jsonl.gz': '%s.JsonLinesGzipItemExporter' % __name__,
    'json.gz': '%s.JsonGzipItemExporter' % __name__,
    'jsonlines.gz': '%s.JsonLinesGzipItemExporter' % __name__,
    'jl.gz': '%s.JsonLinesGzipItemExporter' % __name__,
    'csv.gz': '%s.CsvGzipItemExporter' % __name__,
    'xml.gz': '%s.XmlGzipItemExporter' % __name__,
}
# Derived from https://github.com/scrapy/scrapy/issues/2174
class GzipMixin(object):
    """Mixin that gzip-compresses everything an item exporter writes.

    List it *before* the concrete exporter class in the bases so that its
    ``__init__`` and ``finish_exporting`` run first and delegate to the
    exporter through ``super()``.
    """

    def __init__(self, file, **kwargs):
        """Wrap *file* in a gzip stream and give that stream to the exporter.

        :param file: binary file-like object that receives the compressed
            bytes.
        :param kwargs: forwarded unchanged to the next ``__init__`` in the
            MRO.
        """
        # The mode is passed explicitly: when it is omitted, GzipFile copies
        # it from ``file.mode`` and falls back to reading if there is none.
        # A file object without a ``mode`` attribute (e.g. io.BytesIO) or
        # with a mode starting with 'r' (e.g. 'rb+') would therefore produce
        # a read-only gzip stream on which every write fails.
        self.gzfile = gzip.GzipFile(fileobj=file, mode='wb')
        super(GzipMixin, self).__init__(self.gzfile, **kwargs)

    def finish_exporting(self):
        """Let the exporter write its trailer, then close the gzip stream.

        Only the gzip wrapper is closed here; per the ``gzip`` documentation
        closing a ``GzipFile`` leaves the underlying ``fileobj`` open, so the
        caller remains responsible for the file it passed in.
        """
        try:
            super(GzipMixin, self).finish_exporting()
        finally:
            # Always close so the gzip trailer is written, even when the
            # wrapped exporter raises while finishing.
            self.gzfile.close()
class JsonLinesGzipItemExporter(GzipMixin, JsonLinesItemExporter):
    """``JsonLinesItemExporter`` whose output goes through ``GzipMixin``."""
class JsonGzipItemExporter(GzipMixin, JsonItemExporter):
    """``JsonItemExporter`` whose output goes through ``GzipMixin``."""
class XmlGzipItemExporter(GzipMixin, XmlItemExporter):
    """``XmlItemExporter`` whose output goes through ``GzipMixin``."""
class CsvGzipItemExporter(GzipMixin, CsvItemExporter):
    """``CsvItemExporter`` whose output goes through ``GzipMixin``."""