Skip to content

Commit

Permalink
Merge pull request #22 from Legilibre/skip-links
Browse files: browse the repository at this point in the history
Ajout d'un argument `--skip-links` à tar2sqlite
  • Loading branch information
Changaco authored Feb 3, 2018
2 parents 5832bee + 228c23b commit a33ca8c
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions legi/tar2sqlite.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -130,7 +130,7 @@ def suppress(get_table, db, liste_suppression):
json.dumps(counts, indent=4, sort_keys=True))


def process_archive(db, archive_path):
def process_archive(db, archive_path, process_links=True):

# Define some constants
ARTICLE_TAGS = set('NOTA BLOC_TEXTUEL'.split())
Expand Down Expand Up @@ -343,7 +343,7 @@ def count_one(k):
else:
raise Exception('unexpected tag: '+tag)

if tag in ('ARTICLE', 'TEXTE_VERSION'):
if process_links and tag in ('ARTICLE', 'TEXTE_VERSION'):
e = root if tag == 'ARTICLE' else meta_version
liens_tags = e.find('LIENS')
if liens_tags is not None:
Expand Down Expand Up @@ -457,6 +457,8 @@ def main():
p.add_argument('--pragma', action='append', default=[],
help="Doc: https://www.sqlite.org/pragma.html | Example: journal_mode=WAL")
p.add_argument('--raw', default=False, action='store_true')
p.add_argument('--skip-links', default=False, action='store_true',
help="if set, all link metadata will be ignored (the `liens` table will be empty)")
args = p.parse_args()

if not os.path.isdir(args.anomalies_dir):
Expand All @@ -468,6 +470,10 @@ def main():
result = db.one(query)
print("> Sent `%s` to SQLite, got `%s` as result" % (query, result))

process_links = not args.skip_links
if args.skip_links:
db.run("DELETE FROM liens")

# Look for new archives in the given directory
last_update = db.one("SELECT value FROM db_meta WHERE key = 'last_update'")
print("> last_update is", last_update)
Expand All @@ -494,7 +500,7 @@ def main():
skipped = 0
print("> Processing %s..." % archive_name)
with db:
process_archive(db, args.directory + '/' + archive_name)
process_archive(db, args.directory + '/' + archive_name, process_links)
if last_update:
db.run("UPDATE db_meta SET value = ? WHERE key = 'last_update'", (archive_date,))
else:
Expand Down

0 comments on commit a33ca8c

Please sign in to comment.