Skip to content

Commit

Permalink
Merge pull request #24 from Legilibre/skip-stray-files
Browse files: browse the repository at this point in the history
Skip unknown folders in LEGI archives
  • Loading branch information
Changaco authored Feb 8, 2018
2 parents a33ca8c + 5c72751 commit 13df894
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions legi/tar2sqlite.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -186,6 +186,7 @@ def count_one(k):
counts[k] = 1

skipped = 0
unknown_folders = {}
liste_suppression = []
xml = etree.XMLParser(remove_blank_text=True)
with libarchive.file_reader(archive_path) as archive:
Expand All @@ -200,6 +201,13 @@ def count_one(k):
if parts[1] == 'legi':
path = path[len(parts[0])+1:]
parts = parts[1:]
if not parts[2].startswith('code_et_TNC_'):
# https://github.com/Legilibre/legi.py/issues/23
try:
unknown_folders[parts[2]] += 1
except KeyError:
unknown_folders[parts[2]] = 1
continue
dossier = parts[3]
text_cid = parts[11]
text_id = parts[-1][:-4]
Expand Down Expand Up @@ -443,6 +451,10 @@ def count_one(k):
if skipped:
print("skipped", skipped, "files that haven't changed")

if unknown_folders:
for d, x in unknown_folders.items():
print("skipped", x, "files in unknown folder `%s`" % d)

if liste_suppression:
suppress(get_table, db, liste_suppression)

Expand Down

0 comments on commit 13df894

Please sign in to comment.