From 54700d20990bd68d272d250fd9b4d38edcf544bc Mon Sep 17 00:00:00 2001 From: Paul Lesack Date: Wed, 18 Sep 2024 15:36:16 -0700 Subject: [PATCH] Fixed bug with path column; dv_manifest_gen now handles recursion more like it should --- src/dataverse_utils/__init__.py | 2 +- src/dataverse_utils/dataverse_utils.py | 23 ++++++------ .../scripts/dv_manifest_gen.py | 36 ++++++++++--------- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/src/dataverse_utils/__init__.py b/src/dataverse_utils/__init__.py index 8063eb8..a204956 100644 --- a/src/dataverse_utils/__init__.py +++ b/src/dataverse_utils/__init__.py @@ -5,5 +5,5 @@ ''' from dataverse_utils.dataverse_utils import * -VERSION = (0,13,0) +VERSION = (0,13,1) __version__ = '.'.join([str(x) for x in VERSION]) diff --git a/src/dataverse_utils/dataverse_utils.py b/src/dataverse_utils/dataverse_utils.py index 3e46bf0..4681a24 100644 --- a/src/dataverse_utils/dataverse_utils.py +++ b/src/dataverse_utils/dataverse_utils.py @@ -118,18 +118,21 @@ def make_tsv(start_dir, in_list=None, def_tag='Data', # pylint: disable=too-many if kwargs.get('path'): headers.insert(1, 'path') outf = io.StringIO(newline='') - tsv_writer = csv.writer(outf, delimiter='\t', - quoting=quotype - ) + tsv_writer = csv.DictWriter(outf, delimiter='\t', + quoting=quotype, + fieldnames=headers, + extrasaction='ignore') if inc_header: - tsv_writer.writerow(headers) + tsv_writer.writeheader() for row in in_list: - desc = os.path.splitext(os.path.basename(row))[0] - if mime: - mtype = mimetypes.guess_type(row)[0] - tsv_writer.writerow([row, desc, def_tag, mtype]) - else: - tsv_writer.writerow([row, desc, def_tag]) + #the columns + r = {} + r['file'] = row + r['description'] = os.path.splitext(os.path.basename(row))[0] + r['mimetype'] = mimetypes.guess_type(row)[0] + r['tags'] = def_tag + r['path'] = '' + tsv_writer.writerow(r) outf.seek(0) outfile = outf.read() outf.close() diff --git a/src/dataverse_utils/scripts/dv_manifest_gen.py b/src/dataverse_utils/scripts/dv_manifest_gen.py index 0e78855..465685f 100644 --- a/src/dataverse_utils/scripts/dv_manifest_gen.py +++ b/src/dataverse_utils/scripts/dv_manifest_gen.py @@ -6,6 +6,7 @@ ''' import argparse +import glob import os import pathlib #pathlib new for Python 3.5 @@ -16,7 +17,7 @@ import dataverse_utils as du -VERSION = (0, 5, 0) +VERSION = (0, 5, 1) __version__ = '.'.join([str(x) for x in VERSION]) def parse() -> argparse.ArgumentParser(): @@ -100,27 +101,28 @@ def main() -> None: args.quote = quotype(args.quote) if args.quote == -1: parser.error('Invalid quotation type') - f_list = [] - if not args.files: - args.files = [str(x) for x in pathlib.Path('./').glob('*')] - if args.show_hidden: - args.files += [str(x) for x in pathlib.Path('./').glob('.*')] - for fil in args.files: - finder = pathlib.Path(fil).expanduser() - if args.recursive and finder.is_dir(): - f_list += list(finder.rglob('*')) + f_list = [] + for file in args.files: + if not args.recursive: + f_list += glob.glob(file, + include_hidden=args.show_hidden) else: - f_list += list(finder.parent.glob(finder.name)) - #Set comprehension strips out duplicates - #Strip out hidden files and directories - if args.show_hidden: - f_list = {str(x) for x in f_list if x.is_file()} - else: - f_list = {str(x) for x in f_list if x.is_file() and not re.search(r'^\.[Aa9-Zz9]*', str(x))} + f_list += glob.glob(file+'/**', recursive=True, + include_hidden=args.show_hidden) + if not f_list and not args.files: + if not args.recursive: + f_list += glob.glob('./*', include_hidden=args.show_hidden) + else: + f_list += glob.glob('./**', recursive=True, + include_hidden=args.show_hidden) + f_list = [pathlib.Path(_) for _ in f_list] + f_list = [_ for _ in f_list if _.stem != '' and _.exists() and _.is_file()] if not f_list: + #nothing to do print('Nothing matching these criteria. No manifest generated') sys.exit() + if args.filename: du.dump_tsv(os.getcwd(), filename=args.filename, in_list=f_list,