From 6ea10c778efc0270ee8319489510df93aec538ea Mon Sep 17 00:00:00 2001 From: Quinten Steenhuis Date: Tue, 13 Feb 2024 10:24:20 -0500 Subject: [PATCH] Fix types --- docassemble/ALDashboard/aldashboard.py | 5 +- docassemble/ALDashboard/create_package.py | 420 +++++++++++------- docassemble/ALDashboard/docx_wrangling.py | 32 +- docassemble/ALDashboard/package_scanner.py | 321 +++++++------ docassemble/ALDashboard/translation.py | 372 +++++++++++----- .../ALDashboard/validate_attachment.py | 21 +- pyproject.toml | 4 + 7 files changed, 710 insertions(+), 465 deletions(-) diff --git a/docassemble/ALDashboard/aldashboard.py b/docassemble/ALDashboard/aldashboard.py index 4178b52..74fdce8 100644 --- a/docassemble/ALDashboard/aldashboard.py +++ b/docassemble/ALDashboard/aldashboard.py @@ -329,6 +329,7 @@ def list_installed_fonts(): # subq = db.session.query(db.func.max(UserDict.indexno).label('indexno'), UserDict.filename, UserDict.key).group_by(UserDict.filename, UserDict.key).subquery() # interview_query = db.session.query(UserDictKeys.user_id, UserDictKeys.temp_user_id, UserDictKeys.filename, UserDictKeys.key, UserDict.dictionary, UserDict.encrypted, UserModel.email).join(subq, and_(subq.c.filename == UserDictKeys.filename, subq.c.key == UserDictKeys.key)).join(UserDict, and_(UserDict.indexno == subq.c.indexno, UserDict.key == UserDictKeys.key, UserDict.filename == UserDictKeys.filename)).join(UserModel, UserModel.id == UserDictKeys.user_id).filter(UserDictKeys.user_id == user_id, UserDictKeys.filename == filename, UserDictKeys.key == session).group_by(UserModel.email, UserDictKeys.user_id, UserDictKeys.temp_user_id, UserDictKeys.filename, UserDictKeys.key, UserDict.dictionary, UserDict.encrypted, UserDictKeys.indexno).order_by(UserDictKeys.indexno) + def nicer_interview_filename(filename: str) -> str: """ Given a filename like docassemble.playground10ALWeaver:data/questions/assembly_line.yml, @@ -338,12 +339,12 @@ def nicer_interview_filename(filename: str) -> str: # Fixing the slicing for the first part of the filename if filename_parts[0].startswith("docassemble."): - filename_parts[0] = filename_parts[0][len("docassemble."):] + filename_parts[0] = filename_parts[0][len("docassemble.") :] # Check if there are two parts and modify the second part if len(filename_parts) > 1: if filename_parts[1].startswith("data/questions/"): - filename_parts[1] = filename_parts[1][len("data/questions/"):] + filename_parts[1] = filename_parts[1][len("data/questions/") :] return f"{filename_parts[0]}:{filename_parts[1].replace('.yml', '')}" return filename_parts[0] diff --git a/docassemble/ALDashboard/create_package.py b/docassemble/ALDashboard/create_package.py index 5203338..1909437 100644 --- a/docassemble/ALDashboard/create_package.py +++ b/docassemble/ALDashboard/create_package.py @@ -1,125 +1,168 @@ -from docassemble.base.util import log, space_to_underscore, bold, DAObject, DAList, DAFile, DAFileList, path_and_mimetype, user_info +from docassemble.base.util import ( + log, + space_to_underscore, + bold, + DAObject, + DAList, + DAFile, + DAFileList, + path_and_mimetype, + user_info, +) from docassemble.webapp.files import SavedFile from docassemble.webapp.backend import directory_for import datetime import zipfile import os import re -from typing import Any, Dict, List, Tuple, Union #, Set +from typing import Any, Dict, List, Tuple, Union # , Set -__all__ = ['get_files','get_list_of_projects', 'create_user_playground_zip', 'create_package_zip'] +__all__ = [ + "get_files", + "get_list_of_projects", + 
"create_user_playground_zip", + "create_package_zip", +] -#def get_playground_files(userid:int, project:str=None): +# def get_playground_files(userid:int, project:str=None): + + +def get_files(user_id, section="playground", project="default"): + area = SavedFile(user_id, fix=True, section=section) + the_directory = directory_for(area, project) + files = [ + os.path.join(the_directory, f) + for f in os.listdir(the_directory) + if os.path.isfile(os.path.join(the_directory, f)) + ] + return files -def get_files(user_id, section='playground', project='default'): - area = SavedFile(user_id, fix=True, section=section) - the_directory = directory_for(area, project) - files = [os.path.join(the_directory,f) for f in os.listdir(the_directory) if os.path.isfile(os.path.join(the_directory, f))] - return files def get_list_of_projects(user_id): - playground = SavedFile(user_id, fix=False, section='playground') + playground = SavedFile(user_id, fix=False, section="playground") return playground.list_of_dirs() + def project_name(name): - return '' if name == 'default' else name + return "" if name == "default" else name + + +def create_user_playground_zip( + user_id: int, name: str, project: str = "default", fileobj: DAFile = None +): + folders_and_files = {} + for section in ( + ("playground", "questions"), + ("playgroundtemplate", "templates"), + ("playgroundstatic", "static"), + ("playgroundsources", "sources"), + ("playgroundmodules", "modules"), + ): + folders_and_files[section[1]] = get_files(user_id, section[0], project) -def create_user_playground_zip(user_id:int, name:str, project:str='default', fileobj:DAFile=None): - folders_and_files = {} - for section in (('playground','questions'), ('playgroundtemplate', 'templates'), ('playgroundstatic','static'), ('playgroundsources','sources'), ('playgroundmodules','modules')): - folders_and_files[section[1]] = get_files(user_id, section[0], project) - - return create_package_zip(f"{name}-{project}", - info = { - "license": "MIT", - "author_name": name, - "readme": "readme", - "description": "playground backup", - "url": "https://docassemble.org", - "version": "1.0", - "dependencies": "" - }, - author_info = { - "author name and email": name - }, - folders_and_files=folders_and_files, - fileobj=fileobj) - -def create_package_zip(pkgname: str, info: dict, author_info: dict, folders_and_files: dict, fileobj:DAFile=None)->DAFile: - """ - Given a dictionary of lists, with the keys representing folders and the values - representing a list of DAFiles, create a Python package with Docassemble conventions. 
- info: (created by DAInterview.package_info()) - license - author_name - readme - description - url - version - dependencies - // interview_files replaced with folders_and_files - // template_files - // module_files - // static_files - author_info: - author name and email - folders_and_files: - questions->list of absolute file paths on the local filesystem - templates - modules - static - sources + return create_package_zip( + f"{name}-{project}", + info={ + "license": "MIT", + "author_name": name, + "readme": "readme", + "description": "playground backup", + "url": "https://docassemble.org", + "version": "1.0", + "dependencies": "", + }, + author_info={"author name and email": name}, + folders_and_files=folders_and_files, + fileobj=fileobj, + ) - Strucure of a docassemble package: - + docassemble-PKGNAME/ - LICENSE - MANIFEST.in - README.md - setup.cfg - setup.py - +-------docassemble - __init__.py - +------PKGNAME - __init__.py - SOME_MODULE.py - +------data - +------questions - README.md - +------sources - README.md - +------static - README.md - +------templates - README.md - """ - pkgname = space_to_underscore(pkgname) - if fileobj: - zip_download = fileobj - else: - zip_download = DAFile() - pkg_path_prefix = "docassemble-" + pkgname - pkg_path_init_prefix = os.path.join(pkg_path_prefix, "docassemble") - pkg_path_deep_prefix = os.path.join(pkg_path_init_prefix, pkgname) - pkg_path_data_prefix = os.path.join(pkg_path_deep_prefix, "data") - pkg_path_questions_prefix = os.path.join(pkg_path_data_prefix,"questions") - pkg_path_sources_prefix = os.path.join(pkg_path_data_prefix,"sources") - pkg_path_static_prefix = os.path.join(pkg_path_data_prefix,"static") - pkg_path_templates_prefix = os.path.join(pkg_path_data_prefix,"templates") - zip_download.initialize(filename="docassemble-" + pkgname + ".zip") - zip_obj = zipfile.ZipFile(zip_download.path(),'w') +def create_package_zip( + pkgname: str, + info: dict, + author_info: dict, + folders_and_files: dict, + fileobj: DAFile = None, +) -> DAFile: + """ + Given a dictionary of lists, with the keys representing folders and the values + representing a list of DAFiles, create a Python package with Docassemble conventions. 
+ info: (created by DAInterview.package_info()) + license + author_name + readme + description + url + version + dependencies + // interview_files replaced with folders_and_files + // template_files + // module_files + // static_files + author_info: + author name and email + folders_and_files: + questions->list of absolute file paths on the local filesystem + templates + modules + static + sources - dependencies = ",".join(['\'' + dep + '\'' for dep in info['dependencies']]) + Strucure of a docassemble package: + + docassemble-PKGNAME/ + LICENSE + MANIFEST.in + README.md + setup.cfg + setup.py + +-------docassemble + __init__.py + +------PKGNAME + __init__.py + SOME_MODULE.py + +------data + +------questions + README.md + +------sources + README.md + +------static + README.md + +------templates + README.md + """ + pkgname = space_to_underscore(pkgname) + if fileobj: + zip_download = fileobj + else: + zip_download = DAFile() + pkg_path_prefix = "docassemble-" + pkgname + pkg_path_init_prefix = os.path.join(pkg_path_prefix, "docassemble") + pkg_path_deep_prefix = os.path.join(pkg_path_init_prefix, pkgname) + pkg_path_data_prefix = os.path.join(pkg_path_deep_prefix, "data") + pkg_path_questions_prefix = os.path.join(pkg_path_data_prefix, "questions") + pkg_path_sources_prefix = os.path.join(pkg_path_data_prefix, "sources") + pkg_path_static_prefix = os.path.join(pkg_path_data_prefix, "static") + pkg_path_templates_prefix = os.path.join(pkg_path_data_prefix, "templates") - initpy = """\ + zip_download.initialize(filename="docassemble-" + pkgname + ".zip") + zip_obj = zipfile.ZipFile(zip_download.path(), "w") + + dependencies = ",".join(["'" + dep + "'" for dep in info["dependencies"]]) + + initpy = """\ try: __import__('pkg_resources').declare_namespace(__name__) except ImportError: __path__ = __import__('pkgutil').extend_path(__path__, __name__) """ - licensetext = str(info['license']) - if re.search(r'MIT License', licensetext): - licensetext += '\n\nCopyright (c) ' + str(datetime.datetime.now().year) + ' ' + str(info.get('author_name', '')) + """ + licensetext = str(info["license"]) + if re.search(r"MIT License", licensetext): + licensetext += ( + "\n\nCopyright (c) " + + str(datetime.datetime.now().year) + + " " + + str(info.get("author_name", "")) + + """ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -136,18 +179,27 @@ def create_package_zip(pkgname: str, info: dict, author_info: dict, folders_and_ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - if info['readme'] and re.search(r'[A-Za-z]', info['readme']): - readme = str(info['readme']) - else: - readme = '# docassemble.' + str(pkgname) + "\n\n" + info['description'] + "\n\n## Author\n\n" + author_info['author name and email'] + "\n\n" - manifestin = """\ + ) + if info["readme"] and re.search(r"[A-Za-z]", info["readme"]): + readme = str(info["readme"]) + else: + readme = ( + "# docassemble." 
+ + str(pkgname) + + "\n\n" + + info["description"] + + "\n\n## Author\n\n" + + author_info["author name and email"] + + "\n\n" + ) + manifestin = """\ include README.md """ - setupcfg = """\ + setupcfg = """\ [metadata] description_file = README.md """ - setuppy = """\ + setuppy = """\ import os import sys from setuptools import setup, find_packages @@ -191,84 +243,120 @@ def find_package_data(where='.', package='', exclude=standard_exclude, exclude_d out.setdefault(package, []).append(prefix+name) return out """ - setuppy += "setup(name='docassemble." + str(pkgname) + "',\n" + """\ - version=""" + repr(info.get('version', '')) + """, - description=(""" + repr(info.get('description', '')) + """), - long_description=""" + repr(readme) + """, + setuppy += ( + "setup(name='docassemble." + + str(pkgname) + + "',\n" + + """\ + version=""" + + repr(info.get("version", "")) + + """, + description=(""" + + repr(info.get("description", "")) + + """), + long_description=""" + + repr(readme) + + """, long_description_content_type='text/markdown', - author=""" + repr(info.get('author_name', '')) + """, - author_email=""" + repr(info.get('author_email', '')) + """, - license=""" + repr(info.get('license', '')) + """, - url=""" + repr(info['url'] if info['url'] else 'https://docassemble.org') + """, + author=""" + + repr(info.get("author_name", "")) + + """, + author_email=""" + + repr(info.get("author_email", "")) + + """, + license=""" + + repr(info.get("license", "")) + + """, + url=""" + + repr(info["url"] if info["url"] else "https://docassemble.org") + + """, packages=find_packages(), namespace_packages=['docassemble'], - install_requires=[""" + dependencies + """], + install_requires=[""" + + dependencies + + """], zip_safe=False, - package_data=find_package_data(where='docassemble/""" + str(pkgname) + """/', package='docassemble.""" + str(pkgname) + """'), + package_data=find_package_data(where='docassemble/""" + + str(pkgname) + + """/', package='docassemble.""" + + str(pkgname) + + """'), ) """ - templatereadme = """\ + ) + templatereadme = """\ # Template directory If you want to use templates for document assembly, put them in this directory. """ - staticreadme = """\ + staticreadme = """\ # Static file directory If you want to make files available in the web app, put them in this directory. """ - sourcesreadme = """\ + sourcesreadme = """\ # Sources directory This directory is used to store word translation files, machine learning training files, and other source files. """ - templatesreadme = """\ + templatesreadme = """\ # Template directory This directory is used to store templates. 
""" - # Write the standard files - zip_obj.writestr(os.path.join(pkg_path_prefix,"LICENSE"), licensetext) - zip_obj.writestr(os.path.join(pkg_path_prefix,"MANIFEST.in"), manifestin) - zip_obj.writestr(os.path.join(pkg_path_prefix,"README.md"), readme) - zip_obj.writestr(os.path.join(pkg_path_prefix,"setup.cfg"), setupcfg) - zip_obj.writestr(os.path.join(pkg_path_prefix,"setup.py"), setuppy) - zip_obj.writestr(os.path.join(pkg_path_init_prefix,"__init__.py"), initpy) - zip_obj.writestr(os.path.join(pkg_path_deep_prefix,"__init__.py"), ("__version__ = " + repr(info.get('version', '')) + "\n") ) - zip_obj.writestr(os.path.join(pkg_path_questions_prefix,"README.md"), templatereadme ) - zip_obj.writestr(os.path.join(pkg_path_sources_prefix,"README.md"), sourcesreadme ) - zip_obj.writestr(os.path.join(pkg_path_static_prefix,"README.md"), staticreadme) - zip_obj.writestr(os.path.join(pkg_path_templates_prefix,"README.md"), templatesreadme) - - # Modules - for f in folders_and_files.get('modules',[]): - try: - zip_obj.write(f,os.path.join(pkg_path_deep_prefix, os.path.basename(f))) - except: - log('Unable to add file ' + repr(f)) - # Templates - for f in folders_and_files.get('templates',[]): - try: - zip_obj.write(f,os.path.join(pkg_path_templates_prefix, os.path.basename(f))) - except: - log('Unable to add file ' + repr(f)) - # sources - for f in folders_and_files.get('sources',[]): - try: - zip_obj.write(f,os.path.join(pkg_path_sources_prefix, os.path.basename(f))) - except: - log('Unable to add file ' + repr(f)) - # static - for f in folders_and_files.get('static',[]): - try: - zip_obj.write(f,os.path.join(pkg_path_static_prefix, os.path.basename(f))) - except: - log('Unable to add file ' + repr(f)) - # questions - for f in folders_and_files.get('questions',[]): - try: - zip_obj.write(f,os.path.join(pkg_path_questions_prefix, os.path.basename(f))) - except: - log('Unable to add file ' + repr(f)) - - zip_obj.close() - zip_download.commit() - return zip_download + # Write the standard files + zip_obj.writestr(os.path.join(pkg_path_prefix, "LICENSE"), licensetext) + zip_obj.writestr(os.path.join(pkg_path_prefix, "MANIFEST.in"), manifestin) + zip_obj.writestr(os.path.join(pkg_path_prefix, "README.md"), readme) + zip_obj.writestr(os.path.join(pkg_path_prefix, "setup.cfg"), setupcfg) + zip_obj.writestr(os.path.join(pkg_path_prefix, "setup.py"), setuppy) + zip_obj.writestr(os.path.join(pkg_path_init_prefix, "__init__.py"), initpy) + zip_obj.writestr( + os.path.join(pkg_path_deep_prefix, "__init__.py"), + ("__version__ = " + repr(info.get("version", "")) + "\n"), + ) + zip_obj.writestr( + os.path.join(pkg_path_questions_prefix, "README.md"), templatereadme + ) + zip_obj.writestr(os.path.join(pkg_path_sources_prefix, "README.md"), sourcesreadme) + zip_obj.writestr(os.path.join(pkg_path_static_prefix, "README.md"), staticreadme) + zip_obj.writestr( + os.path.join(pkg_path_templates_prefix, "README.md"), templatesreadme + ) + + # Modules + for f in folders_and_files.get("modules", []): + try: + zip_obj.write(f, os.path.join(pkg_path_deep_prefix, os.path.basename(f))) + except: + log("Unable to add file " + repr(f)) + # Templates + for f in folders_and_files.get("templates", []): + try: + zip_obj.write( + f, os.path.join(pkg_path_templates_prefix, os.path.basename(f)) + ) + except: + log("Unable to add file " + repr(f)) + # sources + for f in folders_and_files.get("sources", []): + try: + zip_obj.write(f, os.path.join(pkg_path_sources_prefix, os.path.basename(f))) + except: + log("Unable to add file " + 
repr(f)) + # static + for f in folders_and_files.get("static", []): + try: + zip_obj.write(f, os.path.join(pkg_path_static_prefix, os.path.basename(f))) + except: + log("Unable to add file " + repr(f)) + # questions + for f in folders_and_files.get("questions", []): + try: + zip_obj.write( + f, os.path.join(pkg_path_questions_prefix, os.path.basename(f)) + ) + except: + log("Unable to add file " + repr(f)) + + zip_obj.close() + zip_download.commit() + return zip_download diff --git a/docassemble/ALDashboard/docx_wrangling.py b/docassemble/ALDashboard/docx_wrangling.py index 66643d1..9683466 100644 --- a/docassemble/ALDashboard/docx_wrangling.py +++ b/docassemble/ALDashboard/docx_wrangling.py @@ -33,20 +33,19 @@ def add_paragraph_before(paragraph, text): def update_docx( - document: Union[docx.Document, str], - modified_runs: List[Tuple[int, int, str, int]] + document: Union[docx.Document, str], modified_runs: List[Tuple[int, int, str, int]] ) -> docx.Document: """Update the document with modified runs. Args: document: the docx.Document object, or the path to the DOCX file - modified_runs: a tuple of paragraph number, run number, the modified text, and - a number from -1 to 1 indicating whether a new paragraph should be inserted + modified_runs: a tuple of paragraph number, run number, the modified text, and + a number from -1 to 1 indicating whether a new paragraph should be inserted before or after the current paragraph. Returns: The modified document. - """ + """ modified_runs.sort(key=lambda x: x[0], reverse=True) if isinstance(document, str): @@ -67,9 +66,9 @@ def update_docx( continue # Skip invalid run index if new_paragraph == 1: - add_paragraph_after(paragraph, modified_text) + add_paragraph_after(paragraph, modified_text) elif new_paragraph == -1: - add_paragraph_before(paragraph, modified_text) + add_paragraph_before(paragraph, modified_text) else: paragraph.runs[run_number].text = modified_text @@ -264,23 +263,6 @@ def get_labeled_docx_runs( return guesses -def docx_rewrite(docx_path: str, prompt:str, openai_client: Optional[OpenAI] = None,) -> List[Tuple[int, int, str, int]]: - """Use GPT to rewrite the contents of a DOCX file paragraph by paragraph. - - Args: - docx_path: path to the DOCX file - prompt: the prompt to use for OpenAI - openai_client: an optional OpenAI client - - Returns: - The modified document. - """ - doc = docx.Document(docx_path) - for paragraph in doc.paragraphs: - paragraph.text = paragraph.text.replace(find, replace) - return doc - - def modify_docx_with_openai_guesses(docx_path: str) -> docx.Document: """Uses OpenAI to guess the variable names for a document and then modifies the document with the guesses. 
@@ -297,4 +279,4 @@ def modify_docx_with_openai_guesses(docx_path: str) -> docx.Document: if __name__ == "__main__": new_doc = modify_docx_with_openai_guesses(sys.argv[1]) - new_doc.save(sys.argv[1] + ".output.docx") \ No newline at end of file + new_doc.save(sys.argv[1] + ".output.docx") diff --git a/docassemble/ALDashboard/package_scanner.py b/docassemble/ALDashboard/package_scanner.py index 770d0b7..28cfe35 100644 --- a/docassemble/ALDashboard/package_scanner.py +++ b/docassemble/ALDashboard/package_scanner.py @@ -7,172 +7,201 @@ from io import BytesIO from docassemble.base.util import as_datetime -#----------------------------------------------------- + +# ----------------------------------------------------- # Extract server name -#----------------------------------------------------- +# ----------------------------------------------------- def get_server_name(interview_url): - begin = interview_url.find('//') + 2 - end = interview_url.find('/interview?') - - return interview_url[begin:end] + begin = interview_url.find("//") + 2 + end = interview_url.find("/interview?") + + return interview_url[begin:end] -#----------------------------------------------------- + +# ----------------------------------------------------- # Crawl installed packages on the current server. # Store key and non key docassemble packages separately. -#----------------------------------------------------- -def installed_pkg_list(target: list) -> dict: - installed_packages = {} - key_packages = {} - non_key_packages = {} - - for p in pkg_resources.working_set: - # docassemble packages - if 'docassemble' in p.project_name: - # Key packages - if p.project_name in target: - key_packages[p.project_name] = p.version - # non-key packages - if p.project_name not in target: - non_key_packages[p.project_name] = p.version - - sorted_key_packages = sort_dict(key_packages) - sorted_non_key_packages = sort_dict(non_key_packages) - - installed_packages['key_pkgs'] = sorted_key_packages - installed_packages['non_key_pkgs'] = sorted_non_key_packages - - return installed_packages +# ----------------------------------------------------- +def installed_pkg_list(target: list) -> dict: + installed_packages = {} + key_packages = {} + non_key_packages = {} + + for p in pkg_resources.working_set: + # docassemble packages + if "docassemble" in p.project_name: + # Key packages + if p.project_name in target: + key_packages[p.project_name] = p.version + # non-key packages + if p.project_name not in target: + non_key_packages[p.project_name] = p.version + + sorted_key_packages = sort_dict(key_packages) + sorted_non_key_packages = sort_dict(non_key_packages) + + installed_packages["key_pkgs"] = sorted_key_packages + installed_packages["non_key_pkgs"] = sorted_non_key_packages + + return installed_packages + def sort_dict(raw_data: dict): - return dict( sorted(raw_data.items(), key=lambda x: x[0].lower()) ) - -#----------------------------------------------------- + return dict(sorted(raw_data.items(), key=lambda x: x[0].lower())) + + +# ----------------------------------------------------- # Crawl github packages (default branch) under a given github user name -#----------------------------------------------------- +# ----------------------------------------------------- # Borrowed ideas from https://github.com/rsain/GitHub-Crawler -# It's 4 years old and broken, but the structure is still valid. +# It's 4 years old and broken, but the structure is still valid. 
# # The GitHub API limits the queries to get 100 elements per page and up to 1,000 elements in total. -# To get more than 1,000 elements, the main query should be split in multiple subqueries +# To get more than 1,000 elements, the main query should be split in multiple subqueries # using different time windows through sub_queries (a list of subqueries). # # The original had a comment that DELAY_BETWEEN_QUERYS is used to avoid be banned - Is this still valid? -# See documentation regarding Github Search API limitations: +# See documentation regarding Github Search API limitations: # https://docs.github.com/en/rest/reference/search # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting -URL = "https://api.github.com/search/repositories?q=" #The basic URL to use the GitHub API -PARAMETERS = "&per_page=100" #Additional parameters for the query (by default 100 items per page) -DELAY_BETWEEN_QUERYS = 3 #The time to wait between different queries to GitHub - -def getUrl (url) : - ''' Given a URL it returns its body ''' - buffer = BytesIO() - c = pycurl.Curl() - c.setopt(c.URL, url) - c.setopt(c.WRITEDATA, buffer) - c.setopt(c.CAINFO, certifi.where()) - c.perform() - c.close() - body = buffer.getvalue() - # Body is a byte string. - # We have to know the encoding in order to print it to a text file. - return body.decode('iso-8859-1') - -def fetch_github_repos (github_user, sub_queries) -> dict: - ''' Given a github user input, returns soughted info. It doesn't contain version number. ''' - repositories = {} - - # Run queries to get information in json format and download ZIP file for each repository - for subquery in range(1, len(sub_queries)+1): - - #Obtain the number of pages for the current subquery (by default each page contains 100 items) - url = URL + github_user + str(sub_queries[subquery-1]) + PARAMETERS - dataRead = json.loads(getUrl(url)) - numberOfPages = int(math.ceil(dataRead.get('total_count')/100.0)) - - #Results are in different pages - for currentPage in range(1, numberOfPages+1): - url = URL + github_user + str(sub_queries[subquery-1]) + PARAMETERS + "&page=" + str(currentPage) - dataRead = json.loads(getUrl(url)) - - #Iteration over all the repositories in the current json page [a list of dicts] - for repo in dataRead['items']: - repositories[repo['name']] = { - 'branch_name': repo['default_branch'], - 'created_date': as_datetime(repo['created_at']).format_date(), - 'push_date': as_datetime(repo['pushed_at']).format_date(), - 'open_issues_count': repo['open_issues_count'], - 'repo_url': repo['html_url'], - } - - # A delay between different subqueries - if (subquery < len(sub_queries)): - time.sleep(DELAY_BETWEEN_QUERYS) - return sort_dict(repositories) - -#----------------------------------------------------- +URL = "https://api.github.com/search/repositories?q=" # The basic URL to use the GitHub API +PARAMETERS = "&per_page=100" # Additional parameters for the query (by default 100 items per page) +DELAY_BETWEEN_QUERYS = 3 # The time to wait between different queries to GitHub + + +def getUrl(url): + """Given a URL it returns its body""" + buffer = BytesIO() + c = pycurl.Curl() + c.setopt(c.URL, url) + c.setopt(c.WRITEDATA, buffer) + c.setopt(c.CAINFO, certifi.where()) + c.perform() + c.close() + body = buffer.getvalue() + # Body is a byte string. + # We have to know the encoding in order to print it to a text file. 
+ return body.decode("iso-8859-1") + + +def fetch_github_repos(github_user, sub_queries) -> dict: + """Given a github user input, returns soughted info. It doesn't contain version number.""" + repositories = {} + + # Run queries to get information in json format and download ZIP file for each repository + for subquery in range(1, len(sub_queries) + 1): + # Obtain the number of pages for the current subquery (by default each page contains 100 items) + url = URL + github_user + str(sub_queries[subquery - 1]) + PARAMETERS + dataRead = json.loads(getUrl(url)) + numberOfPages = int(math.ceil(dataRead.get("total_count") / 100.0)) + + # Results are in different pages + for currentPage in range(1, numberOfPages + 1): + url = ( + URL + + github_user + + str(sub_queries[subquery - 1]) + + PARAMETERS + + "&page=" + + str(currentPage) + ) + dataRead = json.loads(getUrl(url)) + + # Iteration over all the repositories in the current json page [a list of dicts] + for repo in dataRead["items"]: + repositories[repo["name"]] = { + "branch_name": repo["default_branch"], + "created_date": as_datetime(repo["created_at"]).format_date(), + "push_date": as_datetime(repo["pushed_at"]).format_date(), + "open_issues_count": repo["open_issues_count"], + "repo_url": repo["html_url"], + } + + # A delay between different subqueries + if subquery < len(sub_queries): + time.sleep(DELAY_BETWEEN_QUERYS) + return sort_dict(repositories) + + +# ----------------------------------------------------- # Grab github repo version number in setup.py and add it to the input repo_list # Separate the pile into key repos/non-key repos & sort each, then store them in a new nested dict. -#----------------------------------------------------- +# ----------------------------------------------------- def fetch_github_repo_version(repo_list, key_pkgs, github_user) -> dict: - import requests - - github_repos = {} # Parent dict - key_repos = {} # Child dict - nonkey_repos = {} # Child dict - - github_key_pkg_names = [a.replace('.', '-') for a in key_pkgs] - - for k, v in list(repo_list.items()): - # Construct url for repo's setup.py file - setup_py_URL = 'https://raw.githubusercontent.com/' + github_user + '/' + k + '/' + v['branch_name'] + '/setup.py' - - # Fetch file content - file_content = requests.get(setup_py_URL) - has_version_num = False - # Not every package has a setup.py file - if file_content.text: - # Find the line containing "version=" and copy the version number - for line in file_content: - decoded_line = line.decode("utf-8") - if 'version=' in decoded_line: - str_start = decoded_line.find('version=') - str_end = decoded_line.find('description') - version_num = decoded_line[str_start:str_end][9:].replace('\',\n', '') - v['version'] = version_num # Add version number to the original repo_list. 
- has_version_num = True - break - - # Separate key pkgs from non-key pkgs and save them into new dicts - if k in github_key_pkg_names: - key_repos[k] = v #copy the record into key_repos - else: - if has_version_num: # Only care about repos with version#/setup.py - nonkey_repos[k] = v #copy the record into nonkey_repos - - # Store the sorted new repos into the parent dict - github_repos['key_repos'] = sort_dict(key_repos) - github_repos['non_key_repos'] = sort_dict(nonkey_repos) - - return github_repos - -#----------------------------------------------------- + import requests + + github_repos = {} # Parent dict + key_repos = {} # Child dict + nonkey_repos = {} # Child dict + + github_key_pkg_names = [a.replace(".", "-") for a in key_pkgs] + + for k, v in list(repo_list.items()): + # Construct url for repo's setup.py file + setup_py_URL = ( + "https://raw.githubusercontent.com/" + + github_user + + "/" + + k + + "/" + + v["branch_name"] + + "/setup.py" + ) + + # Fetch file content + file_content = requests.get(setup_py_URL) + has_version_num = False + # Not every package has a setup.py file + if file_content.text: + # Find the line containing "version=" and copy the version number + for line in file_content: + decoded_line = line.decode("utf-8") + if "version=" in decoded_line: + str_start = decoded_line.find("version=") + str_end = decoded_line.find("description") + version_num = decoded_line[str_start:str_end][9:].replace( + "',\n", "" + ) + v[ + "version" + ] = version_num # Add version number to the original repo_list. + has_version_num = True + break + + # Separate key pkgs from non-key pkgs and save them into new dicts + if k in github_key_pkg_names: + key_repos[k] = v # copy the record into key_repos + else: + if has_version_num: # Only care about repos with version#/setup.py + nonkey_repos[k] = v # copy the record into nonkey_repos + + # Store the sorted new repos into the parent dict + github_repos["key_repos"] = sort_dict(key_repos) + github_repos["non_key_repos"] = sort_dict(nonkey_repos) + + return github_repos + + +# ----------------------------------------------------- # Compare server repo version with github repo version # Return the results as a dict for screen display -#----------------------------------------------------- +# ----------------------------------------------------- def compare_repo_version(server_repo_dict, github_repo_dict) -> dict: - version_table = {} - for k1, v1 in server_repo_dict.items(): # Loop thru server repos - version_table[k1] = {'server': v1, 'github': 'No new commit'} - - if len(github_repo_dict) > 0: # If there are any commits in the period - for k2, v2 in github_repo_dict.items(): - if k1 == k2.replace('-', '.'): # If repo match is found - if v1 in v2['version']: # Check its version - # Override the version info in the table record - version_table[k1] = {'server': v1, 'github': v2['version']} - else: # Override the not-matched version info with an alert sign - version_table[k1] = {'server': v1 + ' ⛔', 'github': v2['version']} - - return version_table \ No newline at end of file + version_table = {} + for k1, v1 in server_repo_dict.items(): # Loop thru server repos + version_table[k1] = {"server": v1, "github": "No new commit"} + + if len(github_repo_dict) > 0: # If there are any commits in the period + for k2, v2 in github_repo_dict.items(): + if k1 == k2.replace("-", "."): # If repo match is found + if v1 in v2["version"]: # Check its version + # Override the version info in the table record + version_table[k1] = {"server": v1, "github": 
v2["version"]} + else: # Override the not-matched version info with an alert sign + version_table[k1] = { + "server": v1 + " ⛔", + "github": v2["version"], + } + + return version_table diff --git a/docassemble/ALDashboard/translation.py b/docassemble/ALDashboard/translation.py index 7d525a9..771fff5 100644 --- a/docassemble/ALDashboard/translation.py +++ b/docassemble/ALDashboard/translation.py @@ -8,6 +8,7 @@ import zipfile import docassemble.base.config + if not docassemble.base.config.loaded: docassemble.base.config.load() from docassemble.base.config import in_celery @@ -30,6 +31,7 @@ import docassemble.webapp.telnyx import docassemble.webapp.machinelearning from docassemble.webapp.translations import setup_translation + if not in_celery: import docassemble.webapp.worker @@ -41,6 +43,7 @@ from docassemble.base.util import DAFile from docassemble.webapp.server import mako_parts from typing import NamedTuple, Dict + DEFAULT_LANGUAGE = "en" __all__ = [ @@ -48,15 +51,17 @@ "translation_file", ] + class Translation(NamedTuple): - file: DAFile # an XLSX or XLIFF file - untranslated_words: int # Word count for all untranslated segments that are not Mako or HTML - untranslated_segments: int # Number of rows in the output that have untranslated text - one for each question, subquestion, field, etc. + file: DAFile # an XLSX or XLIFF file + untranslated_words: int # Word count for all untranslated segments that are not Mako or HTML + untranslated_segments: int # Number of rows in the output that have untranslated text - one for each question, subquestion, field, etc. total_rows: int -def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: + +def translation_file(yaml_filename: str, tr_lang: str) -> Translation: """ - Return a tuple of the translation file in XLSX format, plus a count of the + Return a tuple of the translation file in XLSX format, plus a count of the number of words and segments that need to be translated. The word and segment count only apply when filetype="XLSX". @@ -64,15 +69,17 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: This code was adjusted from the Flask endpoint-only version in server.py. XLIFF support was removed for now but can be added later. 
""" - filetype:str = "XLSX" # Look in server.py for support of XLIFF format, but we won't implement it here + filetype: str = "XLSX" # Look in server.py for support of XLIFF format, but we won't implement it here output_file = DAFile() setup_translation() - if yaml_filename is None or not re.search(r'\S', yaml_filename): + if yaml_filename is None or not re.search(r"\S", yaml_filename): raise ValueError("YAML filename was not valid") - if tr_lang is None or not re.search(r'\S', tr_lang): + if tr_lang is None or not re.search(r"\S", tr_lang): raise ValueError("You must provide a language") try: - interview_source = docassemble.base.parse.interview_source_from_string(yaml_filename) + interview_source = docassemble.base.parse.interview_source_from_string( + yaml_filename + ) except DAError: raise ValueError("Invalid interview") interview_source.update() @@ -85,9 +92,22 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: the_xlsx_file = docassemble.base.functions.package_data_filename(item) if not os.path.isfile(the_xlsx_file): continue - df = pandas.read_excel(the_xlsx_file, na_values=['NaN', '-NaN', '#NA', '#N/A'], keep_default_na=False) + df = pandas.read_excel( + the_xlsx_file, + na_values=["NaN", "-NaN", "#NA", "#N/A"], + keep_default_na=False, + ) invalid = False - for column_name in ('interview', 'question_id', 'index_num', 'hash', 'orig_lang', 'tr_lang', 'orig_text', 'tr_text'): + for column_name in ( + "interview", + "question_id", + "index_num", + "hash", + "orig_lang", + "tr_lang", + "orig_text", + "tr_text", + ): if column_name not in df.columns: invalid = True break @@ -95,26 +115,42 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: continue for indexno in df.index: try: - assert df['interview'][indexno] - assert df['question_id'][indexno] - assert df['index_num'][indexno] >= 0 - assert df['hash'][indexno] - assert df['orig_lang'][indexno] - assert df['tr_lang'][indexno] - assert df['orig_text'][indexno] != '' - assert df['tr_text'][indexno] != '' - if isinstance(df['orig_text'][indexno], float): - assert not math.isnan(df['orig_text'][indexno]) - if isinstance(df['tr_text'][indexno], float): - assert not math.isnan(df['tr_text'][indexno]) + assert df["interview"][indexno] + assert df["question_id"][indexno] + assert df["index_num"][indexno] >= 0 + assert df["hash"][indexno] + assert df["orig_lang"][indexno] + assert df["tr_lang"][indexno] + assert df["orig_text"][indexno] != "" + assert df["tr_text"][indexno] != "" + if isinstance(df["orig_text"][indexno], float): + assert not math.isnan(df["orig_text"][indexno]) + if isinstance(df["tr_text"][indexno], float): + assert not math.isnan(df["tr_text"][indexno]) except: continue - the_dict = {'interview': str(df['interview'][indexno]), 'question_id': str(df['question_id'][indexno]), 'index_num': df['index_num'][indexno], 'hash': str(df['hash'][indexno]), 'orig_lang': str(df['orig_lang'][indexno]), 'tr_lang': str(df['tr_lang'][indexno]), 'orig_text': str(df['orig_text'][indexno]), 'tr_text': str(df['tr_text'][indexno])} - if df['orig_text'][indexno] not in tr_cache: - tr_cache[df['orig_text'][indexno]] = {} - if df['orig_lang'][indexno] not in tr_cache[df['orig_text'][indexno]]: - tr_cache[df['orig_text'][indexno]][df['orig_lang'][indexno]] = {} - tr_cache[df['orig_text'][indexno]][df['orig_lang'][indexno]][df['tr_lang'][indexno]] = the_dict + the_dict = { + "interview": str(df["interview"][indexno]), + "question_id": str(df["question_id"][indexno]), + "index_num": df["index_num"][indexno], + 
"hash": str(df["hash"][indexno]), + "orig_lang": str(df["orig_lang"][indexno]), + "tr_lang": str(df["tr_lang"][indexno]), + "orig_text": str(df["orig_text"][indexno]), + "tr_text": str(df["tr_text"][indexno]), + } + if df["orig_text"][indexno] not in tr_cache: + tr_cache[df["orig_text"][indexno]] = {} + if ( + df["orig_lang"][indexno] + not in tr_cache[df["orig_text"][indexno]] + ): + tr_cache[df["orig_text"][indexno]][ + df["orig_lang"][indexno] + ] = {} + tr_cache[df["orig_text"][indexno]][df["orig_lang"][indexno]][ + df["tr_lang"][indexno] + ] = the_dict elif item.lower().endswith(".xlf") or item.lower().endswith(".xliff"): the_xlf_file = docassemble.base.functions.package_data_filename(item) if not os.path.isfile(the_xlf_file): @@ -122,15 +158,21 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: tree = ET.parse(the_xlf_file) root = tree.getroot() indexno = 1 - if root.attrib['version'] == "1.2": - for the_file in root.iter('{urn:oasis:names:tc:xliff:document:1.2}file'): - source_lang = the_file.attrib.get('source-language', 'en') - target_lang = the_file.attrib.get('target-language', 'en') - source_filename = the_file.attrib.get('original', yaml_filename) - for transunit in the_file.iter('{urn:oasis:names:tc:xliff:document:1.2}trans-unit'): - orig_text = '' - tr_text = '' - for source in transunit.iter('{urn:oasis:names:tc:xliff:document:1.2}source'): + if root.attrib["version"] == "1.2": + for the_file in root.iter( + "{urn:oasis:names:tc:xliff:document:1.2}file" + ): + source_lang = the_file.attrib.get("source-language", "en") + target_lang = the_file.attrib.get("target-language", "en") + source_filename = the_file.attrib.get("original", yaml_filename) + for transunit in the_file.iter( + "{urn:oasis:names:tc:xliff:document:1.2}trans-unit" + ): + orig_text = "" + tr_text = "" + for source in transunit.iter( + "{urn:oasis:names:tc:xliff:document:1.2}source" + ): if source.text: orig_text += source.text for mrk in source: @@ -138,7 +180,9 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: orig_text += mrk.text if mrk.tail: orig_text += mrk.tail - for target in transunit.iter('{urn:oasis:names:tc:xliff:document:1.2}target'): + for target in transunit.iter( + "{urn:oasis:names:tc:xliff:document:1.2}target" + ): if target.text: tr_text += target.text for mrk in target: @@ -146,26 +190,47 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: tr_text += mrk.text if mrk.tail: tr_text += mrk.tail - if orig_text == '' or tr_text == '': + if orig_text == "" or tr_text == "": continue - the_dict = {'interview': source_filename, 'question_id': 'Unknown' + str(indexno), 'index_num': transunit.attrib.get('id', str(indexno)), 'hash': hashlib.md5(orig_text.encode('utf-8')).hexdigest(), 'orig_lang': source_lang, 'tr_lang': target_lang, 'orig_text': orig_text, 'tr_text': tr_text} + the_dict = { + "interview": source_filename, + "question_id": "Unknown" + str(indexno), + "index_num": transunit.attrib.get("id", str(indexno)), + "hash": hashlib.md5( + orig_text.encode("utf-8") + ).hexdigest(), + "orig_lang": source_lang, + "tr_lang": target_lang, + "orig_text": orig_text, + "tr_text": tr_text, + } if orig_text not in tr_cache: tr_cache[orig_text] = {} if source_lang not in tr_cache[orig_text]: tr_cache[orig_text][source_lang] = {} tr_cache[orig_text][source_lang][target_lang] = the_dict indexno += 1 - elif root.attrib['version'] == "2.0": - source_lang = root.attrib['srcLang'] - target_lang = root.attrib['trgLang'] - for the_file in 
root.iter('{urn:oasis:names:tc:xliff:document:2.0}file'): - source_filename = the_file.attrib.get('original', yaml_filename) - for unit in the_file.iter('{urn:oasis:names:tc:xliff:document:2.0}unit'): - question_id = unit.attrib.get('id', 'Unknown' + str(indexno)) - for segment in unit.iter('{urn:oasis:names:tc:xliff:document:2.0}segment'): - orig_text = '' - tr_text = '' - for source in transunit.iter('{urn:oasis:names:tc:xliff:document:2.0}source'): + elif root.attrib["version"] == "2.0": + source_lang = root.attrib["srcLang"] + target_lang = root.attrib["trgLang"] + for the_file in root.iter( + "{urn:oasis:names:tc:xliff:document:2.0}file" + ): + source_filename = the_file.attrib.get("original", yaml_filename) + for unit in the_file.iter( + "{urn:oasis:names:tc:xliff:document:2.0}unit" + ): + question_id = unit.attrib.get( + "id", "Unknown" + str(indexno) + ) + for segment in unit.iter( + "{urn:oasis:names:tc:xliff:document:2.0}segment" + ): + orig_text = "" + tr_text = "" + for source in transunit.iter( + "{urn:oasis:names:tc:xliff:document:2.0}source" + ): if source.text: orig_text += source.text for mrk in source: @@ -173,7 +238,9 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: orig_text += mrk.text if mrk.tail: orig_text += mrk.tail - for target in transunit.iter('{urn:oasis:names:tc:xliff:document:2.0}target'): + for target in transunit.iter( + "{urn:oasis:names:tc:xliff:document:2.0}target" + ): if target.text: tr_text += target.text for mrk in target: @@ -181,83 +248,101 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: tr_text += mrk.text if mrk.tail: tr_text += mrk.tail - if orig_text == '' or tr_text == '': + if orig_text == "" or tr_text == "": continue - the_dict = {'interview': source_filename, 'question_id': question_id, 'index_num': segment.attrib.get('id', str(indexno)), 'hash': hashlib.md5(orig_text.encode('utf-8')).hexdigest(), 'orig_lang': source_lang, 'tr_lang': target_lang, 'orig_text': orig_text, 'tr_text': tr_text} + the_dict = { + "interview": source_filename, + "question_id": question_id, + "index_num": segment.attrib.get("id", str(indexno)), + "hash": hashlib.md5( + orig_text.encode("utf-8") + ).hexdigest(), + "orig_lang": source_lang, + "tr_lang": target_lang, + "orig_text": orig_text, + "tr_text": tr_text, + } if orig_text not in tr_cache: tr_cache[orig_text] = {} if source_lang not in tr_cache[orig_text]: tr_cache[orig_text][source_lang] = {} tr_cache[orig_text][source_lang][target_lang] = the_dict indexno += 1 - if filetype == 'XLSX': - xlsx_filename = docassemble.base.functions.space_to_underscore(os.path.splitext(os.path.basename(re.sub(r'.*:', '', yaml_filename)))[0]) + "_" + tr_lang + ".xlsx" + if filetype == "XLSX": + xlsx_filename = ( + docassemble.base.functions.space_to_underscore( + os.path.splitext(os.path.basename(re.sub(r".*:", "", yaml_filename)))[0] + ) + + "_" + + tr_lang + + ".xlsx" + ) output_file.initialize(filename=xlsx_filename) workbook = xlsxwriter.Workbook(output_file.path()) worksheet = workbook.add_worksheet() - bold = workbook.add_format({'bold': 1}) + bold = workbook.add_format({"bold": 1}) text = workbook.add_format() - text.set_align('top') + text.set_align("top") fixedcell = workbook.add_format() - fixedcell.set_align('top') + fixedcell.set_align("top") fixedcell.set_text_wrap() fixedunlockedcell = workbook.add_format() - fixedunlockedcell.set_align('top') + fixedunlockedcell.set_align("top") fixedunlockedcell.set_text_wrap() # fixedunlockedcell.set_locked(False) fixed = 
workbook.add_format() fixedone = workbook.add_format() fixedone.set_bold() - fixedone.set_font_color('green') + fixedone.set_font_color("green") fixedtwo = workbook.add_format() fixedtwo.set_bold() - fixedtwo.set_font_color('blue') + fixedtwo.set_font_color("blue") fixedunlocked = workbook.add_format() fixedunlockedone = workbook.add_format() fixedunlockedone.set_bold() - fixedunlockedone.set_font_color('green') + fixedunlockedone.set_font_color("green") fixedunlockedtwo = workbook.add_format() fixedunlockedtwo.set_bold() - fixedunlockedtwo.set_font_color('blue') + fixedunlockedtwo.set_font_color("blue") wholefixed = workbook.add_format() - wholefixed.set_align('top') + wholefixed.set_align("top") wholefixed.set_text_wrap() wholefixedone = workbook.add_format() wholefixedone.set_bold() - wholefixedone.set_font_color('green') - wholefixedone.set_align('top') + wholefixedone.set_font_color("green") + wholefixedone.set_align("top") wholefixedone.set_text_wrap() wholefixedtwo = workbook.add_format() wholefixedtwo.set_bold() - wholefixedtwo.set_font_color('blue') - wholefixedtwo.set_align('top') + wholefixedtwo.set_font_color("blue") + wholefixedtwo.set_align("top") wholefixedtwo.set_text_wrap() wholefixedunlocked = workbook.add_format() - wholefixedunlocked.set_align('top') + wholefixedunlocked.set_align("top") wholefixedunlocked.set_text_wrap() # wholefixedunlocked.set_locked(False) wholefixedunlockedone = workbook.add_format() wholefixedunlockedone.set_bold() - wholefixedunlockedone.set_font_color('green') - wholefixedunlockedone.set_align('top') + wholefixedunlockedone.set_font_color("green") + wholefixedunlockedone.set_align("top") wholefixedunlockedone.set_text_wrap() # wholefixedunlockedone.set_locked(False) wholefixedunlockedtwo = workbook.add_format() wholefixedunlockedtwo.set_bold() - wholefixedunlockedtwo.set_font_color('blue') - wholefixedunlockedtwo.set_align('top') + wholefixedunlockedtwo.set_font_color("blue") + wholefixedunlockedtwo.set_align("top") wholefixedunlockedtwo.set_text_wrap() # wholefixedunlockedtwo.set_locked(False) numb = workbook.add_format() - numb.set_align('top') - worksheet.write('A1', 'interview', bold) - worksheet.write('B1', 'question_id', bold) - worksheet.write('C1', 'index_num', bold) - worksheet.write('D1', 'hash', bold) - worksheet.write('E1', 'orig_lang', bold) - worksheet.write('F1', 'tr_lang', bold) - worksheet.write('G1', 'orig_text', bold) - worksheet.write('H1', 'tr_text', bold) + numb.set_align("top") + worksheet.write("A1", "interview", bold) + worksheet.write("B1", "question_id", bold) + worksheet.write("C1", "index_num", bold) + worksheet.write("D1", "hash", bold) + worksheet.write("E1", "orig_lang", bold) + worksheet.write("F1", "tr_lang", bold) + worksheet.write("G1", "orig_text", bold) + worksheet.write("H1", "tr_text", bold) worksheet.set_column(0, 0, 25) worksheet.set_column(1, 1, 15) @@ -270,17 +355,17 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: untranslated_text = "" total_rows = 0 for question in interview.all_questions: - if not hasattr(question, 'translations'): + if not hasattr(question, "translations"): continue language = question.language - if language == '*': + if language == "*": language = question.from_source.get_language() - if language == '*': + if language == "*": language = interview.default_language if language == tr_lang: continue indexno = 0 - if hasattr(question, 'id'): + if hasattr(question, "id"): question_id = question.id else: question_id = question.name @@ -289,26 +374,32 @@ def 
translation_file(yaml_filename:str, tr_lang:str ) -> Translation: continue total_rows += 1 # The segment has already been translated and the translation is still valid - if item in tr_cache and language in tr_cache[item] and tr_lang in tr_cache[item][language]: - tr_text = str(tr_cache[item][language][tr_lang]['tr_text']) - else: # This string needs to be translated - tr_text = '' + if ( + item in tr_cache + and language in tr_cache[item] + and tr_lang in tr_cache[item][language] + ): + tr_text = str(tr_cache[item][language][tr_lang]["tr_text"]) + else: # This string needs to be translated + tr_text = "" untranslated_segments += 1 worksheet.write_string(row, 0, question.from_source.get_name(), text) worksheet.write_string(row, 1, question_id, text) worksheet.write_number(row, 2, indexno, numb) - worksheet.write_string(row, 3, hashlib.md5(item.encode('utf-8')).hexdigest(), text) + worksheet.write_string( + row, 3, hashlib.md5(item.encode("utf-8")).hexdigest(), text + ) worksheet.write_string(row, 4, language, text) worksheet.write_string(row, 5, tr_lang, text) mako = mako_parts(item) - + if not tr_text: for phrase in mako: if phrase[1] == 0: untranslated_text += phrase[0] - if len(mako) == 0: # Can this case occur? Not in tests - worksheet.write_string(row, 6, '', wholefixed) + if len(mako) == 0: # Can this case occur? Not in tests + worksheet.write_string(row, 6, "", wholefixed) elif len(mako) == 1: if mako[0][1] == 0: worksheet.write_string(row, 6, item, wholefixed) @@ -329,7 +420,7 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: worksheet.write_rich_string(*parts) mako = mako_parts(tr_text) if len(mako) == 0: - worksheet.write_string(row, 7, '', wholefixedunlocked) + worksheet.write_string(row, 7, "", wholefixedunlocked) elif len(mako) == 1: if mako[0][1] == 0: worksheet.write_string(row, 7, tr_text, wholefixedunlocked) @@ -348,31 +439,57 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: parts.extend([fixedunlockedtwo, part[0]]) parts.append(fixedunlockedcell) worksheet.write_rich_string(*parts) - num_lines = item.count('\n') + num_lines = item.count("\n") # if num_lines > 25: # num_lines = 25 if num_lines > 0: - worksheet.set_row(row, 15*(num_lines + 1)) + worksheet.set_row(row, 15 * (num_lines + 1)) indexno += 1 row += 1 seen.append(item) for item, cache_item in tr_cache.items(): - if item in seen or language not in cache_item or tr_lang not in cache_item[language]: + if ( + item in seen + or language not in cache_item + or tr_lang not in cache_item[language] + ): continue - worksheet.write_string(row, 0, cache_item[language][tr_lang]['interview'], text) - worksheet.write_string(row, 1, cache_item[language][tr_lang]['question_id'], text) - worksheet.write_number(row, 2, 1000 + cache_item[language][tr_lang]['index_num'], numb) - worksheet.write_string(row, 3, cache_item[language][tr_lang]['hash'], text) - worksheet.write_string(row, 4, cache_item[language][tr_lang]['orig_lang'], text) - worksheet.write_string(row, 5, cache_item[language][tr_lang]['tr_lang'], text) - mako = mako_parts(cache_item[language][tr_lang]['orig_text']) + worksheet.write_string( + row, 0, cache_item[language][tr_lang]["interview"], text + ) + worksheet.write_string( + row, 1, cache_item[language][tr_lang]["question_id"], text + ) + worksheet.write_number( + row, 2, 1000 + cache_item[language][tr_lang]["index_num"], numb + ) + worksheet.write_string(row, 3, cache_item[language][tr_lang]["hash"], text) + worksheet.write_string( + row, 4, 
cache_item[language][tr_lang]["orig_lang"], text + ) + worksheet.write_string( + row, 5, cache_item[language][tr_lang]["tr_lang"], text + ) + mako = mako_parts(cache_item[language][tr_lang]["orig_text"]) if len(mako) == 1: if mako[0][1] == 0: - worksheet.write_string(row, 6, cache_item[language][tr_lang]['orig_text'], wholefixed) + worksheet.write_string( + row, 6, cache_item[language][tr_lang]["orig_text"], wholefixed + ) elif mako[0][1] == 1: - worksheet.write_string(row, 6, cache_item[language][tr_lang]['orig_text'], wholefixedone) + worksheet.write_string( + row, + 6, + cache_item[language][tr_lang]["orig_text"], + wholefixedone, + ) elif mako[0][1] == 2: - worksheet.write_string(row, 6, cache_item[language][tr_lang]['orig_text'], wholefixedtwo) + worksheet.write_string( + row, + 6, + cache_item[language][tr_lang]["orig_text"], + wholefixedtwo, + ) else: parts = [row, 6] for part in mako: @@ -384,14 +501,29 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: parts.extend([fixedtwo, part[0]]) parts.append(fixedcell) worksheet.write_rich_string(*parts) - mako = mako_parts(cache_item[language][tr_lang]['tr_text']) + mako = mako_parts(cache_item[language][tr_lang]["tr_text"]) if len(mako) == 1: if mako[0][1] == 0: - worksheet.write_string(row, 7, cache_item[language][tr_lang]['tr_text'], wholefixedunlocked) + worksheet.write_string( + row, + 7, + cache_item[language][tr_lang]["tr_text"], + wholefixedunlocked, + ) elif mako[0][1] == 1: - worksheet.write_string(row, 7, cache_item[language][tr_lang]['tr_text'], wholefixedunlockedone) + worksheet.write_string( + row, + 7, + cache_item[language][tr_lang]["tr_text"], + wholefixedunlockedone, + ) elif mako[0][1] == 2: - worksheet.write_string(row, 7, cache_item[language][tr_lang]['tr_text'], wholefixedunlockedtwo) + worksheet.write_string( + row, + 7, + cache_item[language][tr_lang]["tr_text"], + wholefixedunlockedtwo, + ) else: parts = [row, 7] for part in mako: @@ -403,11 +535,13 @@ def translation_file(yaml_filename:str, tr_lang:str ) -> Translation: parts.extend([fixedunlockedtwo, part[0]]) parts.append(fixedunlockedcell) worksheet.write_rich_string(*parts) - num_lines = cache_item[language][tr_lang]['orig_text'].count('\n') + num_lines = cache_item[language][tr_lang]["orig_text"].count("\n") if num_lines > 0: - worksheet.set_row(row, 15*(num_lines + 1)) + worksheet.set_row(row, 15 * (num_lines + 1)) row += 1 workbook.close() - untranslated_words = len(re.findall(r"\w+", untranslated_text)) - return Translation(output_file, untranslated_words,untranslated_segments, total_rows) - raise ValueError("That's not a valid filetype for a translation file") \ No newline at end of file + untranslated_words = len(re.findall(r"\w+", untranslated_text)) + return Translation( + output_file, untranslated_words, untranslated_segments, total_rows + ) + raise ValueError("That's not a valid filetype for a translation file") diff --git a/docassemble/ALDashboard/validate_attachment.py b/docassemble/ALDashboard/validate_attachment.py index 96d0bb9..a854a31 100644 --- a/docassemble/ALDashboard/validate_attachment.py +++ b/docassemble/ALDashboard/validate_attachment.py @@ -2,21 +2,28 @@ import ruamel.yaml import mako.template import mako.runtime + mako.runtime.UNDEFINED = DAEmpty() from mako import exceptions from typing import List, Tuple -__all__ = ['validate_attachment_block'] +__all__ = ["validate_attachment_block"] + -def validate_attachment_block(fields_statement: str) -> List[Tuple[str,str]]: - yaml = ruamel.yaml.YAML(typ='rt') +def 
validate_attachment_block(fields_statement: str) -> List[Tuple[str, str]]:
+    yaml = ruamel.yaml.YAML(typ="rt")
     parsed_blocks = yaml.load(fields_statement)
     errors = []
-    for index, row in enumerate(parsed_blocks['fields']):
-        try: 
+    for index, row in enumerate(parsed_blocks["fields"]):
+        try:
             mytemplate = mako.template.Template(next(iter(row.values())))
             content = mytemplate.render()
         except:
-            errors.append((f"Error on row {index}, id: {row}",exceptions.text_error_template().render()))
-    return errors
\ No newline at end of file
+            errors.append(
+                (
+                    f"Error on row {index}, id: {row}",
+                    exceptions.text_error_template().render(),
+                )
+            )
+    return errors
diff --git a/pyproject.toml b/pyproject.toml
index af7d40a..d12d044 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,3 +60,7 @@
 module="docx.*"
 ignore_missing_imports = true
 
+[[tool.mypy.overrides]]
+module="mako.*"
+ignore_missing_imports = true
+