Merge pull request #126 from SuffolkLITLab/patch-new-ruamel

Fix issue with deprecated ruamel functionality
SuffolkLITLab · Jan 5, 2024 · 4b85d43
2 parents d6b35de + 621257c
commit 4b85d43
Show file tree

Hide file tree

Showing 2 changed files with 53 additions and 23 deletions.
diff --git a/docassemble/ALDashboard/data/questions/review_screen_generator.yml b/docassemble/ALDashboard/data/questions/review_screen_generator.yml
@@ -31,17 +31,19 @@ fields:
     default: True
 ---
 code: |
-  import ruamel.yaml as yaml
+  from ruamel.yaml import YAML
+
+  yaml = YAML(typ='safe', pure=True)
   yaml_parsed = []
   for f in yaml_file:
-    yaml_parsed.extend(list(yaml.safe_load_all(f.slurp())))
+    yaml_parsed.extend(list(yaml.load_all(f.slurp())))
+
+  del yaml
 ---
 code: |
   # identify all questions that set a variable in the interview
   # they will be added as a dictionary of label: field, with other modifiers
   
-  name_match = re.compile(r"((\w+\[\d+\])|\w+)")
-  name_without_index_match = re.compile(r"(\w+).*\[i.*")
   objects_temp = []
   attributes_list = {}
   questions_temp = []
@@ -59,7 +61,7 @@ code: |
         for field in doc["fields"]:
           if field and "code" in field:
             try:
-              object_name = name_match.match(field["code"])[1]
+              object_name = re.match(r"((\w+\[\d+\])|\w+)", field["code"])[1]
               if object_name == "x" or "[i]" in field["code"]:
                 continue
             except:
@@ -98,7 +100,7 @@ code: |
                   )
           elif isinstance( (val := next(iter(field.values()))), str ) and "[i]" in val:
             # log(next(iter(field.values())), "success")
-            obj_match = name_without_index_match.match(next(iter(field.values())))
+            obj_match = re.match(r"(\w+).*\[i.*", next(iter(field.values())))
             if obj_match:
               object_name = obj_match[1]
             else:

diff --git a/docassemble/ALDashboard/docx_wrangling.py b/docassemble/ALDashboard/docx_wrangling.py
@@ -33,35 +33,46 @@ def add_paragraph_before(paragraph, text):
 
 
 def update_docx(
-    document: Union[docx.Document, str], modified_runs: List[Tuple[int, int, str, int]]
+    document: Union[docx.Document, str], 
+    modified_runs: List[List[int, int, str, int]]
 ) -> docx.Document:
-    """Update the document with the modified runs.
+    """Update the document with modified runs.
 
     Args:
         document: the docx.Document object, or the path to the DOCX file
-        modified_runs: a tuple of paragraph number, run number, the modified text, a question (not used), and whether a new paragraph should be inserted (for conditional text)
+        modified_runs: a tuple of paragraph number, run number, the modified text, and 
+            a number from -1 to 1 indicating whether a new paragraph should be inserted 
+            before or after the current paragraph.
 
     Returns:
         The modified document.
-    """
-    ## Sort modified_runs in reverse order so inserted paragraphs are in the correct order
-    # modified_runs = sorted(modified_runs, key=lambda x: x[0], reverse=True)
-    #
-    ## also sort each run in the modified_runs so that the runs are in the correct order
-    # modified_runs = sorted(modified_runs, key=lambda x: x[1], reverse=True)
+    """    
+    modified_runs.sort(key=lambda x: x[0], reverse=True)
 
     if isinstance(document, str):
         document = docx.Document(document)
 
-    for paragraph_number, run_number, modified_text, new_paragraph in modified_runs:
+    for item in modified_runs:
+        if len(item) != 4:
+            continue  # Skip items with incorrect format
+
+        paragraph_number, run_number, modified_text, new_paragraph = item
+
+        if paragraph_number >= len(document.paragraphs):
+            continue  # Skip invalid paragraph index
+
         paragraph = document.paragraphs[paragraph_number]
-        run = paragraph.runs[run_number]
-        # if new_paragraph == 1:
-        #    add_paragraph_after(paragraph, modified_text)
-        # elif new_paragraph == -1:
-        #    add_paragraph_before(paragraph, modified_text)
-        # else:
-        run.text = modified_text
+
+        if run_number >= len(paragraph.runs):
+            continue  # Skip invalid run index
+
+        if new_paragraph == 1:
+           add_paragraph_after(paragraph, modified_text)
+        elif new_paragraph == -1:
+           add_paragraph_before(paragraph, modified_text)
+        else:
+            paragraph.runs[run_number].text = modified_text
+
     return document
 
 
@@ -253,6 +264,23 @@ def get_labeled_docx_runs(
     return guesses
 
 
+def docx_rewrite(docx_path: str, prompt:str, openai_client: Optional[OpenAI] = None,) -> List[Tuple[int, int, str, int]]:
+    """Use GPT to rewrite the contents of a DOCX file paragraph by paragraph.
+
+    Args:
+        docx_path: path to the DOCX file
+        prompt: the prompt to use for OpenAI
+        openai_client: an optional OpenAI client
+
+    Returns:
+        The modified document.
+    """
+    doc = docx.Document(docx_path)
+    for paragraph in doc.paragraphs:
+        paragraph.text = paragraph.text.replace(find, replace)
+    return doc
+
+
 def modify_docx_with_openai_guesses(docx_path: str) -> docx.Document:
     """Uses OpenAI to guess the variable names for a document and then modifies the document with the guesses.