Skip to content

Commit

Permalink
Merge pull request #126 from SuffolkLITLab/patch-new-ruamel
Browse files Browse the repository at this point in the history
Fix issue with deprecated ruamel functionality
  • Loading branch information
nonprofittechy authored Jan 5, 2024
2 parents d6b35de + 621257c commit 4b85d43
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,19 @@ fields:
default: True
---
code: |
  # Parse every uploaded YAML file into one flat list of documents.
  # ruamel.yaml removed the module-level safe_load_all(); the supported
  # replacement is a YAML(typ='safe') instance and its load_all() method.
  from ruamel.yaml import YAML
  yaml = YAML(typ='safe', pure=True)
  yaml_parsed = []
  for f in yaml_file:
    # load_all() yields one object per YAML document; extend() consumes the
    # generator directly, so no intermediate list is needed.
    yaml_parsed.extend(yaml.load_all(f.slurp()))
  # Remove the parser object so it is not kept in the interview namespace.
  del yaml
---
code: |
# identify all questions that set a variable in the interview
# they will be added as a dictionary of label: field, with other modifiers
name_match = re.compile(r"((\w+\[\d+\])|\w+)")
name_without_index_match = re.compile(r"(\w+).*\[i.*")
objects_temp = []
attributes_list = {}
questions_temp = []
Expand All @@ -59,7 +61,7 @@ code: |
for field in doc["fields"]:
if field and "code" in field:
try:
object_name = name_match.match(field["code"])[1]
object_name = re.match(r"((\w+\[\d+\])|\w+)", field["code"])[1]
if object_name == "x" or "[i]" in field["code"]:
continue
except:
Expand Down Expand Up @@ -98,7 +100,7 @@ code: |
)
elif isinstance( (val := next(iter(field.values()))), str ) and "[i]" in val:
# log(next(iter(field.values())), "success")
obj_match = name_without_index_match.match(next(iter(field.values())))
obj_match = re.match(r"(\w+).*\[i.*", next(iter(field.values())))
if obj_match:
object_name = obj_match[1]
else:
Expand Down
62 changes: 45 additions & 17 deletions docassemble/ALDashboard/docx_wrangling.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,35 +33,46 @@ def add_paragraph_before(paragraph, text):


def update_docx(
    document: Union[docx.Document, str],
    modified_runs: List[Tuple[int, int, str, int]],
) -> docx.Document:
    """Apply a list of run-level edits to a DOCX document.

    Args:
        document: the docx.Document object, or the path to the DOCX file.
        modified_runs: 4-item entries (tuples or lists) of paragraph number,
            run number, the modified text, and a number from -1 to 1
            indicating whether the text should be inserted as a new paragraph
            before (-1) or after (1) the current paragraph, or written into
            the existing run (any other value).

    Returns:
        The modified document.
    """
    if isinstance(document, str):
        document = docx.Document(document)

    # Discard malformed entries *before* sorting: the sort key indexes item[0],
    # which would raise on an empty entry.
    well_formed = [item for item in modified_runs if len(item) == 4]

    # Work from the last paragraph to the first so that paragraphs inserted
    # along the way do not shift the indexes of entries still to be applied.
    # sorted() is used instead of list.sort() so the caller's list is not
    # reordered as a side effect.
    for paragraph_number, run_number, modified_text, new_paragraph in sorted(
        well_formed, key=lambda item: item[0], reverse=True
    ):
        paragraphs = document.paragraphs
        if paragraph_number >= len(paragraphs):
            continue  # Skip invalid paragraph index

        paragraph = paragraphs[paragraph_number]
        if run_number >= len(paragraph.runs):
            continue  # Skip invalid run index

        if new_paragraph == 1:
            add_paragraph_after(paragraph, modified_text)
        elif new_paragraph == -1:
            add_paragraph_before(paragraph, modified_text)
        else:
            paragraph.runs[run_number].text = modified_text

    return document


Expand Down Expand Up @@ -253,6 +264,23 @@ def get_labeled_docx_runs(
return guesses


def docx_rewrite(docx_path: str, prompt:str, openai_client: Optional[OpenAI] = None,) -> docx.Document:
    """Open a DOCX file and rewrite the text of each paragraph.

    NOTE(review): this looks like an unfinished stub. It is described as using
    GPT to rewrite the document paragraph by paragraph, but the body never
    reads `prompt` or `openai_client` and makes no OpenAI call; it only runs a
    plain `str.replace` on every paragraph.

    Args:
        docx_path: path to the DOCX file
        prompt: the prompt to use for OpenAI (not read by the current body)
        openai_client: an optional OpenAI client (not read by the current body)

    Returns:
        The modified document.
    """
    doc = docx.Document(docx_path)
    for paragraph in doc.paragraphs:
        # NOTE(review): `find` and `replace` are neither parameters nor locals
        # of this function. Unless they exist as module-level names outside
        # this view, this line raises NameError for any document that has at
        # least one paragraph -- confirm the intended values.
        paragraph.text = paragraph.text.replace(find, replace)
    return doc


def modify_docx_with_openai_guesses(docx_path: str) -> docx.Document:
"""Uses OpenAI to guess the variable names for a document and then modifies the document with the guesses.
Expand Down

0 comments on commit 4b85d43

Please sign in to comment.