Binary file added Applications/.DS_Store
Binary file not shown.
7 changes: 7 additions & 0 deletions Applications/docx-tracked-changes/docx-remote-testing.py
@@ -0,0 +1,7 @@
from tensorlake.runtime import run_remote_application
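# Assumes `analyze_contract_revisions` from docx-tracked-changes.py is
# importable here, and that `contracts` is a list of DOCX URLs such as the
# sample document used in docx-tracked-changes.py.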

# Deploy and run at scale
changes = run_remote_application(
    analyze_contract_revisions,
    contract_urls=contracts
)
243 changes: 243 additions & 0 deletions Applications/docx-tracked-changes/docx-tracked-changes.py
@@ -0,0 +1,243 @@
import os
from typing import List
from tensorlake.applications import Image, application, function, cls
from tensorlake.documentai import DocumentAI

# Define the runtime environment
image = (
Image(base_image="python:3.11-slim", name="docx-example")
.run("pip install tensorlake beautifulsoup4")
)
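# beautifulsoup4 is installed alongside tensorlake because the functions
# below import bs4 at run time to pull tracked-change markup out of each
# parsed chunk.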

@application()
@function(
secrets=["TENSORLAKE_API_KEY"],
image=image
)
def analyze_contract_revisions(contract_urls: List[str]) -> List[dict]:
    """
    Main application that processes contracts with tracked changes
    and aggregates the analysis results
    """
print(f"[DEBUG] Starting analysis of {len(contract_urls)} contracts")

# Process each contract in parallel
print("[DEBUG] Processing contracts in parallel...")
results = process_single_contract.map(contract_urls)

# Collect and analyze all results
all_changes = []
for result in results:
all_changes.extend(result)

print(f"Processed {len(contract_urls)} contracts")
print(f"Found {len(all_changes)} tracked changes total")

return all_changes


@function(
secrets=["TENSORLAKE_API_KEY"],
image=image
)
def process_single_contract(url: str) -> List[dict]:
"""
Process a single contract and extract tracked changes with locations
"""
print(f"[DEBUG] Processing contract: {url}")
from bs4 import BeautifulSoup

doc_ai = DocumentAI()

# Parse the DOCX file
print("[DEBUG] Parsing DOCX file...")
try:
result = doc_ai.parse_and_wait(file=url)
    except Exception as e:
        print(f"[ERROR] Parse failed: {e}")
        # Skip this contract; its tracked changes won't appear in the results
        return []
print("[DEBUG] DOCX parsing complete")

changes = []

# Extract tracked changes from each chunk
print(f"[DEBUG] Processing {len(result.chunks)} document chunks")
for chunk_idx, chunk in enumerate(result.chunks, 1):
print(f"[DEBUG] Processing chunk {chunk_idx}/{len(result.chunks)}")
soup = BeautifulSoup(chunk.markdown, 'html.parser')

# Find insertions
insertions = soup.find_all('ins')
print(f"[DEBUG] Found {len(insertions)} insertions in chunk {chunk_idx}")
for ins in insertions:
text = ins.get_text()
print(f"[DEBUG] Processing insertion: '{text[:50]}{'...' if len(text) > 50 else ''}'")
changes.append({
'type': 'insertion',
'text': text,
'context': get_surrounding_context(ins),
'page': chunk.page_number if hasattr(chunk, 'page_number') else None
})

# Find deletions
deletions = soup.find_all('del')
print(f"[DEBUG] Found {len(deletions)} deletions in chunk {chunk_idx}")
for del_tag in deletions:
text = del_tag.get_text()
print(f"[DEBUG] Processing deletion: '{text[:50]}{'...' if len(text) > 50 else ''}'")
changes.append({
'type': 'deletion',
'text': text,
'context': get_surrounding_context(del_tag),
'page': chunk.page_number if hasattr(chunk, 'page_number') else None
})

# Find comments
comments = soup.find_all('span', class_='comment')
print(f"[DEBUG] Found {len(comments)} comments in chunk {chunk_idx}")
for comment in comments:
            note = comment.get('data-note', '')
            print(f"[DEBUG] Processing comment: '{note[:50]}{'...' if len(note) > 50 else ''}'")
changes.append({
'type': 'comment',
'text': note,
'highlighted_text': comment.get_text(),
'page': chunk.page_number if hasattr(chunk, 'page_number') else None
})

# Also extract spatial information for key clauses
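    # A change is matched to a fragment by page number plus substring
    # containment, so it inherits the bbox and reading order of whichever
    # key-clause fragment contains its text.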
for page in result.pages:
for fragment in page.page_fragments:
if fragment.fragment_type == 'text' and is_key_clause(fragment.content):
# Find any tracked changes in this fragment
fragment_changes = [
c for c in changes
if c.get('page') == page.page_number and
c.get('text', '') in fragment.content
]

for change in fragment_changes:
change['bbox'] = fragment.bbox
change['reading_order'] = fragment.reading_order

return changes


@function(image=image)
def get_surrounding_context(tag, chars=100):
"""Extract surrounding text context for a tracked change"""
text = tag.parent.get_text() if tag.parent else ""
tag_text = tag.get_text()

if tag_text in text:
start = max(0, text.index(tag_text) - chars)
end = min(len(text), text.index(tag_text) + len(tag_text) + chars)
return text[start:end]

return text[:chars]


@function(image=image)
def is_key_clause(text: str) -> bool:
"""Identify if text contains a key contract clause"""
key_terms = [
'indemnification', 'liability', 'warranty', 'termination',
'confidentiality', 'intellectual property', 'force majeure',
'governing law', 'dispute resolution', 'payment terms'
]

text_lower = text.lower()
return any(term in text_lower for term in key_terms)


@application()
@function(
secrets=["TENSORLAKE_API_KEY"],
image=image
)
def query_contract_changes(contract_tuple: tuple[str, str]) -> str:
"""
Query a contract about specific changes using an LLM with full context
"""
print(f"[DEBUG] Starting contract query with parameters: {contract_tuple}")
doc_ai = DocumentAI()

# Parse the contract
print("[DEBUG] Parsing contract for querying...")
result = doc_ai.parse_and_wait(file=contract_tuple[0])
print("[DEBUG] Contract parsing complete")

# Build context from tracked changes and spatial data
context = build_contract_context(result)

# Use an LLM to answer the question (pseudo-code)
# In production, you'd call your LLM API here
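    # A minimal sketch of such a call, assuming the OpenAI Python client
    # (the client, model name, and prompt format are illustrative, not part
    # of this repo):
    #
    #   from openai import OpenAI
    #   client = OpenAI()
    #   completion = client.chat.completions.create(
    #       model="gpt-4o-mini",
    #       messages=[{
    #           "role": "user",
    #           "content": f"{contract_tuple[1]}\n\nTracked changes:\n{context}",
    #       }],
    #   )
    #   answer = completion.choices[0].message.content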
answer = f"""
Based on the tracked changes in the document:

    Question: {contract_tuple[1]}

Analysis:
{context}
"""

return answer


@function(image=image)
def build_contract_context(result) -> str:
"""Build a comprehensive context string with tracked changes and locations"""
from bs4 import BeautifulSoup

context_parts = []

for chunk in result.chunks:
soup = BeautifulSoup(chunk.markdown, 'html.parser')

# Extract insertions with page numbers
for ins in soup.find_all('ins'):
context_parts.append(
f"INSERTION: Added '{ins.get_text()}' "
f"(Page {chunk.page_number if hasattr(chunk, 'page_number') else 'unknown'})"
)

# Extract deletions with page numbers
for del_tag in soup.find_all('del'):
context_parts.append(
f"DELETION: Removed '{del_tag.get_text()}' "
f"(Page {chunk.page_number if hasattr(chunk, 'page_number') else 'unknown'})"
)

# Extract comments
for comment in soup.find_all('span', class_='comment'):
context_parts.append(
f"COMMENT: '{comment.get('data-note')}' on text '{comment.get_text()}' "
f"(Page {chunk.page_number if hasattr(chunk, 'page_number') else 'unknown'})"
)

return "\n".join(context_parts)

if __name__ == "__main__":
from tensorlake.applications import run_local_application

# Analyze multiple contracts
contracts_urls = [
"https://pub-226479de18b2493f96b64c6674705dd8.r2.dev/Commercial%20Property%20Insurance%20Claim%20Assessment%20Report.docx"
]

changes = run_local_application(
analyze_contract_revisions,
contracts_urls
)

print(f"Found {changes} tracked changes across all contracts")

# Query a specific contract
answer = run_local_application(
query_contract_changes,
("https://pub-226479de18b2493f96b64c6674705dd8.r2.dev/Commercial%20Property%20Insurance%20Claim%20Assessment%20Report.docx",
"What changes did opposing counsel make to the liability section?")
)

print(answer)
4 changes: 4 additions & 0 deletions Applications/docx-tracked-changes/requirements.txt
@@ -0,0 +1,4 @@
tensorlake>=0.1.0
beautifulsoup4