Skip to content

Commit

Permalink
Merge pull request #1049 from 18F/fixup-warnings
Browse files Browse the repository at this point in the history
Document editor fixes
  • Loading branch information
cmc333333 authored Mar 2, 2018
2 parents 3d45030 + df3768a commit 7fec670
Show file tree
Hide file tree
Showing 8 changed files with 291 additions and 6 deletions.
9 changes: 9 additions & 0 deletions api/document/js/__tests__/serialize-doc.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Fragment } from 'prosemirror-model';
import * as repeatString from 'repeat-string';

import serializeDoc, { apiFactory, convertTexts } from '../serialize-doc';
import schema, { factory } from '../schema';
Expand Down Expand Up @@ -53,6 +54,14 @@ describe('serializeDoc()', () => {
expect(result.title).toEqual('Stufff');
});

it('trims heading text to for the section title', () => {
const node = factory.sec([
factory.heading(repeatString('1', 555), 1),
]);
const result = serializeDoc(node);
expect(result.title).toEqual(repeatString('1', 128));
});

it('converts footnotes', () => {
const node = schema.nodes.para.create({}, [
schema.nodes.paraText.create({}, [
Expand Down
2 changes: 1 addition & 1 deletion api/document/js/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ const schema = new Schema({
toDOM: () => ['div', { class: 'node-paragraph' }, 0],
},
sec: {
content: 'heading block+',
content: 'heading block*',
group: 'block',
toDOM: () => ['section', { class: 'node-section' }, 0],
},
Expand Down
4 changes: 2 additions & 2 deletions api/document/js/serialize-doc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ const NODE_CONVERTERS: NodeConverterMap = {
},
sec(node) {
const headerNode = node.content.child(0);
const title = headerNode.textContent;
const title = headerNode.textContent || '';
const sec = defaultNodeConverter(node);
sec.title = title;
sec.title = title.substr(0, 128); // trim

return sec;
},
Expand Down
73 changes: 73 additions & 0 deletions api/document/tests/tree_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,76 @@ def test_jump_to():
assert root.jump_to('root_1').identifier == 'root_1'
assert para_a.jump_to('root_1').identifier == 'root_1'
assert para_a.jump_to('root_1__sec_1').identifier == 'root_1__sec_1'


def test_add_child_insert_pos():
root = tree.DocCursor.new_tree('root')
root.add_child('sec', '1')
root.add_child('sec', '2')
root.add_child('sec', '3')

assert [n.identifier for n in root.walk()] == [
'root_1', 'root_1__sec_1', 'root_1__sec_2', 'root_1__sec_3']

root.add_child('sec', '4', insert_pos=1)
assert [n.identifier for n in root.walk()] == [
'root_1', 'root_1__sec_1', 'root_1__sec_4', 'root_1__sec_2',
'root_1__sec_3',
]


def test_siblings():
root = tree.DocCursor.new_tree('root')
s1 = root.add_child('sec', '1')
s2 = root.add_child('sec', '2')
s3 = root.add_child('sec', '3')
s4 = root.add_child('sec', '4')
s1.add_child('para')
s3.add_child('para')

assert [s.identifier for s in s3.left_siblings()] == [
s1.identifier, s2.identifier]
assert [s.identifier for s in s3.right_siblings()] == [s4.identifier]
assert s1.left_sibling() is None
assert s1.right_sibling().identifier == s2.identifier
assert s2.left_sibling().identifier == s1.identifier


def test_append_to():
root = tree.DocCursor.new_tree('root')
s1 = root.add_child('sec')
s1.add_child('para')
p12 = s1.add_child('para')
p12.add_child('math')
s2 = root.add_child('sec')
s2.add_child('para')
list_el = s2.add_child('list')
list_el.add_child('listitem')
list_el.add_child('listitem')

assert [n.identifier for n in root.walk()] == [
'root_1',
'root_1__sec_1',
'root_1__sec_1__para_1',
'root_1__sec_1__para_2',
'root_1__sec_1__para_2__math_1',
'root_1__sec_2',
'root_1__sec_2__para_1',
'root_1__sec_2__list_1',
'root_1__sec_2__list_1__listitem_1',
'root_1__sec_2__list_1__listitem_2',
]

list_el.append_to(p12)
assert [n.identifier for n in root.walk()] == [
'root_1',
'root_1__sec_1',
'root_1__sec_1__para_1',
'root_1__sec_1__para_2',
'root_1__sec_1__para_2__math_1',
'root_1__sec_1__para_2__list_1',
'root_1__sec_1__para_2__list_1__listitem_1',
'root_1__sec_1__para_2__list_1__listitem_2',
'root_1__sec_2',
'root_1__sec_2__para_1',
]
76 changes: 73 additions & 3 deletions api/document/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from networkx import DiGraph
from networkx.algorithms.dag import descendants
from networkx.relabel import relabel_nodes

from document.models import DocNode

Expand Down Expand Up @@ -126,7 +127,7 @@ def next_emblem(self, node_type: str) -> str:
return str(child_type_counts[node_type] + 1)

def add_child(self: T, node_type: str, type_emblem: Optional[str] = None,
**attrs) -> T:
insert_pos: Optional[int] = None, **attrs) -> T:
if type_emblem is None:
type_emblem = self.next_emblem(node_type)
if 'policy' not in attrs:
Expand All @@ -138,8 +139,15 @@ def add_child(self: T, node_type: str, type_emblem: Optional[str] = None,
type_emblem=type_emblem, depth=self.depth + 1,
**attrs
))
self.tree.add_edge(self.identifier, identifier,
sort_order=self.next_sort_order())
if insert_pos is None:
insert_pos = self.next_sort_order()
else:
# shift children if necessary
edges = self.tree.out_edges(self.identifier, data='sort_order')
for _, child_idx, sort_order in edges:
if sort_order >= insert_pos:
self.tree[self.identifier][child_idx]['sort_order'] += 1
self.tree.add_edge(self.identifier, identifier, sort_order=insert_pos)
return self.__class__(self.tree, identifier=identifier)

def subtree_size(self) -> int:
Expand Down Expand Up @@ -219,6 +227,18 @@ def parent(self: T) -> Optional[T]:
return type(self)(self.tree, predecessors[0])
return None

def left_sibling(self: T) -> Optional[T]:
siblings = list(self.left_siblings())
if siblings:
return siblings[-1]
return None

def right_sibling(self: T) -> Optional[T]:
siblings = list(self.right_siblings())
if siblings:
return siblings[0]
return None

def ancestors(self: T, filter_fn: Callable[[T], bool]=None) \
-> Iterator[T]:
parent = self.parent()
Expand All @@ -227,6 +247,26 @@ def ancestors(self: T, filter_fn: Callable[[T], bool]=None) \
if parent:
yield from parent.ancestors(filter_fn)

def left_siblings(self: T) -> Iterator[T]:
"""What's before this node, sharing the same parent."""
parent = self.parent()
if parent:
for sibling in parent.children():
if sibling.identifier == self.identifier:
break
yield sibling

def right_siblings(self: T) -> Iterator[T]:
"""What's after this node, sharing the same parent."""
parent = self.parent()
seen_self = False
if parent:
for sibling in parent.children():
if seen_self:
yield sibling
if sibling.identifier == self.identifier:
seen_self = True

def add_models(self: T, models: Iterator[DocNode]) -> T:
"""Convert a (linear) list of DocNodes into a tree-aware version.
We assume that the models are already sorted."""
Expand Down Expand Up @@ -257,6 +297,36 @@ def filter(self: T, filter_fn: Callable[[DocNode], bool]) -> Iterator[T]:
def jump_to(self: T, identifier: str) -> T:
return type(self)(self.tree, identifier)

def append_to(self: T, new_parent: T) -> T:
"""Move this node (and its subtree) to be the last child of the
new_parent. This requires modifying all of the subtree's
identifiers."""
prev_parent = self.parent()
if not prev_parent:
raise AssertionError("Can't move a node with no parent")
# remove from previous position
self.tree.remove_edge(prev_parent.identifier, self.identifier)
# add to new position
old_prefix = self.identifier
self.model.type_emblem = new_parent.next_emblem(self.node_type)
new_prefix = (
new_parent.identifier
+ '__' + self.node_type
+ '_' + self.type_emblem
)
self.tree.add_edge(new_parent.identifier, self.identifier,
sort_order=new_parent.next_sort_order())

# relabel
label_mapping = {}
for node in self.walk():
old_label = node.identifier
node.model.identifier = new_prefix + old_label[len(old_prefix):]
label_mapping[old_label] = node.model.identifier
relabel_nodes(self.tree, label_mapping, copy=False)

return self.jump_to(label_mapping[self.identifier])


class XMLAwareCursor(DocCursor):
"""Extension of DocCursor which also tracks the XML which created each
Expand Down
71 changes: 71 additions & 0 deletions api/ombpdf/management/commands/migrate_documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import logging

from django.core.management.base import BaseCommand
from django.db import transaction
from tqdm import tqdm

from document.models import DocNode
from document.tree import DocCursor

logger = logging.getLogger(__name__)


def ensure_listitem_in_list(doc: DocCursor) -> DocCursor:
"""We expect all listitems to have a parent list. However, the parser has
emitted lists followed by bare listitems. We'll place the listitem in the
list that imemdiately precedes it."""
for li in doc.filter(lambda n: n.node_type == 'listitem'):
parent = li.parent()
prev_sibling = li.left_sibling()

if not parent:
logger.warning('Root of %s is an li.',
doc.policy.title_with_number)
continue
if parent.node_type == 'list': # all is well
continue

if prev_sibling and prev_sibling.node_type == 'list':
li.append_to(prev_sibling)
return ensure_listitem_in_list(doc)
# else: create new list to wrap this one
logger.warning('Could not fix li in %s', doc.policy.title_with_number)

return doc # no changes needed


def ensure_section_has_heading(doc: DocCursor) -> DocCursor:
"""We expect all sections to have a heading. Fill the missing data with
placeholder text."""
# materialize so we don't need to worry about a modified iterator
secs = list(doc.filter(lambda n: n.node_type == 'sec'))
for sec in secs:
children = list(sec.children())
if not children or children[0].node_type != 'heading':
sec.add_child('heading', insert_pos=0, text='--Missing Heading--',
policy_id=doc.policy_id)
return doc


transforms = [
ensure_listitem_in_list,
ensure_section_has_heading,
]


class Command(BaseCommand):
help = ( # noqa (overriding a builtin)
"Run through (idempotent) document migrations to mass-fixup docs.")

def handle(self, *args, **kwargs):
roots = DocNode.objects.filter(depth=0)
with tqdm(total=roots.count()) as pbar:
for root_docnode in roots:
with transaction.atomic():
doc = DocCursor.load_from_model(root_docnode)
for transform in transforms:
doc = transform(doc)
doc.nested_set_renumber(bulk_create=False)
for node in doc.walk():
node.save()
pbar.update(1)
41 changes: 41 additions & 0 deletions api/ombpdf/tests/test_management_commands_migrate_documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from document.tree import DocCursor
from ombpdf.management.commands import migrate_documents


def test_ensure_listitem_in_list():
root = DocCursor.new_tree('root')
list_el = root.add_child('list')
list_el.add_child('listitem', '1')
root.add_child('listitem', '2')
root.add_child('listitem', '3')
assert [n.identifier for n in root.walk()] == [
'root_1',
'root_1__list_1',
'root_1__list_1__listitem_1',
'root_1__listitem_2',
'root_1__listitem_3',
]

root = migrate_documents.ensure_listitem_in_list(root)
assert [n.identifier for n in root.walk()] == [
'root_1',
'root_1__list_1',
'root_1__list_1__listitem_1',
'root_1__list_1__listitem_2',
'root_1__list_1__listitem_3',
]


def test_ensure_section_has_heading():
root = DocCursor.new_tree('root')
sec1 = root.add_child('sec')
sec1.add_child('sec')
sec12 = sec1.add_child('sec')
sec12.add_child('heading', text='Subheading')
root.add_child('sec')

root = migrate_documents.ensure_section_has_heading(root)
assert root['sec_1']['heading_1'].text == '--Missing Heading--'
assert root['sec_1']['sec_1']['heading_1'].text == '--Missing Heading--'
assert root['sec_1']['sec_2']['heading_1'].text == 'Subheading'
assert root['sec_2']['heading_1'].text == '--Missing Heading--'
21 changes: 21 additions & 0 deletions api/reqs/migrations/0016_auto_20180302_0007.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.5 on 2018-03-02 00:07
from __future__ import unicode_literals

from django.db import migrations


class Migration(migrations.Migration):

dependencies = [
('reqs', '0015_remove_policy_public'),
]

operations = [
migrations.RunSQL("""
UPDATE reqs_policy
SET workflow_phase='cleanup'
WHERE workflow_phase='no_doc'
AND id IN (SELECT policy_id from document_docnode)
""")
]

0 comments on commit 7fec670

Please sign in to comment.