Skip to content

Commit 6d3bb0e

Browse files
committed
add path tracking
1 parent e834591 commit 6d3bb0e

File tree

8 files changed

+217
-83
lines changed

8 files changed

+217
-83
lines changed

.gitignore

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1-
*.pyc
1+
*.egg-info/
2+
__pycache__
23
.vscode
3-
dist
4+
.coverage
5+
dist

patchdiff/__init__.py

+1-76
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,3 @@
1-
from typing import Dict, List
2-
1+
from .diff import diff
32

43
__version__ = "0.2.0"
5-
6-
7-
def diff_lists(input: List, output: List):
    """Return a cheapest edit script that turns ``input`` into ``output``.

    Every step (remove, add, replace) costs 1; elements for which ``diff``
    reports no differences are matched for free. Each operation is a dict
    with an ``op`` name and an ``idx``; additions and replacements also carry
    the new ``value`` and replacements the ``original`` element.
    """
    cache = {(0, 0): {"ops": [], "cost": 0}}

    def grow(prefix, op):
        # Script ``prefix`` followed by one more operation, one unit dearer.
        return {"ops": prefix["ops"] + [op], "cost": prefix["cost"] + 1}

    def dist(i, j):
        # Cheapest script for input[:i] -> output[:j], memoized in ``cache``.
        key = (i, j)
        if key in cache:
            return cache[key]

        if i > 0 and j > 0 and not diff(input[i - 1], output[j - 1]):
            best = dist(i - 1, j - 1)
        else:
            candidates = []
            if i > 0:
                removal = {"op": "remove", "idx": i - 1}
                candidates.append(grow(dist(i - 1, j), removal))
            if j > 0:
                addition = {"op": "add", "idx": j - 1, "value": output[j - 1]}
                candidates.append(grow(dist(i, j - 1), addition))
            if i > 0 and j > 0:
                replacement = {
                    "op": "replace",
                    "idx": i - 1,
                    "original": input[i - 1],
                    "value": output[j - 1],
                }
                candidates.append(grow(dist(i - 1, j - 1), replacement))
            # On equal cost the earliest candidate wins: remove, add, replace.
            best = min(candidates, key=lambda candidate: candidate["cost"])

        cache[key] = best
        return best

    return dist(len(input), len(output))["ops"]
53-
54-
55-
def diff_dicts(input: Dict, output: Dict):
    """Compute the operations that turn dict ``input`` into dict ``output``.

    Keys are visited in dictionary insertion order rather than set order, so
    the resulting list is the same from run to run.

    Returns:
        "remove" operations for keys found only in ``input``, then "add"
        operations for keys found only in ``output``, then whatever ``diff``
        reports for each key present in both.
    """
    ops = [{"op": "remove", "key": key} for key in input if key not in output]
    ops.extend(
        {"op": "add", "key": key, "value": value}
        for key, value in output.items()
        if key not in input
    )
    for key in input:
        if key in output:
            ops.extend(diff(input[key], output[key]))
    return ops
66-
67-
68-
def diff(input, output):
    """Return the list of operations that turn ``input`` into ``output``.

    Equal values give an empty list. Two lists or two dicts are handed to the
    matching specialised routine; any other pair becomes a single "replace".
    """
    # TODO: track paths
    # TODO: properly check equality
    if input == output:
        return []

    def both(kind):
        return isinstance(input, kind) and isinstance(output, kind)

    if both(list):
        return diff_lists(input, output)
    if both(dict):
        return diff_dicts(input, output)
    # TODO: sets, tuples
    replacement = {"op": "replace", "value": output}
    return [replacement]

patchdiff/diff.py

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
from functools import reduce
2+
from typing import Dict, List, Set
3+
4+
from .pointer import Pointer
5+
from .types import Diffable
6+
7+
8+
def diff_lists(input: List, output: List, ptr: Pointer) -> List:
    """Compute the operations that turn list ``input`` into list ``output``.

    A memoized edit-distance search (remove, add and replace each cost 1)
    picks a cheapest edit script. The script is then rewritten so that every
    operation carries a ``path`` that is valid when the operations are
    applied one after another, in order.

    Args:
        input: The original list.
        output: The desired list.
        ptr: Pointer to the list itself; index tokens are appended to it.

    Returns:
        A list of operation dicts. Additions and removals are emitted
        directly; a replaced element is diffed recursively with ``diff``, so
        it may contribute several operations.
    """
    memory = {(0, 0): {"ops": [], "cost": 0}}

    def extend(base, op):
        # Script ``base`` followed by one more operation, one unit dearer.
        return {"ops": base["ops"] + [op], "cost": base["cost"] + 1}

    def dist(i, j):
        # Cheapest script for input[:i] -> output[:j], memoized in ``memory``.
        if (i, j) not in memory:
            if i > 0 and j > 0 and input[i - 1] == output[j - 1]:
                step = dist(i - 1, j - 1)
            else:
                paths = []
                if i > 0:
                    op = {"op": "remove", "idx": i - 1}
                    paths.append(extend(dist(i - 1, j), op))
                if j > 0:
                    # Store the index of the last consumed *input* element
                    # (-1 when none): the rewrite below works in input
                    # coordinates and inserts right after that element.
                    op = {"op": "add", "idx": i - 1, "value": output[j - 1]}
                    paths.append(extend(dist(i, j - 1), op))
                if i > 0 and j > 0:
                    op = {
                        "op": "replace",
                        "idx": i - 1,
                        "original": input[i - 1],
                        "value": output[j - 1],
                    }
                    paths.append(extend(dist(i - 1, j - 1), op))
                # On equal cost the earliest candidate wins.
                step = min(paths, key=lambda a: a["cost"])
            memory[(i, j)] = step
        return memory[(i, j)]

    ops = dist(len(input), len(output))["ops"]

    padded_ops = []
    # Additions minus removals emitted so far, i.e. how far an input index
    # has shifted in the partially patched list.
    padding = 0
    for op in ops:
        if op["op"] == "add":
            padded_idx = op["idx"] + 1 + padding
            # An insertion at the current end of the list is written as "-".
            at_end = padded_idx >= len(input) + padding
            idx_token = "-" if at_end else str(padded_idx)
            padded_ops.append(
                {
                    "op": "add",
                    "path": str(ptr.append(idx_token)),
                    "value": op["value"],
                }
            )
            padding += 1
        elif op["op"] == "remove":
            padded_ops.append(
                {
                    "op": "remove",
                    "path": str(ptr.append(str(op["idx"] + padding))),
                }
            )
            padding -= 1
        else:
            replace_ptr = ptr.append(str(op["idx"] + padding))
            padded_ops.extend(diff(op["original"], op["value"], replace_ptr))

    return padded_ops
80+
81+
82+
def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> List:
    """Compute the operations that turn dict ``input`` into dict ``output``.

    Keys are visited in dictionary insertion order rather than set order, so
    the resulting list is the same from run to run.

    Args:
        input: The original dict.
        output: The desired dict.
        ptr: Pointer to the dict itself; each key is appended to it.

    Returns:
        "remove" operations for keys found only in ``input``, then "add"
        operations for keys found only in ``output``, then whatever ``diff``
        reports for each key present in both.
    """
    ops = []
    for key in input:
        if key not in output:
            ops.append({"op": "remove", "path": str(ptr.append(key)), "key": key})
    for key, value in output.items():
        if key not in input:
            ops.append(
                {
                    "op": "add",
                    "path": str(ptr.append(key)),
                    "key": key,
                    "value": value,
                }
            )
    for key in input:
        if key in output:
            ops.extend(diff(input[key], output[key], ptr.append(key)))
    return ops
100+
101+
102+
def diff_sets(input: Set, output: Set, ptr: Pointer) -> List:
    """Compute the operations that turn set ``input`` into set ``output``.

    Members only in ``input`` give "remove" operations, followed by "add"
    operations for members only in ``output``. ``ptr`` is accepted but not
    used yet, so the operations carry no path.
    """
    # TODO: pointers?
    removals = [{"op": "remove", "value": gone} for gone in input - output]
    additions = [{"op": "add", "value": new} for new in output - input]
    return removals + additions
110+
111+
112+
def diff(input: Diffable, output: Diffable, ptr: Pointer = None) -> List:
    """Return the operations that turn ``input`` into ``output``.

    Equal values give an empty list. Two lists, two dicts or two sets are
    handed to the matching specialised routine; any other pair becomes a
    single "replace" at ``ptr``. When ``ptr`` is omitted a fresh root
    ``Pointer`` is used.
    """
    if input == output:
        return []

    here = Pointer() if ptr is None else ptr

    def both(kind):
        return isinstance(input, kind) and isinstance(output, kind)

    if both(list):
        return diff_lists(input, output, here)
    if both(dict):
        return diff_dicts(input, output, here)
    if both(set):
        return diff_sets(input, output, here)

    replacement = {"op": "replace", "path": str(here), "value": output}
    return [replacement]

patchdiff/pointer.py

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import re
2+
from typing import List
3+
4+
from .types import Diffable
5+
6+
7+
# Precompiled patterns for the two-character escape sequences "~0" and "~1"
# and for the raw characters "~" and "/" that they stand for in a token.
tilde0_re = re.compile("~0")
8+
tilde1_re = re.compile("~1")
9+
tilde_re = re.compile("~")
10+
slash_re = re.compile("/")
11+
12+
13+
def unescape(token: str) -> str:
    """Decode an escaped pointer token: "~1" becomes "/" and "~0" becomes "~".

    "~1" is decoded first so that the text "~01" correctly yields "~1"
    instead of "/".
    """
    return token.replace("~1", "/").replace("~0", "~")
15+
16+
17+
def escape(token: str) -> str:
    """Encode a raw pointer token: "~" becomes "~0" and "/" becomes "~1".

    "~" is encoded first so that the "~" introduced for "/" is not escaped a
    second time.
    """
    return token.replace("~", "~0").replace("/", "~1")
19+
20+
21+
class Pointer:
    """A JSON Pointer held as a list of unescaped reference tokens.

    The first token is the empty string, which stands for the document root,
    so joining the tokens with "/" gives the usual ``/a/b`` notation.
    """

    def __init__(self, tokens: List[str] = None) -> None:
        """Create a pointer from ``tokens``; the root pointer when omitted."""
        if tokens is None:
            tokens = [""]
        # Copy, so that iappend() never mutates a list owned by the caller.
        self.tokens = list(tokens)

    @staticmethod
    def from_json(path: str) -> "Pointer":
        """Parse the string form (e.g. ``/foo/0``) into a Pointer."""
        tokens = [unescape(t) for t in path.split("/")]
        return Pointer(tokens)

    def __str__(self) -> str:
        # str() first, so non-string tokens (e.g. integer dict keys) render
        # instead of raising inside escape().
        return "/".join(escape(str(t)) for t in self.tokens)

    @staticmethod
    def _index(container, token):
        """Return ``token`` in the form needed to index ``container``."""
        # Lists are indexed by integer position; everything else by the token.
        return int(token) if isinstance(container, list) else token

    def evaluate(self, obj: Diffable):
        """Walk ``obj`` along the tokens.

        Returns:
            A ``(parent, key, value)`` tuple: the container holding the final
            value, the key or integer index used to reach it, and the value
            itself. For the root pointer this is ``(None, "", obj)``.
        """
        key = ""
        parent = None
        value = obj
        for key in self.tokens[1:]:
            parent = value
            key = self._index(parent, key)
            value = parent[key]
        return parent, key, value

    def get(self, obj: Diffable):
        """Return the value this pointer refers to inside ``obj``."""
        _, _, value = self.evaluate(obj)
        return value

    def set(self, obj: Diffable, value):
        """Store ``value`` in ``obj`` at the location this pointer refers to."""
        cursor = obj
        for key in self.tokens[1:-1]:
            cursor = cursor[self._index(cursor, key)]
        # Convert the last token too, so list elements can be assigned.
        cursor[self._index(cursor, self.tokens[-1])] = value

    def iappend(self, token):
        """append, in-place"""
        self.tokens.append(token)

    def append(self, token):
        """append, creating new Pointer"""
        return Pointer(self.tokens + [token])

patchdiff/types.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from typing import Dict, List, Set, Tuple, Union
2+
3+
# Type alias covering the built-in container types: dict, list, set and tuple.
Diffable = Union[Dict, List, Set, Tuple]

setup.cfg

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ max-line-length = 88
55
extend-ignore =
66
# See https://github.com/PyCQA/pycodestyle/issues/373
77
E203,
8-
application-import-names = observ
8+
application-import-names = patchdiff
99
import-order-style = google
1010
per-file-ignores =
11-
observ/__init__.py:F401,F403
11+
patchdiff/__init__.py:F401,F403
1212
exclude = .venv

tests/test_all.py renamed to tests/test_diff.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ def test_list():
77
ops = diff(a, b)
88

99
assert len(ops) == 3
10-
assert (ops[0]["op"], ops[0]["idx"]) == ("replace", 0)
11-
assert (ops[1]["op"], ops[1]["idx"]) == ("replace", 3)
12-
assert (ops[2]["op"], ops[2]["idx"]) == ("remove", 4)
10+
assert (ops[0]["op"], ops[0]["path"]) == ("replace", "/0")
11+
assert (ops[1]["op"], ops[1]["path"]) == ("replace", "/3")
12+
assert (ops[2]["op"], ops[2]["path"]) == ("remove", "/4")
1313

1414

1515
def test_dicts():

tests/test_pointer.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from patchdiff.pointer import Pointer
2+
3+
4+
def test_pointer_get():
    """Pointer.get follows list indices and dict keys along the token path."""
    document = [1, 5, {"foo": 1, "bar": [1, 2, 3]}, "sdfsdf", "fff"]
    top_level = Pointer(["", "1"])
    nested = Pointer(["", "2", "bar", "1"])
    assert top_level.get(document) == 5
    assert nested.get(document) == 2
8+
9+
10+
def test_pointer_str():
    """str() renders the tokens joined by "/"."""
    single = Pointer(["", "1"])
    nested = Pointer(["", "foo", "bar", "-"])
    assert str(single) == "/1"
    assert str(nested) == "/foo/bar/-"
13+
14+
15+
def test_pointer_iappend():
    """iappend() extends the pointer in place, one token per call."""
    ptr = Pointer()
    for token in ("3", "foo"):
        ptr.iappend(token)
    assert str(ptr) == "/3/foo"

0 commit comments

Comments
 (0)