Skip to content

Commit

Permalink
diff: about unified diff encoder
Browse files Browse the repository at this point in the history
  • Loading branch information
fcharlie committed Dec 13, 2024
1 parent 2010b92 commit 38381f9
Show file tree
Hide file tree
Showing 12 changed files with 506 additions and 180 deletions.
85 changes: 48 additions & 37 deletions modules/diferenco/diferenco_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package diferenco

import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"testing"

"github.com/antgroup/hugescm/modules/diferenco/color"
)

func TestDiff(t *testing.T) {
Expand All @@ -23,45 +26,53 @@ func TestDiff(t *testing.T) {
return
}
textB := string(bytesB)
sink := &Sink{
Index: make(map[string]int),
u, err := DoUnified(context.Background(), &Options{
From: &File{
Path: "a.txt",
},
To: nil,
A: textA,
B: textB,
})
if err != nil {
return
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
changes := OnpDiff(a, b)
i := 0
for _, c := range changes {
for ; i < c.P1; i++ {
fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]])
}
for j := c.P1; j < c.P1+c.Del; j++ {
fmt.Fprintf(os.Stderr, "\x1b[31m- %s\x1b[0m", sink.Lines[a[j]])
}
for j := c.P2; j < c.P2+c.Ins; j++ {
fmt.Fprintf(os.Stderr, "\x1b[32m+ %s\x1b[0m", sink.Lines[b[j]])
}
i += c.Del
fmt.Fprintf(os.Stderr, "%s\n", u)
}

func TestDiff2(t *testing.T) {
_, filename, _, _ := runtime.Caller(0)
dir := filepath.Dir(filename)
bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt"))
if err != nil {
fmt.Fprintf(os.Stderr, "read a error: %v\n", err)
return
}
for ; i < len(a); i++ {
fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]])
textA := string(bytesA)
bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt"))
if err != nil {
fmt.Fprintf(os.Stderr, "read b error: %v\n", err)
return
}
fmt.Fprintf(os.Stderr, "\n\npatience\n\n")

diffs := PatienceDiff(a, b)
for _, d := range diffs {
switch d.T {
case Delete:
for _, i := range d.E {
fmt.Fprintf(os.Stderr, "\x1b[31m-%s\x1b[0m", sink.Lines[i])
}
case Insert:
for _, i := range d.E {
fmt.Fprintf(os.Stderr, "\x1b[32m+%s\x1b[0m", sink.Lines[i])
}
default:
for _, i := range d.E {
fmt.Fprintf(os.Stderr, " %s", sink.Lines[i])
}
}
textB := string(bytesB)
u, err := DoUnified(context.Background(), &Options{
From: &File{
Path: "a.txt",
Hash: "4789568",
Mode: 0o10644,
},
To: &File{
Path: "b.txt",
Hash: "6547898",
Mode: 0o10644,
},
A: textA,
B: textB,
})
if err != nil {
return
}
e := NewUnifiedEncoder(os.Stderr)
e.SetColor(color.NewColorConfig())
_ = e.Encode([]*Unified{u})
}
35 changes: 25 additions & 10 deletions modules/diferenco/histogram.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Reimplemented in Go with reference to https://github.com/pascalkuthe/imara-diff.
package diferenco

import "context"

// https://stackoverflow.com/questions/32365271/whats-the-difference-between-git-diff-patience-and-git-diff-histogram/32367597#32367597
// https://arxiv.org/abs/1902.02467

Expand Down Expand Up @@ -141,29 +143,40 @@ type changesOut struct {
changes []Change
}

func (h *histogram[E]) run(beforce []E, beforePos int, after []E, afterPos int, o *changesOut) {
func (h *histogram[E]) run(ctx context.Context, beforce []E, beforePos int, after []E, afterPos int, o *changesOut) error {
for {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
if len(beforce) == 0 {
if len(after) != 0 {
o.changes = append(o.changes, Change{P1: beforePos, P2: afterPos, Ins: len(after)})
}
return
return nil
}
if len(after) == 0 {
o.changes = append(o.changes, Change{P1: beforePos, P2: afterPos, Del: len(beforce)})
return
return nil
}
h.populate(beforce)
lcs := findLcs(beforce, after, h)
if lcs == nil {
o.changes = append(o.changes, onpDiff(beforce, beforePos, after, afterPos)...)
return
changes, err := onpDiff(ctx, beforce, beforePos, after, afterPos)
if err != nil {
return err
}
o.changes = append(o.changes, changes...)
return nil
}
if lcs.length == 0 {
o.changes = append(o.changes, Change{P1: beforePos, P2: afterPos, Del: len(beforce), Ins: len(after)})
return
return nil
}
if err := h.run(ctx, beforce[:lcs.beforeStart], beforePos, after[:lcs.afterStart], afterPos, o); err != nil {
return err
}
h.run(beforce[:lcs.beforeStart], beforePos, after[:lcs.afterStart], afterPos, o)
e1 := lcs.beforeStart + lcs.length
beforce = beforce[e1:]
beforePos += e1
Expand All @@ -173,7 +186,7 @@ func (h *histogram[E]) run(beforce []E, beforePos int, after []E, afterPos int,
}
}

func HistogramDiff[E comparable](L1, L2 []E) []Change {
func HistogramDiff[E comparable](ctx context.Context, L1, L2 []E) ([]Change, error) {
prefix := commonPrefixLength(L1, L2)
L1 = L1[prefix:]
L2 = L2[prefix:]
Expand All @@ -184,6 +197,8 @@ func HistogramDiff[E comparable](L1, L2 []E) []Change {
tokenOccurances: make(map[E][]int, len(L1)),
}
o := &changesOut{changes: make([]Change, 0, 100)}
h.run(L1, prefix, L2, prefix, o)
return o.changes
if err := h.run(ctx, L1, prefix, L2, prefix, o); err != nil {
return nil, err
}
return o.changes, nil
}
3 changes: 2 additions & 1 deletion modules/diferenco/histogram_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package diferenco

import (
"context"
"fmt"
"os"
"path/filepath"
Expand Down Expand Up @@ -28,7 +29,7 @@ func TestHistogram(t *testing.T) {
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
changes := HistogramDiff(a, b)
changes, _ := HistogramDiff(context.Background(), a, b)
i := 0
for _, c := range changes {
for ; i < c.P1; i++ {
Expand Down
20 changes: 14 additions & 6 deletions modules/diferenco/myers.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,22 @@

package diferenco

import "slices"
import (
"context"
"slices"
)

func MyersDiff[E comparable](seq1, seq2 []E) []Change {
func MyersDiff[E comparable](ctx context.Context, seq1, seq2 []E) ([]Change, error) {
// These are common special cases.
// The early return improves performance dramatically.
if len(seq1) == 0 && len(seq2) == 0 {
return []Change{}
return []Change{}, nil
}
if len(seq1) == 0 {
return []Change{{Ins: len(seq2)}}
return []Change{{Ins: len(seq2)}}, nil
}
if len(seq2) == 0 {
return []Change{{Del: len(seq1)}}
return []Change{{Del: len(seq1)}}, nil
}
seqX := seq1
seqY := seq2
Expand Down Expand Up @@ -48,6 +51,11 @@ func MyersDiff[E comparable](seq1, seq2 []E) []Change {
k := 0
outer:
for {
select {
case <-ctx.Done():
return nil, ctx.Err()
default:
}
d++
// The paper has `for (k = -d; k <= d; k += 2)`, but we can ignore diagonals that cannot influence the result.
lowerBound := -min(d, len(seqY)+(d%2))
Expand Down Expand Up @@ -110,7 +118,7 @@ outer:
path = path.pre
}
slices.Reverse(changes)
return changes
return changes, nil
}

type SnakePath struct {
Expand Down
11 changes: 6 additions & 5 deletions modules/diferenco/myers_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package diferenco

import (
"context"
"fmt"
"os"
"path/filepath"
Expand Down Expand Up @@ -28,7 +29,7 @@ func TestMyersDiff(t *testing.T) {
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
changes := MyersDiff(a, b)
changes, _ := MyersDiff(context.Background(), a, b)
i := 0
for _, c := range changes {
for ; i < c.P1; i++ {
Expand Down Expand Up @@ -68,8 +69,8 @@ func TestMyersDiff2(t *testing.T) {
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
changes := MyersDiff(a, b)
u := sink.ToUnified(File{Path: "a.txt"}, File{Path: "b.txt"}, changes, a, b, DefaultContextLines)
changes, _ := MyersDiff(context.Background(), a, b)
u := sink.ToUnified(&File{Path: "a.txt"}, &File{Path: "b.txt"}, changes, a, b, DefaultContextLines)
fmt.Fprintf(os.Stderr, "diff:\n%s\n", u.String())
}

Expand All @@ -89,7 +90,7 @@ func TestMyersDiff3(t *testing.T) {
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
changes := MyersDiff(a, b)
u := sink.ToUnified(File{Path: "a.txt"}, File{Path: "b.txt"}, changes, a, b, DefaultContextLines)
changes, _ := MyersDiff(context.Background(), a, b)
u := sink.ToUnified(&File{Path: "a.txt"}, &File{Path: "b.txt"}, changes, a, b, DefaultContextLines)
fmt.Fprintf(os.Stderr, "diff:\n%s\n", u.String())
}
19 changes: 13 additions & 6 deletions modules/diferenco/onp.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
// "An O(NP) Sequence Comparison Algorithm" August 1989.
package diferenco

func onpDiff[E comparable](L1 []E, P1 int, L2 []E, P2 int) []Change {
import "context"

func onpDiff[E comparable](ctx context.Context, L1 []E, P1 int, L2 []E, P2 int) ([]Change, error) {
m, n := len(L1), len(L2)
c := &onpCtx[E]{L1: L1, L2: L2, P1: P1, P2: P2}
if n >= m {
Expand All @@ -21,7 +23,7 @@ func onpDiff[E comparable](L1 []E, P1 int, L2 []E, P2 int) []Change {
c.xchg = true
}
c.Δ = c.N - c.M
return c.compare()
return c.compare(ctx)
}

type onpCtx[E comparable] struct {
Expand All @@ -33,7 +35,12 @@ type onpCtx[E comparable] struct {
xchg bool
}

func (c *onpCtx[E]) compare() []Change {
func (c *onpCtx[E]) compare(ctx context.Context) ([]Change, error) {
select {
case <-ctx.Done():
return nil, ctx.Err()
default:
}
c.fp = make([]point, (c.M+1)+(c.N+1)+1)
for i := range c.fp {
c.fp[i].y = -1
Expand Down Expand Up @@ -71,7 +78,7 @@ func (c *onpCtx[E]) compare() []Change {
changes = append(changes, Change{y + c.P1, x + c.P2, c.N - y, c.M - x})
}
}
return changes
return changes, nil
}

func (c *onpCtx[E]) snake(k int) {
Expand Down Expand Up @@ -140,13 +147,13 @@ type onpLcs struct {

// OnpDiff returns the differences between L1 and L2.
// It performs O(NP) element comparisons in the worst case.
func OnpDiff[E comparable](L1, L2 []E) []Change {
func OnpDiff[E comparable](ctx context.Context, L1, L2 []E) ([]Change, error) {
//return myersDiff(L1, 0, L2, 0)
prefix := commonPrefixLength(L1, L2)
L1 = L1[prefix:]
L2 = L2[prefix:]
suffix := commonSuffixLength(L1, L2)
L1 = L1[:len(L1)-suffix]
L2 = L2[:len(L2)-suffix]
return onpDiff(L1, prefix, L2, prefix)
return onpDiff(ctx, L1, prefix, L2, prefix)
}
3 changes: 2 additions & 1 deletion modules/diferenco/onp_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package diferenco

import (
"context"
"fmt"
"os"
"path/filepath"
Expand Down Expand Up @@ -28,7 +29,7 @@ func TestONP(t *testing.T) {
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
changes := OnpDiff(a, b)
changes, _ := OnpDiff(context.Background(), a, b)
i := 0
for _, c := range changes {
for ; i < c.P1; i++ {
Expand Down
Loading

0 comments on commit 38381f9

Please sign in to comment.