diff --git a/modules/udiff/LICENSE-BSD b/modules/udiff/LICENSE-BSD deleted file mode 100644 index 6a66aea..0000000 --- a/modules/udiff/LICENSE-BSD +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/modules/udiff/LICENSE-MIT b/modules/udiff/LICENSE-MIT deleted file mode 100644 index d9ee6a5..0000000 --- a/modules/udiff/LICENSE-MIT +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2023 Ayman Bagabas - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/modules/udiff/README.md b/modules/udiff/README.md deleted file mode 100644 index 63e7cc9..0000000 --- a/modules/udiff/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# µDiff - -Port from: [https://github.com/aymanbagabas/go-udiff](https://github.com/aymanbagabas/go-udiff) \ No newline at end of file diff --git a/modules/udiff/VERSION b/modules/udiff/VERSION deleted file mode 100644 index 7820aef..0000000 --- a/modules/udiff/VERSION +++ /dev/null @@ -1,2 +0,0 @@ -https://github.com/aymanbagabas/go-udiff -VERSION: ce75f43a7e2be9ebea2c8ec88de3a0b655d85076 \ No newline at end of file diff --git a/modules/udiff/_patches/0001-fix-remove-testenv-package.patch b/modules/udiff/_patches/0001-fix-remove-testenv-package.patch deleted file mode 100644 index 9506b3c..0000000 --- a/modules/udiff/_patches/0001-fix-remove-testenv-package.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 225b4fb5cb35cf333d28ce64b9bb9f9bc492cce7 Mon Sep 17 00:00:00 2001 -From: Ayman Bagabas -Date: Wed, 13 Dec 2023 12:38:49 -0500 -Subject: [PATCH 1/2] fix: remove testenv package - -Signed-off-by: Ayman Bagabas ---- - diff_test.go | 3 --- - difftest/difftest_test.go | 2 -- - 2 files changed, 5 deletions(-) - -diff --git a/diff_test.go b/diff_test.go -index a5e18dbc06eb..dc0063db13ef 100644 ---- a/diff_test.go -+++ b/diff_test.go -@@ -17,7 +17,6 @@ import ( - - diff "github.com/aymanbagabas/go-udiff" - "github.com/aymanbagabas/go-udiff/difftest" -- "golang.org/x/tools/internal/testenv" - ) - - func TestApply(t *testing.T) { -@@ -120,7 +119,6 @@ func TestLineEdits(t *testing.T) { - } - - func TestToUnified(t *testing.T) { -- testenv.NeedsTool(t, "patch") - for _, tc := range difftest.TestCases { - t.Run(tc.Name, func(t *testing.T) { - unified, err := diff.ToUnified(difftest.FileA, difftest.FileB, tc.In, tc.Edits, diff.DefaultContextLines) -@@ -156,7 +154,6 @@ func TestToUnified(t *testing.T) { - t.Errorf("applying unified failed: got\n%q, wanted\n%q unified\n%q", - got, tc.Out, unified) - } -- - }) - } - } -diff --git a/difftest/difftest_test.go b/difftest/difftest_test.go -index 02cfca0ccc3b..4ccfe7c43bba 100644 ---- a/difftest/difftest_test.go -+++ b/difftest/difftest_test.go -@@ -15,11 +15,9 @@ import ( - "testing" - - "github.com/aymanbagabas/go-udiff/difftest" -- "golang.org/x/tools/internal/testenv" - ) - - func TestVerifyUnified(t *testing.T) { -- testenv.NeedsTool(t, "diff") - for _, test := range difftest.TestCases { - t.Run(test.Name, func(t *testing.T) { - if test.NoDiff { --- -2.43.0 - diff --git a/modules/udiff/_patches/0002-fix-re-export-symbols.patch b/modules/udiff/_patches/0002-fix-re-export-symbols.patch deleted file mode 100644 index 464c211..0000000 --- a/modules/udiff/_patches/0002-fix-re-export-symbols.patch +++ /dev/null @@ -1,250 +0,0 @@ -From b347a6a816ada3af6e374a480d13d469979d8c34 Mon Sep 17 00:00:00 2001 -From: Ayman Bagabas -Date: Wed, 13 Dec 2023 12:42:58 -0500 -Subject: [PATCH 2/2] fix: re-export symbols - -Signed-off-by: Ayman Bagabas ---- - unified.go | 122 ++++++++++++++++++++++++++--------------------------- - 1 file changed, 61 insertions(+), 61 deletions(-) - -diff --git a/unified.go b/unified.go -index 85523a1b435a..268ad228825b 100644 ---- a/unified.go -+++ b/unified.go -@@ -41,57 +41,57 @@ func ToUnified(oldLabel, newLabel, content string, edits []Edit, contextLines in - - // unified represents a set of edits as a unified diff. - type unified struct { -- // from is the name of the original file. -- from string -- // to is the name of the modified file. 
-- to string -- // hunks is the set of edit hunks needed to transform the file content. -- hunks []*hunk -+ // From is the name of the original file. -+ From string -+ // To is the name of the modified file. -+ To string -+ // Hunks is the set of edit Hunks needed to transform the file content. -+ Hunks []*hunk - } - - // Hunk represents a contiguous set of line edits to apply. - type hunk struct { - // The line in the original source where the hunk starts. -- fromLine int -+ FromLine int - // The line in the original source where the hunk finishes. -- toLine int -+ ToLine int - // The set of line based edits to apply. -- lines []line -+ Lines []line - } - - // Line represents a single line operation to apply as part of a Hunk. - type line struct { -- // kind is the type of line this represents, deletion, insertion or copy. -- kind opKind -- // content is the content of this line. -+ // Kind is the type of line this represents, deletion, insertion or copy. -+ Kind OpKind -+ // Content is the Content of this line. - // For deletion it is the line being removed, for all others it is the line - // to put in the output. -- content string -+ Content string - } - --// opKind is used to denote the type of operation a line represents. --type opKind int -+// OpKind is used to denote the type of operation a line represents. -+type OpKind int - - const ( -- // opDelete is the operation kind for a line that is present in the input -+ // Delete is the operation kind for a line that is present in the input - // but not in the output. -- opDelete opKind = iota -- // opInsert is the operation kind for a line that is new in the output. -- opInsert -- // opEqual is the operation kind for a line that is the same in the input and -+ Delete OpKind = iota -+ // Insert is the operation kind for a line that is new in the output. -+ Insert -+ // Equal is the operation kind for a line that is the same in the input and - // output, often used to provide context around edited lines. -- opEqual -+ Equal - ) - - // String returns a human readable representation of an OpKind. It is not - // intended for machine processing. 
--func (k opKind) String() string { -+func (k OpKind) String() string { - switch k { -- case opDelete: -+ case Delete: - return "delete" -- case opInsert: -+ case Insert: - return "insert" -- case opEqual: -+ case Equal: - return "equal" - default: - panic("unknown operation kind") -@@ -103,8 +103,8 @@ func (k opKind) String() string { - func toUnified(fromName, toName string, content string, edits []Edit, contextLines int) (unified, error) { - gap := contextLines * 2 - u := unified{ -- from: fromName, -- to: toName, -+ From: fromName, -+ To: toName, - } - if len(edits) == 0 { - return u, nil -@@ -129,35 +129,35 @@ func toUnified(fromName, toName string, content string, edits []Edit, contextLin - - switch { - case h != nil && start == last: -- //direct extension -+ // direct extension - case h != nil && start <= last+gap: -- //within range of previous lines, add the joiners -+ // within range of previous lines, add the joiners - addEqualLines(h, lines, last, start) - default: -- //need to start a new hunk -+ // need to start a new hunk - if h != nil { - // add the edge to the previous hunk - addEqualLines(h, lines, last, last+contextLines) -- u.hunks = append(u.hunks, h) -+ u.Hunks = append(u.Hunks, h) - } - toLine += start - last - h = &hunk{ -- fromLine: start + 1, -- toLine: toLine + 1, -+ FromLine: start + 1, -+ ToLine: toLine + 1, - } - // add the edge to the new hunk - delta := addEqualLines(h, lines, start-contextLines, start) -- h.fromLine -= delta -- h.toLine -= delta -+ h.FromLine -= delta -+ h.ToLine -= delta - } - last = start - for i := start; i < end; i++ { -- h.lines = append(h.lines, line{kind: opDelete, content: lines[i]}) -+ h.Lines = append(h.Lines, line{Kind: Delete, Content: lines[i]}) - last++ - } - if edit.New != "" { - for _, content := range splitLines(edit.New) { -- h.lines = append(h.lines, line{kind: opInsert, content: content}) -+ h.Lines = append(h.Lines, line{Kind: Insert, Content: content}) - toLine++ - } - } -@@ -165,7 +165,7 @@ func toUnified(fromName, toName string, content string, edits []Edit, contextLin - if h != nil { - // add the edge to the final hunk - addEqualLines(h, lines, last, last+contextLines) -- u.hunks = append(u.hunks, h) -+ u.Hunks = append(u.Hunks, h) - } - return u, nil - } -@@ -187,7 +187,7 @@ func addEqualLines(h *hunk, lines []string, start, end int) int { - if i >= len(lines) { - return delta - } -- h.lines = append(h.lines, line{kind: opEqual, content: lines[i]}) -+ h.Lines = append(h.Lines, line{Kind: Equal, Content: lines[i]}) - delta++ - } - return delta -@@ -196,19 +196,19 @@ func addEqualLines(h *hunk, lines []string, start, end int) int { - // String converts a unified diff to the standard textual form for that diff. - // The output of this function can be passed to tools like patch. 
- func (u unified) String() string { -- if len(u.hunks) == 0 { -+ if len(u.Hunks) == 0 { - return "" - } - b := new(strings.Builder) -- fmt.Fprintf(b, "--- %s\n", u.from) -- fmt.Fprintf(b, "+++ %s\n", u.to) -- for _, hunk := range u.hunks { -+ fmt.Fprintf(b, "--- %s\n", u.From) -+ fmt.Fprintf(b, "+++ %s\n", u.To) -+ for _, hunk := range u.Hunks { - fromCount, toCount := 0, 0 -- for _, l := range hunk.lines { -- switch l.kind { -- case opDelete: -+ for _, l := range hunk.Lines { -+ switch l.Kind { -+ case Delete: - fromCount++ -- case opInsert: -+ case Insert: - toCount++ - default: - fromCount++ -@@ -217,32 +217,32 @@ func (u unified) String() string { - } - fmt.Fprint(b, "@@") - if fromCount > 1 { -- fmt.Fprintf(b, " -%d,%d", hunk.fromLine, fromCount) -- } else if hunk.fromLine == 1 && fromCount == 0 { -+ fmt.Fprintf(b, " -%d,%d", hunk.FromLine, fromCount) -+ } else if hunk.FromLine == 1 && fromCount == 0 { - // Match odd GNU diff -u behavior adding to empty file. - fmt.Fprintf(b, " -0,0") - } else { -- fmt.Fprintf(b, " -%d", hunk.fromLine) -+ fmt.Fprintf(b, " -%d", hunk.FromLine) - } - if toCount > 1 { -- fmt.Fprintf(b, " +%d,%d", hunk.toLine, toCount) -- } else if hunk.toLine == 1 && toCount == 0 { -+ fmt.Fprintf(b, " +%d,%d", hunk.ToLine, toCount) -+ } else if hunk.ToLine == 1 && toCount == 0 { - // Match odd GNU diff -u behavior adding to empty file. - fmt.Fprintf(b, " +0,0") - } else { -- fmt.Fprintf(b, " +%d", hunk.toLine) -+ fmt.Fprintf(b, " +%d", hunk.ToLine) - } - fmt.Fprint(b, " @@\n") -- for _, l := range hunk.lines { -- switch l.kind { -- case opDelete: -- fmt.Fprintf(b, "-%s", l.content) -- case opInsert: -- fmt.Fprintf(b, "+%s", l.content) -+ for _, l := range hunk.Lines { -+ switch l.Kind { -+ case Delete: -+ fmt.Fprintf(b, "-%s", l.Content) -+ case Insert: -+ fmt.Fprintf(b, "+%s", l.Content) - default: -- fmt.Fprintf(b, " %s", l.content) -+ fmt.Fprintf(b, " %s", l.Content) - } -- if !strings.HasSuffix(l.content, "\n") { -+ if !strings.HasSuffix(l.Content, "\n") { - fmt.Fprintf(b, "\n\\ No newline at end of file\n") - } - } --- -2.43.0 - diff --git a/modules/udiff/diff.go b/modules/udiff/diff.go deleted file mode 100644 index 5416163..0000000 --- a/modules/udiff/diff.go +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package diff computes differences between text files or strings. -package udiff - -import ( - "fmt" - "sort" - "strings" -) - -// An Edit describes the replacement of a portion of a text file. -type Edit struct { - Start, End int // byte offsets of the region to replace - New string // the replacement -} - -func (e Edit) String() string { - return fmt.Sprintf("{Start:%d,End:%d,New:%q}", e.Start, e.End, e.New) -} - -// Apply applies a sequence of edits to the src buffer and returns the -// result. Edits are applied in order of start offset; edits with the -// same start offset are applied in they order they were provided. -// -// Apply returns an error if any edit is out of bounds, -// or if any pair of edits is overlapping. -func Apply(src string, edits []Edit) (string, error) { - edits, size, err := validate(src, edits) - if err != nil { - return "", err - } - - // Apply edits. - out := make([]byte, 0, size) - lastEnd := 0 - for _, edit := range edits { - if lastEnd < edit.Start { - out = append(out, src[lastEnd:edit.Start]...) - } - out = append(out, edit.New...) 
- lastEnd = edit.End - } - out = append(out, src[lastEnd:]...) - - if len(out) != size { - panic("wrong size") - } - - return string(out), nil -} - -// ApplyBytes is like Apply, but it accepts a byte slice. -// The result is always a new array. -func ApplyBytes(src []byte, edits []Edit) ([]byte, error) { - res, err := Apply(string(src), edits) - return []byte(res), err -} - -// validate checks that edits are consistent with src, -// and returns the size of the patched output. -// It may return a different slice. -func validate(src string, edits []Edit) ([]Edit, int, error) { - if !sort.IsSorted(editsSort(edits)) { - edits = append([]Edit(nil), edits...) - SortEdits(edits) - } - - // Check validity of edits and compute final size. - size := len(src) - lastEnd := 0 - for _, edit := range edits { - if !(0 <= edit.Start && edit.Start <= edit.End && edit.End <= len(src)) { - return nil, 0, fmt.Errorf("diff has out-of-bounds edits") - } - if edit.Start < lastEnd { - return nil, 0, fmt.Errorf("diff has overlapping edits") - } - size += len(edit.New) + edit.Start - edit.End - lastEnd = edit.End - } - - return edits, size, nil -} - -// SortEdits orders a slice of Edits by (start, end) offset. -// This ordering puts insertions (end = start) before deletions -// (end > start) at the same point, but uses a stable sort to preserve -// the order of multiple insertions at the same point. -// (Apply detects multiple deletions at the same point as an error.) -func SortEdits(edits []Edit) { - sort.Stable(editsSort(edits)) -} - -type editsSort []Edit - -func (a editsSort) Len() int { return len(a) } -func (a editsSort) Less(i, j int) bool { - if cmp := a[i].Start - a[j].Start; cmp != 0 { - return cmp < 0 - } - return a[i].End < a[j].End -} -func (a editsSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] } - -// lineEdits expands and merges a sequence of edits so that each -// resulting edit replaces one or more complete lines. -// See ApplyEdits for preconditions. -func lineEdits(src string, edits []Edit) ([]Edit, error) { - edits, _, err := validate(src, edits) - if err != nil { - return nil, err - } - - // Do all deletions begin and end at the start of a line, - // and all insertions end with a newline? - // (This is merely a fast path.) - for _, edit := range edits { - if edit.Start >= len(src) || // insertion at EOF - edit.Start > 0 && src[edit.Start-1] != '\n' || // not at line start - edit.End > 0 && src[edit.End-1] != '\n' || // not at line start - edit.New != "" && edit.New[len(edit.New)-1] != '\n' { // partial insert - goto expand // slow path - } - } - return edits, nil // aligned - -expand: - if len(edits) == 0 { - return edits, nil // no edits (unreachable due to fast path) - } - expanded := make([]Edit, 0, len(edits)) // a guess - prev := edits[0] - // TODO(adonovan): opt: start from the first misaligned edit. - // TODO(adonovan): opt: avoid quadratic cost of string += string. - for _, edit := range edits[1:] { - between := src[prev.End:edit.Start] - if !strings.Contains(between, "\n") { - // overlapping lines: combine with previous edit. - prev.New += between + edit.New - prev.End = edit.End - } else { - // non-overlapping lines: flush previous edit. - expanded = append(expanded, expandEdit(prev, src)) - prev = edit - } - } - return append(expanded, expandEdit(prev, src)), nil // flush final edit -} - -// expandEdit returns edit expanded to complete whole lines. -func expandEdit(edit Edit, src string) Edit { - // Expand start left to start of line. 
- // (delta is the zero-based column number of start.) - start := edit.Start - if delta := start - 1 - strings.LastIndex(src[:start], "\n"); delta > 0 { - edit.Start -= delta - edit.New = src[start-delta:start] + edit.New - } - - // Expand end right to end of line. - end := edit.End - if end > 0 && src[end-1] != '\n' || - edit.New != "" && edit.New[len(edit.New)-1] != '\n' { - if nl := strings.IndexByte(src[end:], '\n'); nl < 0 { - edit.End = len(src) // extend to EOF - } else { - edit.End = end + nl + 1 // extend beyond \n - } - } - edit.New += src[end:edit.End] - - return edit -} diff --git a/modules/udiff/diff_test.go b/modules/udiff/diff_test.go deleted file mode 100644 index a6d7e24..0000000 --- a/modules/udiff/diff_test.go +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package udiff_test - -import ( - "bytes" - "math/rand" - "os" - "os/exec" - "path/filepath" - "reflect" - "strings" - "testing" - "unicode/utf8" - - diff "github.com/antgroup/hugescm/modules/udiff" - "github.com/antgroup/hugescm/modules/udiff/difftest" -) - -func TestApply(t *testing.T) { - for _, tc := range difftest.TestCases { - t.Run(tc.Name, func(t *testing.T) { - got, err := diff.Apply(tc.In, tc.Edits) - if err != nil { - t.Fatalf("Apply(Edits) failed: %v", err) - } - if got != tc.Out { - t.Errorf("Apply(Edits): got %q, want %q", got, tc.Out) - } - if tc.LineEdits != nil { - got, err := diff.Apply(tc.In, tc.LineEdits) - if err != nil { - t.Fatalf("Apply(LineEdits) failed: %v", err) - } - if got != tc.Out { - t.Errorf("Apply(LineEdits): got %q, want %q", got, tc.Out) - } - } - }) - } -} - -func TestNEdits(t *testing.T) { - for _, tc := range difftest.TestCases { - edits := diff.Strings(tc.In, tc.Out) - got, err := diff.Apply(tc.In, edits) - if err != nil { - t.Fatalf("Apply failed: %v", err) - } - if got != tc.Out { - t.Fatalf("%s: got %q wanted %q", tc.Name, got, tc.Out) - } - if len(edits) < len(tc.Edits) { // should find subline edits - t.Errorf("got %v, expected %v for %#v", edits, tc.Edits, tc) - } - } -} - -func TestNRandom(t *testing.T) { - for i := 0; i < 1000; i++ { - a := randstr("abω", 16) - b := randstr("abωc", 16) - edits := diff.Strings(a, b) - got, err := diff.Apply(a, edits) - if err != nil { - t.Fatalf("Apply failed: %v", err) - } - if got != b { - t.Fatalf("%d: got %q, wanted %q, starting with %q", i, got, b, a) - } - } -} - -// $ go test -fuzz=FuzzRoundTrip ./internal/diff -func FuzzRoundTrip(f *testing.F) { - f.Fuzz(func(t *testing.T, a, b string) { - if !utf8.ValidString(a) || !utf8.ValidString(b) { - return // inputs must be text - } - edits := diff.Strings(a, b) - got, err := diff.Apply(a, edits) - if err != nil { - t.Fatalf("Apply failed: %v", err) - } - if got != b { - t.Fatalf("applying diff(%q, %q) gives %q; edits=%v", a, b, got, edits) - } - }) -} - -func TestLineEdits(t *testing.T) { - for _, tc := range difftest.TestCases { - t.Run(tc.Name, func(t *testing.T) { - want := tc.LineEdits - if want == nil { - want = tc.Edits // already line-aligned - } - got, err := diff.LineEdits(tc.In, tc.Edits) - if err != nil { - t.Fatalf("LineEdits: %v", err) - } - if !reflect.DeepEqual(got, want) { - t.Errorf("in=<<%s>>\nout=<<%s>>\nraw edits=%s\nline edits=%s\nwant: %s", - tc.In, tc.Out, tc.Edits, got, want) - } - // make sure that applying the edits gives the expected result - fixed, err := diff.Apply(tc.In, got) - if err != nil { - t.Error(err) - } - 
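// Editorial sketch (not part of the deleted files above): the byte-offset
// Edit/Apply API that the removed diff.go provided, with usage inferred from
// the deleted tests and difftest cases. The import path is the one those
// tests used; treat this as illustrative rather than canonical.
package main

import (
	"fmt"

	diff "github.com/antgroup/hugescm/modules/udiff"
)

func main() {
	before := "1: one\n3: three\n"
	after := "1: one\n2: two\n3: three\n"

	// Strings computes byte-offset Edits that turn before into after.
	edits := diff.Strings(before, after)

	// Apply replays the edits over the original text.
	patched, err := diff.Apply(before, edits)
	if err != nil {
		panic(err)
	}
	fmt.Println(patched == after) // true

	// An Edit can also be written by hand: replace bytes [Start, End) with New.
	manual := []diff.Edit{{Start: 7, End: 7, New: "2: two\n"}}
	patched, _ = diff.Apply(before, manual)
	fmt.Println(patched == after) // true
}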
if fixed != tc.Out { - t.Errorf("Apply(LineEdits): got %q, want %q", fixed, tc.Out) - } - }) - } -} - -func TestToUnified(t *testing.T) { - for _, tc := range difftest.TestCases { - t.Run(tc.Name, func(t *testing.T) { - unified, err := diff.ToUnified(difftest.FileA, difftest.FileB, tc.In, tc.Edits, diff.DefaultContextLines) - if err != nil { - t.Fatal(err) - } - if unified == "" { - return - } - orig := filepath.Join(t.TempDir(), "original") - err = os.WriteFile(orig, []byte(tc.In), 0644) - if err != nil { - t.Fatal(err) - } - temp := filepath.Join(t.TempDir(), "patched") - err = os.WriteFile(temp, []byte(tc.In), 0644) - if err != nil { - t.Fatal(err) - } - cmd := exec.Command("patch", "-p0", "-u", "-s", "-o", temp, orig) - cmd.Stdin = strings.NewReader(unified) - cmd.Stdout = new(bytes.Buffer) - cmd.Stderr = new(bytes.Buffer) - if err = cmd.Run(); err != nil { - t.Fatalf("%v: %q (%q) (%q)", err, cmd.String(), - cmd.Stderr, cmd.Stdout) - } - got, err := os.ReadFile(temp) - if err != nil { - t.Fatal(err) - } - if string(got) != tc.Out { - t.Errorf("applying unified failed: got\n%q, wanted\n%q unified\n%q", - got, tc.Out, unified) - } - }) - } -} - -func TestRegressionOld001(t *testing.T) { - a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage udiff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/aymanbagabas/go-udiff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n" - - b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage udiff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/aymanbagabas/go-udiff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n" - diffs := diff.Strings(a, b) - got, err := diff.Apply(a, diffs) - if err != nil { - t.Fatalf("Apply failed: %v", err) - } - if got != b { - i := 0 - for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ { - } - t.Errorf("oops %vd\n%q\n%q", diffs, got, b) - t.Errorf("\n%q\n%q", got[i:], b[i:]) - } -} - -func TestRegressionOld002(t *testing.T) { - a := "n\"\n)\n" - b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n" - diffs := diff.Strings(a, b) - got, err := diff.Apply(a, diffs) - if err != nil { - t.Fatalf("Apply failed: %v", err) - } - if got != b { - i := 0 - for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ { - } - t.Errorf("oops %vd\n%q\n%q", diffs, got, b) - t.Errorf("\n%q\n%q", got[i:], b[i:]) - } -} - -// return a random string of length n made of characters from s -func randstr(s string, n int) string { - src := []rune(s) - x := make([]rune, n) - for i := 0; i < n; i++ { - x[i] = src[rand.Intn(len(src))] - } - return string(x) -} diff --git a/modules/udiff/difftest/difftest.go b/modules/udiff/difftest/difftest.go deleted file mode 100644 index ce0733a..0000000 --- a/modules/udiff/difftest/difftest.go +++ /dev/null @@ -1,324 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// Package difftest supplies a set of tests that will operate on any -// implementation of a diff algorithm as exposed by -// diff "github.com/aymanbagabas/go-udiff" -package difftest - -// There are two kinds of tests, semantic tests, and 'golden data' tests. -// The semantic tests check that the computed diffs transform the input to -// the output, and that 'patch' accepts the computed unified diffs. -// The other tests just check that Edits and LineEdits haven't changed -// unexpectedly. These fields may need to be changed when the diff algorithm -// changes. - -import ( - "testing" - - diff "github.com/antgroup/hugescm/modules/udiff" -) - -const ( - FileA = "from" - FileB = "to" - UnifiedPrefix = "--- " + FileA + "\n+++ " + FileB + "\n" -) - -var TestCases = []struct { - Name, In, Out, Unified string - Edits, LineEdits []diff.Edit // expectation (LineEdits=nil => already line-aligned) - NoDiff bool -}{{ - Name: "empty", - In: "", - Out: "", -}, { - Name: "no_diff", - In: "gargantuan\n", - Out: "gargantuan\n", -}, { - Name: "replace_all", - In: "fruit\n", - Out: "cheese\n", - Unified: UnifiedPrefix + ` -@@ -1 +1 @@ --fruit -+cheese -`[1:], - Edits: []diff.Edit{{Start: 0, End: 5, New: "cheese"}}, - LineEdits: []diff.Edit{{Start: 0, End: 6, New: "cheese\n"}}, -}, { - Name: "insert_rune", - In: "gord\n", - Out: "gourd\n", - Unified: UnifiedPrefix + ` -@@ -1 +1 @@ --gord -+gourd -`[1:], - Edits: []diff.Edit{{Start: 2, End: 2, New: "u"}}, - LineEdits: []diff.Edit{{Start: 0, End: 5, New: "gourd\n"}}, -}, { - Name: "delete_rune", - In: "groat\n", - Out: "goat\n", - Unified: UnifiedPrefix + ` -@@ -1 +1 @@ --groat -+goat -`[1:], - Edits: []diff.Edit{{Start: 1, End: 2, New: ""}}, - LineEdits: []diff.Edit{{Start: 0, End: 6, New: "goat\n"}}, -}, { - Name: "replace_rune", - In: "loud\n", - Out: "lord\n", - Unified: UnifiedPrefix + ` -@@ -1 +1 @@ --loud -+lord -`[1:], - Edits: []diff.Edit{{Start: 2, End: 3, New: "r"}}, - LineEdits: []diff.Edit{{Start: 0, End: 5, New: "lord\n"}}, -}, { - Name: "replace_partials", - In: "blanket\n", - Out: "bunker\n", - Unified: UnifiedPrefix + ` -@@ -1 +1 @@ --blanket -+bunker -`[1:], - Edits: []diff.Edit{ - {Start: 1, End: 3, New: "u"}, - {Start: 6, End: 7, New: "r"}, - }, - LineEdits: []diff.Edit{{Start: 0, End: 8, New: "bunker\n"}}, -}, { - Name: "insert_line", - In: "1: one\n3: three\n", - Out: "1: one\n2: two\n3: three\n", - Unified: UnifiedPrefix + ` -@@ -1,2 +1,3 @@ - 1: one -+2: two - 3: three -`[1:], - Edits: []diff.Edit{{Start: 7, End: 7, New: "2: two\n"}}, -}, { - Name: "replace_no_newline", - In: "A", - Out: "B", - Unified: UnifiedPrefix + ` -@@ -1 +1 @@ --A -\ No newline at end of file -+B -\ No newline at end of file -`[1:], - Edits: []diff.Edit{{Start: 0, End: 1, New: "B"}}, -}, { - Name: "delete_empty", - In: "meow", - Out: "", // GNU diff -u special case: +0,0 - Unified: UnifiedPrefix + ` -@@ -1 +0,0 @@ --meow -\ No newline at end of file -`[1:], - Edits: []diff.Edit{{Start: 0, End: 4, New: ""}}, - LineEdits: []diff.Edit{{Start: 0, End: 4, New: ""}}, -}, { - Name: "append_empty", - In: "", // GNU diff -u special case: -0,0 - Out: "AB\nC", - Unified: UnifiedPrefix + ` -@@ -0,0 +1,2 @@ -+AB -+C -\ No newline at end of file -`[1:], - Edits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}}, - LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}}, -}, - // TODO(adonovan): fix this test: GNU diff -u prints "+1,2", Unifies prints "+1,3". 
- // { - // Name: "add_start", - // In: "A", - // Out: "B\nCA", - // Unified: UnifiedPrefix + ` - // @@ -1 +1,2 @@ - // -A - // \ No newline at end of file - // +B - // +CA - // \ No newline at end of file - // `[1:], - // Edits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}}, - // LineEdits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}}, - // }, - { - Name: "add_end", - In: "A", - Out: "AB", - Unified: UnifiedPrefix + ` -@@ -1 +1 @@ --A -\ No newline at end of file -+AB -\ No newline at end of file -`[1:], - Edits: []diff.Edit{{Start: 1, End: 1, New: "B"}}, - LineEdits: []diff.Edit{{Start: 0, End: 1, New: "AB"}}, - }, { - Name: "add_empty", - In: "", - Out: "AB\nC", - Unified: UnifiedPrefix + ` -@@ -0,0 +1,2 @@ -+AB -+C -\ No newline at end of file -`[1:], - Edits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}}, - LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}}, - }, { - Name: "add_newline", - In: "A", - Out: "A\n", - Unified: UnifiedPrefix + ` -@@ -1 +1 @@ --A -\ No newline at end of file -+A -`[1:], - Edits: []diff.Edit{{Start: 1, End: 1, New: "\n"}}, - LineEdits: []diff.Edit{{Start: 0, End: 1, New: "A\n"}}, - }, { - Name: "delete_front", - In: "A\nB\nC\nA\nB\nB\nA\n", - Out: "C\nB\nA\nB\nA\nC\n", - Unified: UnifiedPrefix + ` -@@ -1,7 +1,6 @@ --A --B - C -+B - A - B --B - A -+C -`[1:], - NoDiff: true, // unified diff is different but valid - Edits: []diff.Edit{ - {Start: 0, End: 4, New: ""}, - {Start: 6, End: 6, New: "B\n"}, - {Start: 10, End: 12, New: ""}, - {Start: 14, End: 14, New: "C\n"}, - }, - LineEdits: []diff.Edit{ - {Start: 0, End: 4, New: ""}, - {Start: 6, End: 6, New: "B\n"}, - {Start: 10, End: 12, New: ""}, - {Start: 14, End: 14, New: "C\n"}, - }, - }, { - Name: "replace_last_line", - In: "A\nB\n", - Out: "A\nC\n\n", - Unified: UnifiedPrefix + ` -@@ -1,2 +1,3 @@ - A --B -+C -+ -`[1:], - Edits: []diff.Edit{{Start: 2, End: 3, New: "C\n"}}, - LineEdits: []diff.Edit{{Start: 2, End: 4, New: "C\n\n"}}, - }, - { - Name: "multiple_replace", - In: "A\nB\nC\nD\nE\nF\nG\n", - Out: "A\nH\nI\nJ\nE\nF\nK\n", - Unified: UnifiedPrefix + ` -@@ -1,7 +1,7 @@ - A --B --C --D -+H -+I -+J - E - F --G -+K -`[1:], - Edits: []diff.Edit{ - {Start: 2, End: 8, New: "H\nI\nJ\n"}, - {Start: 12, End: 14, New: "K\n"}, - }, - NoDiff: true, // diff algorithm produces different delete/insert pattern - }, - { - Name: "extra_newline", - In: "\nA\n", - Out: "A\n", - Edits: []diff.Edit{{Start: 0, End: 1, New: ""}}, - Unified: UnifiedPrefix + `@@ -1,2 +1 @@ -- - A -`, - }, { - Name: "unified_lines", - In: "aaa\nccc\n", - Out: "aaa\nbbb\nccc\n", - Edits: []diff.Edit{{Start: 3, End: 3, New: "\nbbb"}}, - LineEdits: []diff.Edit{{Start: 0, End: 4, New: "aaa\nbbb\n"}}, - Unified: UnifiedPrefix + "@@ -1,2 +1,3 @@\n aaa\n+bbb\n ccc\n", - }, { - Name: "60379", - In: `package a - -type S struct { -s fmt.Stringer -} -`, - Out: `package a - -type S struct { - s fmt.Stringer -} -`, - Edits: []diff.Edit{{Start: 27, End: 27, New: "\t"}}, - LineEdits: []diff.Edit{{Start: 27, End: 42, New: "\ts fmt.Stringer\n"}}, - Unified: UnifiedPrefix + "@@ -1,5 +1,5 @@\n package a\n \n type S struct {\n-s fmt.Stringer\n+\ts fmt.Stringer\n }\n", - }, -} - -func DiffTest(t *testing.T, compute func(before, after string) []diff.Edit) { - for _, test := range TestCases { - t.Run(test.Name, func(t *testing.T) { - edits := compute(test.In, test.Out) - got, err := diff.Apply(test.In, edits) - if err != nil { - t.Fatalf("Apply failed: %v", err) - } - unified, err := diff.ToUnified(FileA, FileB, test.In, edits, 
diff.DefaultContextLines) - if err != nil { - t.Fatalf("ToUnified: %v", err) - } - if got != test.Out { - t.Errorf("Apply: got patched:\n%v\nfrom diff:\n%v\nexpected:\n%v", - got, unified, test.Out) - } - if !test.NoDiff && unified != test.Unified { - t.Errorf("Unified: got diff:\n%q\nexpected:\n%q diffs:%v", - unified, test.Unified, edits) - } - }) - } -} diff --git a/modules/udiff/difftest/difftest_test.go b/modules/udiff/difftest/difftest_test.go deleted file mode 100644 index ee7f01a..0000000 --- a/modules/udiff/difftest/difftest_test.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package difftest supplies a set of tests that will operate on any -// implementation of a diff algorithm as exposed by -// diff "github.com/aymanbagabas/go-udiff" -package difftest_test - -import ( - "fmt" - "os" - "os/exec" - "strings" - "testing" - - "github.com/antgroup/hugescm/modules/udiff/difftest" -) - -func TestVerifyUnified(t *testing.T) { - for _, test := range difftest.TestCases { - t.Run(test.Name, func(t *testing.T) { - if test.NoDiff { - t.Skip("diff tool produces expected different results") - } - diff, err := getDiffOutput(test.In, test.Out) - if err != nil { - t.Fatal(err) - } - if len(diff) > 0 { - diff = difftest.UnifiedPrefix + diff - } - if diff != test.Unified { - t.Errorf("unified:\n%s\ndiff -u:\n%s", test.Unified, diff) - } - }) - } -} - -func getDiffOutput(a, b string) (string, error) { - fileA, err := os.CreateTemp("", "myers.in") - if err != nil { - return "", err - } - defer os.Remove(fileA.Name()) - if _, err := fileA.Write([]byte(a)); err != nil { - return "", err - } - if err := fileA.Close(); err != nil { - return "", err - } - fileB, err := os.CreateTemp("", "myers.in") - if err != nil { - return "", err - } - defer os.Remove(fileB.Name()) - if _, err := fileB.Write([]byte(b)); err != nil { - return "", err - } - if err := fileB.Close(); err != nil { - return "", err - } - cmd := exec.Command("diff", "-u", fileA.Name(), fileB.Name()) - cmd.Env = append(cmd.Env, "LANG=en_US.UTF-8") - out, err := cmd.Output() - if err != nil { - exit, ok := err.(*exec.ExitError) - if !ok { - return "", fmt.Errorf("can't exec %s: %v", cmd, err) - } - if len(out) == 0 { - // Nonzero exit with no output: terminated by signal? - return "", fmt.Errorf("%s failed: %v; stderr:\n%s", cmd, err, exit.Stderr) - } - // nonzero exit + output => files differ - } - diff := string(out) - if len(diff) <= 0 { - return diff, nil - } - bits := strings.SplitN(diff, "\n", 3) - if len(bits) != 3 { - return "", fmt.Errorf("diff output did not have file prefix:\n%s", diff) - } - return bits[2], nil -} diff --git a/modules/udiff/export.go b/modules/udiff/export.go deleted file mode 100644 index 2798cd5..0000000 --- a/modules/udiff/export.go +++ /dev/null @@ -1,10 +0,0 @@ -package udiff - -// UnifiedDiff is a unified diff. -type UnifiedDiff = unified - -// ToUnifiedDiff takes a file contents and a sequence of edits, and calculates -// a unified diff that represents those edits. 
-func ToUnifiedDiff(fromName, toName string, content string, edits []Edit, contextLines int) (UnifiedDiff, error) { - return toUnified(fromName, toName, content, edits, contextLines) -} diff --git a/modules/udiff/export_test.go b/modules/udiff/export_test.go deleted file mode 100644 index e787b7d..0000000 --- a/modules/udiff/export_test.go +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package udiff - -// This file exports some private declarations to tests. - -var LineEdits = lineEdits diff --git a/modules/udiff/lcs/common.go b/modules/udiff/lcs/common.go deleted file mode 100644 index c3e82dd..0000000 --- a/modules/udiff/lcs/common.go +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package lcs - -import ( - "log" - "sort" -) - -// lcs is a longest common sequence -type lcs []diag - -// A diag is a piece of the edit graph where A[X+i] == B[Y+i], for 0<=i l[j].Len - }) - return l -} - -// validate that the elements of the lcs do not overlap -// (can only happen when the two-sided algorithm ends early) -// expects the lcs to be sorted -func (l lcs) valid() bool { - for i := 1; i < len(l); i++ { - if l[i-1].X+l[i-1].Len > l[i].X { - return false - } - if l[i-1].Y+l[i-1].Len > l[i].Y { - return false - } - } - return true -} - -// repair overlapping lcs -// only called if two-sided stops early -func (l lcs) fix() lcs { - // from the set of diagonals in l, find a maximal non-conflicting set - // this problem may be NP-complete, but we use a greedy heuristic, - // which is quadratic, but with a better data structure, could be D log D. - // indepedent is not enough: {0,3,1} and {3,0,2} can't both occur in an lcs - // which has to have monotone x and y - if len(l) == 0 { - return nil - } - sort.Slice(l, func(i, j int) bool { return l[i].Len > l[j].Len }) - tmp := make(lcs, 0, len(l)) - tmp = append(tmp, l[0]) - for i := 1; i < len(l); i++ { - var dir direction - nxt := l[i] - for _, in := range tmp { - if dir, nxt = overlap(in, nxt); dir == empty || dir == bad { - break - } - } - if nxt.Len > 0 && dir != bad { - tmp = append(tmp, nxt) - } - } - tmp.sort() - if false && !tmp.valid() { // debug checking - log.Fatalf("here %d", len(tmp)) - } - return tmp -} - -type direction int - -const ( - empty direction = iota // diag is empty (so not in lcs) - leftdown // proposed acceptably to the left and below - rightup // proposed diag is acceptably to the right and above - bad // proposed diag is inconsistent with the lcs so far -) - -// overlap trims the proposed diag prop so it doesn't overlap with -// the existing diag that has already been added to the lcs. 
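// Editorial sketch (not part of the deleted files above): how the removed
// export.go / unified.go surface rendered edits as a unified diff. Signatures
// follow the deleted sources and the 0002 re-export patch; the example input
// is taken from the deleted difftest cases.
package main

import (
	"fmt"

	diff "github.com/antgroup/hugescm/modules/udiff"
)

func main() {
	before := "aaa\nccc\n"
	after := "aaa\nbbb\nccc\n"
	edits := diff.Strings(before, after)

	// ToUnified renders the edits in standard unified-diff text form.
	text, err := diff.ToUnified("from", "to", before, edits, diff.DefaultContextLines)
	if err != nil {
		panic(err)
	}
	fmt.Print(text)

	// ToUnifiedDiff exposes the same result as a structured value whose hunks
	// and lines can be walked programmatically (fields re-exported by the
	// 0002 patch shown earlier in this diff).
	ud, err := diff.ToUnifiedDiff("from", "to", before, edits, diff.DefaultContextLines)
	if err != nil {
		panic(err)
	}
	for _, h := range ud.Hunks {
		fmt.Printf("hunk -%d +%d\n", h.FromLine, h.ToLine)
		for _, l := range h.Lines {
			fmt.Printf("  %-6s %q\n", l.Kind, l.Content)
		}
	}
}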
-func overlap(exist, prop diag) (direction, diag) { - if prop.X <= exist.X && exist.X < prop.X+prop.Len { - // remove the end of prop where it overlaps with the X end of exist - delta := prop.X + prop.Len - exist.X - prop.Len -= delta - if prop.Len <= 0 { - return empty, prop - } - } - if exist.X <= prop.X && prop.X < exist.X+exist.Len { - // remove the beginning of prop where overlaps with exist - delta := exist.X + exist.Len - prop.X - prop.Len -= delta - if prop.Len <= 0 { - return empty, prop - } - prop.X += delta - prop.Y += delta - } - if prop.Y <= exist.Y && exist.Y < prop.Y+prop.Len { - // remove the end of prop that overlaps (in Y) with exist - delta := prop.Y + prop.Len - exist.Y - prop.Len -= delta - if prop.Len <= 0 { - return empty, prop - } - } - if exist.Y <= prop.Y && prop.Y < exist.Y+exist.Len { - // remove the beginning of peop that overlaps with exist - delta := exist.Y + exist.Len - prop.Y - prop.Len -= delta - if prop.Len <= 0 { - return empty, prop - } - prop.X += delta // no test reaches this code - prop.Y += delta - } - if prop.X+prop.Len <= exist.X && prop.Y+prop.Len <= exist.Y { - return leftdown, prop - } - if exist.X+exist.Len <= prop.X && exist.Y+exist.Len <= prop.Y { - return rightup, prop - } - // prop can't be in an lcs that contains exist - return bad, prop -} - -// manipulating Diag and lcs - -// prepend a diagonal (x,y)-(x+1,y+1) segment either to an empty lcs -// or to its first Diag. prepend is only called to extend diagonals -// the backward direction. -func (lcs lcs) prepend(x, y int) lcs { - if len(lcs) > 0 { - d := &lcs[0] - if int(d.X) == x+1 && int(d.Y) == y+1 { - // extend the diagonal down and to the left - d.X, d.Y = int(x), int(y) - d.Len++ - return lcs - } - } - - r := diag{X: int(x), Y: int(y), Len: 1} - lcs = append([]diag{r}, lcs...) - return lcs -} - -// append appends a diagonal, or extends the existing one. -// by adding the edge (x,y)-(x+1.y+1). append is only called -// to extend diagonals in the forward direction. -func (lcs lcs) append(x, y int) lcs { - if len(lcs) > 0 { - last := &lcs[len(lcs)-1] - // Expand last element if adjoining. - if last.X+last.Len == x && last.Y+last.Len == y { - last.Len++ - return lcs - } - } - - return append(lcs, diag{X: x, Y: y, Len: 1}) -} - -// enforce constraint on d, k -func ok(d, k int) bool { - return d >= 0 && -d <= k && k <= d -} diff --git a/modules/udiff/lcs/common_test.go b/modules/udiff/lcs/common_test.go deleted file mode 100644 index f19245e..0000000 --- a/modules/udiff/lcs/common_test.go +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package lcs - -import ( - "log" - "math/rand" - "strings" - "testing" -) - -type Btest struct { - a, b string - lcs []string -} - -var Btests = []Btest{ - {"aaabab", "abaab", []string{"abab", "aaab"}}, - {"aabbba", "baaba", []string{"aaba"}}, - {"cabbx", "cbabx", []string{"cabx", "cbbx"}}, - {"c", "cb", []string{"c"}}, - {"aaba", "bbb", []string{"b"}}, - {"bbaabb", "b", []string{"b"}}, - {"baaabb", "bbaba", []string{"bbb", "baa", "bab"}}, - {"baaabb", "abbab", []string{"abb", "bab", "aab"}}, - {"baaba", "aaabba", []string{"aaba"}}, - {"ca", "cba", []string{"ca"}}, - {"ccbcbc", "abba", []string{"bb"}}, - {"ccbcbc", "aabba", []string{"bb"}}, - {"ccb", "cba", []string{"cb"}}, - {"caef", "axe", []string{"ae"}}, - {"bbaabb", "baabb", []string{"baabb"}}, - // Example from Myers: - {"abcabba", "cbabac", []string{"caba", "baba", "cbba"}}, - {"3456aaa", "aaa", []string{"aaa"}}, - {"aaa", "aaa123", []string{"aaa"}}, - {"aabaa", "aacaa", []string{"aaaa"}}, - {"1a", "a", []string{"a"}}, - {"abab", "bb", []string{"bb"}}, - {"123", "ab", []string{""}}, - {"a", "b", []string{""}}, - {"abc", "123", []string{""}}, - {"aa", "aa", []string{"aa"}}, - {"abcde", "12345", []string{""}}, - {"aaa3456", "aaa", []string{"aaa"}}, - {"abcde", "12345a", []string{"a"}}, - {"ab", "123", []string{""}}, - {"1a2", "a", []string{"a"}}, - // for two-sided - {"babaab", "cccaba", []string{"aba"}}, - {"aabbab", "cbcabc", []string{"bab"}}, - {"abaabb", "bcacab", []string{"baab"}}, - {"abaabb", "abaaaa", []string{"abaa"}}, - {"bababb", "baaabb", []string{"baabb"}}, - {"abbbaa", "cabacc", []string{"aba"}}, - {"aabbaa", "aacaba", []string{"aaaa", "aaba"}}, -} - -func init() { - log.SetFlags(log.Lshortfile) -} - -func check(t *testing.T, str string, lcs lcs, want []string) { - t.Helper() - if !lcs.valid() { - t.Errorf("bad lcs %v", lcs) - } - var got strings.Builder - for _, dd := range lcs { - got.WriteString(str[dd.X : dd.X+dd.Len]) - } - ans := got.String() - for _, w := range want { - if ans == w { - return - } - } - t.Fatalf("str=%q lcs=%v want=%q got=%q", str, lcs, want, ans) -} - -func checkDiffs(t *testing.T, before string, diffs []Diff, after string) { - t.Helper() - var ans strings.Builder - sofar := 0 // index of position in before - for _, d := range diffs { - if sofar < d.Start { - ans.WriteString(before[sofar:d.Start]) - } - ans.WriteString(after[d.ReplStart:d.ReplEnd]) - sofar = d.End - } - ans.WriteString(before[sofar:]) - if ans.String() != after { - t.Fatalf("diff %v took %q to %q, not to %q", diffs, before, ans.String(), after) - } -} - -func lcslen(l lcs) int { - ans := 0 - for _, d := range l { - ans += int(d.Len) - } - return ans -} - -// return a random string of length n made of characters from s -func randstr(s string, n int) string { - src := []rune(s) - x := make([]rune, n) - for i := 0; i < n; i++ { - x[i] = src[rand.Intn(len(src))] - } - return string(x) -} - -func TestLcsFix(t *testing.T) { - tests := []struct{ before, after lcs }{ - {lcs{diag{0, 0, 3}, diag{2, 2, 5}, diag{3, 4, 5}, diag{8, 9, 4}}, lcs{diag{0, 0, 2}, diag{2, 2, 1}, diag{3, 4, 5}, diag{8, 9, 4}}}, - {lcs{diag{1, 1, 6}, diag{6, 12, 3}}, lcs{diag{1, 1, 5}, diag{6, 12, 3}}}, - {lcs{diag{0, 0, 4}, diag{3, 5, 4}}, lcs{diag{0, 0, 3}, diag{3, 5, 4}}}, - {lcs{diag{0, 20, 1}, diag{0, 0, 3}, diag{1, 20, 4}}, lcs{diag{0, 0, 3}, diag{3, 22, 2}}}, - {lcs{diag{0, 0, 4}, diag{1, 1, 2}}, lcs{diag{0, 0, 4}}}, - {lcs{diag{0, 0, 4}}, lcs{diag{0, 0, 4}}}, - {lcs{}, lcs{}}, - {lcs{diag{0, 0, 4}, diag{1, 1, 6}, diag{3, 3, 2}}, lcs{diag{0, 0, 1}, diag{1, 1, 
6}}}, - } - for n, x := range tests { - got := x.before.fix() - if len(got) != len(x.after) { - t.Errorf("got %v, expected %v, for %v", got, x.after, x.before) - } - olen := lcslen(x.after) - glen := lcslen(got) - if olen != glen { - t.Errorf("%d: lens(%d,%d) differ, %v, %v, %v", n, glen, olen, got, x.after, x.before) - } - } -} diff --git a/modules/udiff/lcs/doc.go b/modules/udiff/lcs/doc.go deleted file mode 100644 index 9029dd2..0000000 --- a/modules/udiff/lcs/doc.go +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// package lcs contains code to find longest-common-subsequences -// (and diffs) -package lcs - -/* -Compute longest-common-subsequences of two slices A, B using -algorithms from Myers' paper. A longest-common-subsequence -(LCS from now on) of A and B is a maximal set of lexically increasing -pairs of subscripts (x,y) with A[x]==B[y]. There may be many LCS, but -they all have the same length. An LCS determines a sequence of edits -that changes A into B. - -The key concept is the edit graph of A and B. -If A has length N and B has length M, then the edit graph has -vertices v[i][j] for 0 <= i <= N, 0 <= j <= M. There is a -horizontal edge from v[i][j] to v[i+1][j] whenever both are in -the graph, and a vertical edge from v[i][j] to f[i][j+1] similarly. -When A[i] == B[j] there is a diagonal edge from v[i][j] to v[i+1][j+1]. - -A path between in the graph between (0,0) and (N,M) determines a sequence -of edits converting A into B: each horizontal edge corresponds to removing -an element of A, and each vertical edge corresponds to inserting an -element of B. - -A vertex (x,y) is on (forward) diagonal k if x-y=k. A path in the graph -is of length D if it has D non-diagonal edges. The algorithms generate -forward paths (in which at least one of x,y increases at each edge), -or backward paths (in which at least one of x,y decreases at each edge), -or a combination. (Note that the orientation is the traditional mathematical one, -with the origin in the lower-left corner.) - -Here is the edit graph for A:"aabbaa", B:"aacaba". (I know the diagonals look weird.) - ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ - a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | - ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ - b | | | ___/‾‾‾ | ___/‾‾‾ | | | - ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ - a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | - ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ - c | | | | | | | - ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ - a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | - ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ - a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | - ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ - a a b b a a - - -The algorithm labels a vertex (x,y) with D,k if it is on diagonal k and at -the end of a maximal path of length D. (Because x-y=k it suffices to remember -only the x coordinate of the vertex.) - -The forward algorithm: Find the longest diagonal starting at (0,0) and -label its end with D=0,k=0. From that vertex take a vertical step and -then follow the longest diagonal (up and to the right), and label that vertex -with D=1,k=-1. From the D=0,k=0 point take a horizontal step and the follow -the longest diagonal (up and to the right) and label that vertex -D=1,k=1. 
In the same way, having labelled all the D vertices, -from a vertex labelled D,k find two vertices -tentatively labelled D+1,k-1 and D+1,k+1. There may be two on the same -diagonal, in which case take the one with the larger x. - -Eventually the path gets to (N,M), and the diagonals on it are the LCS. - -Here is the edit graph with the ends of D-paths labelled. (So, for instance, -0/2,2 indicates that x=2,y=2 is labelled with 0, as it should be, since the first -step is to go up the longest diagonal from (0,0).) -A:"aabbaa", B:"aacaba" - ⊙ ------- ⊙ ------- ⊙ -------(3/3,6)------- ⊙ -------(3/5,6)-------(4/6,6) - a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | - ⊙ ------- ⊙ ------- ⊙ -------(2/3,5)------- ⊙ ------- ⊙ ------- ⊙ - b | | | ___/‾‾‾ | ___/‾‾‾ | | | - ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ -------(3/5,4)------- ⊙ - a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | - ⊙ ------- ⊙ -------(1/2,3)-------(2/3,3)------- ⊙ ------- ⊙ ------- ⊙ - c | | | | | | | - ⊙ ------- ⊙ -------(0/2,2)-------(1/3,2)-------(2/4,2)-------(3/5,2)-------(4/6,2) - a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | - ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ - a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | - ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ - a a b b a a - -The 4-path is reconstructed starting at (4/6,6), horizontal to (3/5,6), diagonal to (3,4), vertical -to (2/3,3), horizontal to (1/2,3), vertical to (0/2,2), and diagonal to (0,0). As expected, -there are 4 non-diagonal steps, and the diagonals form an LCS. - -There is a symmetric backward algorithm, which gives (backwards labels are prefixed with a colon): -A:"aabbaa", B:"aacaba" - ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ - a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | - ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ --------(:0/5,5)-------- ⊙ - b | | | ____/‾‾‾ | ____/‾‾‾ | | | - ⊙ -------- ⊙ -------- ⊙ --------(:1/3,4)-------- ⊙ -------- ⊙ -------- ⊙ - a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | - (:3/0,3)--------(:2/1,3)-------- ⊙ --------(:2/3,3)--------(:1/4,3)-------- ⊙ -------- ⊙ - c | | | | | | | - ⊙ -------- ⊙ -------- ⊙ --------(:3/3,2)--------(:2/4,2)-------- ⊙ -------- ⊙ - a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | - (:3/0,1)-------- ⊙ -------- ⊙ -------- ⊙ --------(:3/4,1)-------- ⊙ -------- ⊙ - a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | - (:4/0,0)-------- ⊙ -------- ⊙ -------- ⊙ --------(:4/4,0)-------- ⊙ -------- ⊙ - a a b b a a - -Neither of these is ideal for use in an editor, where it is undesirable to send very long diffs to the -front end. It's tricky to decide exactly what 'very long diffs' means, as "replace A by B" is very short. -We want to control how big D can be, by stopping when it gets too large. The forward algorithm then -privileges common prefixes, and the backward algorithm privileges common suffixes. Either is an undesirable -asymmetry. - -Fortunately there is a two-sided algorithm, implied by results in Myers' paper. Here's what the labels in -the edit graph look like. 
-A:"aabbaa", B:"aacaba" - ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ - a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | - ⊙ --------- ⊙ --------- ⊙ --------- (2/3,5) --------- ⊙ --------- (:0/5,5)--------- ⊙ - b | | | ____/‾‾‾‾ | ____/‾‾‾‾ | | | - ⊙ --------- ⊙ --------- ⊙ --------- (:1/3,4)--------- ⊙ --------- ⊙ --------- ⊙ - a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | - ⊙ --------- (:2/1,3)--------- (1/2,3) ---------(2:2/3,3)--------- (:1/4,3)--------- ⊙ --------- ⊙ - c | | | | | | | - ⊙ --------- ⊙ --------- (0/2,2) --------- (1/3,2) ---------(2:2/4,2)--------- ⊙ --------- ⊙ - a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | - ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ - a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | - ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ - a a b b a a - -The algorithm stopped when it saw the backwards 2-path ending at (1,3) and the forwards 2-path ending at (3,5). The criterion -is a backwards path ending at (u,v) and a forward path ending at (x,y), where u <= x and the two points are on the same -diagonal. (Here the edgegraph has a diagonal, but the criterion is x-y=u-v.) Myers proves there is a forward -2-path from (0,0) to (1,3), and that together with the backwards 2-path ending at (1,3) gives the expected 4-path. -Unfortunately the forward path has to be constructed by another run of the forward algorithm; it can't be found from the -computed labels. That is the worst case. Had the code noticed (x,y)=(u,v)=(3,3) the whole path could be reconstructed -from the edgegraph. The implementation looks for a number of special cases to try to avoid computing an extra forward path. - -If the two-sided algorithm has stop early (because D has become too large) it will have found a forward LCS and a -backwards LCS. Ideally these go with disjoint prefixes and suffixes of A and B, but disjointness may fail and the two -computed LCS may conflict. (An easy example is where A is a suffix of B, and shares a short prefix. The backwards LCS -is all of A, and the forward LCS is a prefix of A.) The algorithm combines the two -to form a best-effort LCS. In the worst case the forward partial LCS may have to -be recomputed. -*/ - -/* Eugene Myers paper is titled -"An O(ND) Difference Algorithm and Its Variations" -and can be found at -http://www.xmailserver.org/diff2.pdf - -(There is a generic implementation of the algorithm the repository with git hash -b9ad7e4ade3a686d608e44475390ad428e60e7fc) -*/ diff --git a/modules/udiff/lcs/git.sh b/modules/udiff/lcs/git.sh deleted file mode 100644 index b25ba4a..0000000 --- a/modules/udiff/lcs/git.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -# -# Copyright 2022 The Go Authors. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. -# -# Creates a zip file containing all numbered versions -# of the commit history of a large source file, for use -# as input data for the tests of the diff algorithm. -# -# Run script from root of the x/tools repo. - -set -eu - -# WARNING: This script will install the latest version of $file -# The largest real source file in the x/tools repo. 
-# file=internal/golang/completion/completion.go -# file=internal/golang/diagnostics.go -file=internal/protocol/tsprotocol.go - -tmp=$(mktemp -d) -git log $file | - awk '/^commit / {print $2}' | - nl -ba -nrz | - while read n hash; do - git checkout --quiet $hash $file - cp -f $file $tmp/$n - done -(cd $tmp && zip -q - *) > testdata.zip -rm -fr $tmp -git restore --staged $file -git restore $file -echo "Created testdata.zip" diff --git a/modules/udiff/lcs/labels.go b/modules/udiff/lcs/labels.go deleted file mode 100644 index 504913d..0000000 --- a/modules/udiff/lcs/labels.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package lcs - -import ( - "fmt" -) - -// For each D, vec[D] has length D+1, -// and the label for (D, k) is stored in vec[D][(D+k)/2]. -type label struct { - vec [][]int -} - -// Temporary checking DO NOT COMMIT true TO PRODUCTION CODE -const debug = false - -// debugging. check that the (d,k) pair is valid -// (that is, -d<=k<=d and d+k even) -func checkDK(D, k int) { - if k >= -D && k <= D && (D+k)%2 == 0 { - return - } - panic(fmt.Sprintf("out of range, d=%d,k=%d", D, k)) -} - -func (t *label) set(D, k, x int) { - if debug { - checkDK(D, k) - } - for len(t.vec) <= D { - t.vec = append(t.vec, nil) - } - if t.vec[D] == nil { - t.vec[D] = make([]int, D+1) - } - t.vec[D][(D+k)/2] = x // known that D+k is even -} - -func (t *label) get(d, k int) int { - if debug { - checkDK(d, k) - } - return int(t.vec[d][(d+k)/2]) -} - -func newtriang(limit int) label { - if limit < 100 { - // Preallocate if limit is not large. - return label{vec: make([][]int, limit)} - } - return label{} -} diff --git a/modules/udiff/lcs/old.go b/modules/udiff/lcs/old.go deleted file mode 100644 index 4353da1..0000000 --- a/modules/udiff/lcs/old.go +++ /dev/null @@ -1,480 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package lcs - -// TODO(adonovan): remove unclear references to "old" in this package. - -import ( - "fmt" -) - -// A Diff is a replacement of a portion of A by a portion of B. -type Diff struct { - Start, End int // offsets of portion to delete in A - ReplStart, ReplEnd int // offset of replacement text in B -} - -// DiffStrings returns the differences between two strings. -// It does not respect rune boundaries. -func DiffStrings(a, b string) []Diff { return diff(stringSeqs{a, b}) } - -// DiffBytes returns the differences between two byte sequences. -// It does not respect rune boundaries. -func DiffBytes(a, b []byte) []Diff { return diff(bytesSeqs{a, b}) } - -// DiffRunes returns the differences between two rune sequences. -func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) } - -func diff(seqs sequences) []Diff { - // A limit on how deeply the LCS algorithm should search. The value is just a guess. - const maxDiffs = 100 - diff, _ := compute(seqs, twosided, maxDiffs/2) - return diff -} - -// compute computes the list of differences between two sequences, -// along with the LCS. It is exercised directly by tests. -// The algorithm is one of {forward, backward, twosided}. 
-func compute(seqs sequences, algo func(*editGraph) lcs, limit int) ([]Diff, lcs) { - if limit <= 0 { - limit = 1 << 25 // effectively infinity - } - alen, blen := seqs.lengths() - g := &editGraph{ - seqs: seqs, - vf: newtriang(limit), - vb: newtriang(limit), - limit: limit, - ux: alen, - uy: blen, - delta: alen - blen, - } - lcs := algo(g) - diffs := lcs.toDiffs(alen, blen) - return diffs, lcs -} - -// editGraph carries the information for computing the lcs of two sequences. -type editGraph struct { - seqs sequences - vf, vb label // forward and backward labels - - limit int // maximal value of D - // the bounding rectangle of the current edit graph - lx, ly, ux, uy int - delta int // common subexpression: (ux-lx)-(uy-ly) -} - -// toDiffs converts an LCS to a list of edits. -func (lcs lcs) toDiffs(alen, blen int) []Diff { - var diffs []Diff - var pa, pb int // offsets in a, b - for _, l := range lcs { - if pa < l.X || pb < l.Y { - diffs = append(diffs, Diff{pa, l.X, pb, l.Y}) - } - pa = l.X + l.Len - pb = l.Y + l.Len - } - if pa < alen || pb < blen { - diffs = append(diffs, Diff{pa, alen, pb, blen}) - } - return diffs -} - -// --- FORWARD --- - -// fdone decides if the forward path has reached the upper right -// corner of the rectangle. If so, it also returns the computed lcs. -func (e *editGraph) fdone(D, k int) (bool, lcs) { - // x, y, k are relative to the rectangle - x := e.vf.get(D, k) - y := x - k - if x == e.ux && y == e.uy { - return true, e.forwardlcs(D, k) - } - return false, nil -} - -// run the forward algorithm, until success or up to the limit on D. -func forward(e *editGraph) lcs { - e.setForward(0, 0, e.lx) - if ok, ans := e.fdone(0, 0); ok { - return ans - } - // from D to D+1 - for D := 0; D < e.limit; D++ { - e.setForward(D+1, -(D + 1), e.getForward(D, -D)) - if ok, ans := e.fdone(D+1, -(D + 1)); ok { - return ans - } - e.setForward(D+1, D+1, e.getForward(D, D)+1) - if ok, ans := e.fdone(D+1, D+1); ok { - return ans - } - for k := -D + 1; k <= D-1; k += 2 { - // these are tricky and easy to get backwards - lookv := e.lookForward(k, e.getForward(D, k-1)+1) - lookh := e.lookForward(k, e.getForward(D, k+1)) - if lookv > lookh { - e.setForward(D+1, k, lookv) - } else { - e.setForward(D+1, k, lookh) - } - if ok, ans := e.fdone(D+1, k); ok { - return ans - } - } - } - // D is too large - // find the D path with maximal x+y inside the rectangle and - // use that to compute the found part of the lcs - kmax := -e.limit - 1 - diagmax := -1 - for k := -e.limit; k <= e.limit; k += 2 { - x := e.getForward(e.limit, k) - y := x - k - if x+y > diagmax && x <= e.ux && y <= e.uy { - diagmax, kmax = x+y, k - } - } - return e.forwardlcs(e.limit, kmax) -} - -// recover the lcs by backtracking from the farthest point reached -func (e *editGraph) forwardlcs(D, k int) lcs { - var ans lcs - for x := e.getForward(D, k); x != 0 || x-k != 0; { - if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) { - // if (x-1,y) is labelled D-1, x--,D--,k--,continue - D, k, x = D-1, k-1, x-1 - continue - } else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) { - // if (x,y-1) is labelled D-1, x, D--,k++, continue - D, k = D-1, k+1 - continue - } - // if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue - y := x - k - ans = ans.prepend(x+e.lx-1, y+e.ly-1) - x-- - } - return ans -} - -// start at (x,y), go up the diagonal as far as possible, -// and label the result with d -func (e *editGraph) lookForward(k, relx int) int { - rely := relx - k - x, y := relx+e.lx, rely+e.ly - if x < e.ux && y < e.uy { - x += 
e.seqs.commonPrefixLen(x, e.ux, y, e.uy) - } - return x -} - -func (e *editGraph) setForward(d, k, relx int) { - x := e.lookForward(k, relx) - e.vf.set(d, k, x-e.lx) -} - -func (e *editGraph) getForward(d, k int) int { - x := e.vf.get(d, k) - return x -} - -// --- BACKWARD --- - -// bdone decides if the backward path has reached the lower left corner -func (e *editGraph) bdone(D, k int) (bool, lcs) { - // x, y, k are relative to the rectangle - x := e.vb.get(D, k) - y := x - (k + e.delta) - if x == 0 && y == 0 { - return true, e.backwardlcs(D, k) - } - return false, nil -} - -// run the backward algorithm, until success or up to the limit on D. -func backward(e *editGraph) lcs { - e.setBackward(0, 0, e.ux) - if ok, ans := e.bdone(0, 0); ok { - return ans - } - // from D to D+1 - for D := 0; D < e.limit; D++ { - e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1) - if ok, ans := e.bdone(D+1, -(D + 1)); ok { - return ans - } - e.setBackward(D+1, D+1, e.getBackward(D, D)) - if ok, ans := e.bdone(D+1, D+1); ok { - return ans - } - for k := -D + 1; k <= D-1; k += 2 { - // these are tricky and easy to get wrong - lookv := e.lookBackward(k, e.getBackward(D, k-1)) - lookh := e.lookBackward(k, e.getBackward(D, k+1)-1) - if lookv < lookh { - e.setBackward(D+1, k, lookv) - } else { - e.setBackward(D+1, k, lookh) - } - if ok, ans := e.bdone(D+1, k); ok { - return ans - } - } - } - - // D is too large - // find the D path with minimal x+y inside the rectangle and - // use that to compute the part of the lcs found - kmax := -e.limit - 1 - diagmin := 1 << 25 - for k := -e.limit; k <= e.limit; k += 2 { - x := e.getBackward(e.limit, k) - y := x - (k + e.delta) - if x+y < diagmin && x >= 0 && y >= 0 { - diagmin, kmax = x+y, k - } - } - if kmax < -e.limit { - panic(fmt.Sprintf("no paths when limit=%d?", e.limit)) - } - return e.backwardlcs(e.limit, kmax) -} - -// recover the lcs by backtracking -func (e *editGraph) backwardlcs(D, k int) lcs { - var ans lcs - for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; { - if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) { - // D--, k--, x unchanged - D, k = D-1, k-1 - continue - } else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) { - // D--, k++, x++ - D, k, x = D-1, k+1, x+1 - continue - } - y := x - (k + e.delta) - ans = ans.append(x+e.lx, y+e.ly) - x++ - } - return ans -} - -// start at (x,y), go down the diagonal as far as possible, -func (e *editGraph) lookBackward(k, relx int) int { - rely := relx - (k + e.delta) // forward k = k + e.delta - x, y := relx+e.lx, rely+e.ly - if x > 0 && y > 0 { - x -= e.seqs.commonSuffixLen(0, x, 0, y) - } - return x -} - -// convert to rectangle, and label the result with d -func (e *editGraph) setBackward(d, k, relx int) { - x := e.lookBackward(k, relx) - e.vb.set(d, k, x-e.lx) -} - -func (e *editGraph) getBackward(d, k int) int { - x := e.vb.get(d, k) - return x -} - -// -- TWOSIDED --- - -func twosided(e *editGraph) lcs { - // The termination condition could be improved, as either the forward - // or backward pass could succeed before Myers' Lemma applies. - // Aside from questions of efficiency (is the extra testing cost-effective) - // this is more likely to matter when e.limit is reached. 
- e.setForward(0, 0, e.lx) - e.setBackward(0, 0, e.ux) - - // from D to D+1 - for D := 0; D < e.limit; D++ { - // just finished a backwards pass, so check - if got, ok := e.twoDone(D, D); ok { - return e.twolcs(D, D, got) - } - // do a forwards pass (D to D+1) - e.setForward(D+1, -(D + 1), e.getForward(D, -D)) - e.setForward(D+1, D+1, e.getForward(D, D)+1) - for k := -D + 1; k <= D-1; k += 2 { - // these are tricky and easy to get backwards - lookv := e.lookForward(k, e.getForward(D, k-1)+1) - lookh := e.lookForward(k, e.getForward(D, k+1)) - if lookv > lookh { - e.setForward(D+1, k, lookv) - } else { - e.setForward(D+1, k, lookh) - } - } - // just did a forward pass, so check - if got, ok := e.twoDone(D+1, D); ok { - return e.twolcs(D+1, D, got) - } - // do a backward pass, D to D+1 - e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1) - e.setBackward(D+1, D+1, e.getBackward(D, D)) - for k := -D + 1; k <= D-1; k += 2 { - // these are tricky and easy to get wrong - lookv := e.lookBackward(k, e.getBackward(D, k-1)) - lookh := e.lookBackward(k, e.getBackward(D, k+1)-1) - if lookv < lookh { - e.setBackward(D+1, k, lookv) - } else { - e.setBackward(D+1, k, lookh) - } - } - } - - // D too large. combine a forward and backward partial lcs - // first, a forward one - kmax := -e.limit - 1 - diagmax := -1 - for k := -e.limit; k <= e.limit; k += 2 { - x := e.getForward(e.limit, k) - y := x - k - if x+y > diagmax && x <= e.ux && y <= e.uy { - diagmax, kmax = x+y, k - } - } - if kmax < -e.limit { - panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit)) - } - lcs := e.forwardlcs(e.limit, kmax) - // now a backward one - // find the D path with minimal x+y inside the rectangle and - // use that to compute the lcs - diagmin := 1 << 25 // infinity - for k := -e.limit; k <= e.limit; k += 2 { - x := e.getBackward(e.limit, k) - y := x - (k + e.delta) - if x+y < diagmin && x >= 0 && y >= 0 { - diagmin, kmax = x+y, k - } - } - if kmax < -e.limit { - panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit)) - } - lcs = append(lcs, e.backwardlcs(e.limit, kmax)...) - // These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs) - ans := lcs.fix() - return ans -} - -// Does Myers' Lemma apply? -func (e *editGraph) twoDone(df, db int) (int, bool) { - if (df+db+e.delta)%2 != 0 { - return 0, false // diagonals cannot overlap - } - kmin := -db + e.delta - if -df > kmin { - kmin = -df - } - kmax := db + e.delta - if df < kmax { - kmax = df - } - for k := kmin; k <= kmax; k += 2 { - x := e.vf.get(df, k) - u := e.vb.get(db, k-e.delta) - if u <= x { - // is it worth looking at all the other k? - for l := k; l <= kmax; l += 2 { - x := e.vf.get(df, l) - y := x - l - u := e.vb.get(db, l-e.delta) - v := u - l - if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux { - return l, true - } - } - return k, true - } - } - return 0, false -} - -func (e *editGraph) twolcs(df, db, kf int) lcs { - // db==df || db+1==df - x := e.vf.get(df, kf) - y := x - kf - kb := kf - e.delta - u := e.vb.get(db, kb) - v := u - kf - - // Myers proved there is a df-path from (0,0) to (u,v) - // and a db-path from (x,y) to (N,M). - // In the first case the overall path is the forward path - // to (u,v) followed by the backward path to (N,M). - // In the second case the path is the backward path to (x,y) - // followed by the forward path to (x,y) from (0,0). - - // Look for some special cases to avoid computing either of these paths. 
- if x == u { - // "babaab" "cccaba" - // already patched together - lcs := e.forwardlcs(df, kf) - lcs = append(lcs, e.backwardlcs(db, kb)...) - return lcs.sort() - } - - // is (u-1,v) or (u,v-1) labelled df-1? - // if so, that forward df-1-path plus a horizontal or vertical edge - // is the df-path to (u,v), then plus the db-path to (N,M) - if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 { - // "aabbab" "cbcabc" - lcs := e.forwardlcs(df-1, u-1-v) - lcs = append(lcs, e.backwardlcs(db, kb)...) - return lcs.sort() - } - if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u { - // "abaabb" "bcacab" - lcs := e.forwardlcs(df-1, u-(v-1)) - lcs = append(lcs, e.backwardlcs(db, kb)...) - return lcs.sort() - } - - // The path can't possibly contribute to the lcs because it - // is all horizontal or vertical edges - if u == 0 || v == 0 || x == e.ux || y == e.uy { - // "abaabb" "abaaaa" - if u == 0 || v == 0 { - return e.backwardlcs(db, kb) - } - return e.forwardlcs(df, kf) - } - - // is (x+1,y) or (x,y+1) labelled db-1? - if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 { - // "bababb" "baaabb" - lcs := e.backwardlcs(db-1, kb+1) - lcs = append(lcs, e.forwardlcs(df, kf)...) - return lcs.sort() - } - if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x { - // "abbbaa" "cabacc" - lcs := e.backwardlcs(db-1, kb-1) - lcs = append(lcs, e.forwardlcs(df, kf)...) - return lcs.sort() - } - - // need to compute another path - // "aabbaa" "aacaba" - lcs := e.backwardlcs(db, kb) - oldx, oldy := e.ux, e.uy - e.ux = u - e.uy = v - lcs = append(lcs, forward(e)...) - e.ux, e.uy = oldx, oldy - return lcs.sort() -} diff --git a/modules/udiff/lcs/old_test.go b/modules/udiff/lcs/old_test.go deleted file mode 100644 index 95e3fa7..0000000 --- a/modules/udiff/lcs/old_test.go +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package lcs - -import ( - "fmt" - "log" - "math/rand/v2" - "os" - "strings" - "testing" -) - -func TestAlgosOld(t *testing.T) { - for i, algo := range []func(*editGraph) lcs{forward, backward, twosided} { - t.Run(strings.Fields("forward backward twosided")[i], func(t *testing.T) { - for _, tx := range Btests { - lim := len(tx.a) + len(tx.b) - - diffs, lcs := compute(stringSeqs{tx.a, tx.b}, algo, lim) - check(t, tx.a, lcs, tx.lcs) - checkDiffs(t, tx.a, diffs, tx.b) - - diffs, lcs = compute(stringSeqs{tx.b, tx.a}, algo, lim) - check(t, tx.b, lcs, tx.lcs) - checkDiffs(t, tx.b, diffs, tx.a) - } - }) - } -} - -func TestIntOld(t *testing.T) { - // need to avoid any characters in btests - lfill, rfill := "AAAAAAAAAAAA", "BBBBBBBBBBBB" - for _, tx := range Btests { - if len(tx.a) < 2 || len(tx.b) < 2 { - continue - } - left := tx.a + lfill - right := tx.b + rfill - lim := len(tx.a) + len(tx.b) - diffs, lcs := compute(stringSeqs{left, right}, twosided, lim) - check(t, left, lcs, tx.lcs) - checkDiffs(t, left, diffs, right) - diffs, lcs = compute(stringSeqs{right, left}, twosided, lim) - check(t, right, lcs, tx.lcs) - checkDiffs(t, right, diffs, left) - - left = lfill + tx.a - right = rfill + tx.b - diffs, lcs = compute(stringSeqs{left, right}, twosided, lim) - check(t, left, lcs, tx.lcs) - checkDiffs(t, left, diffs, right) - diffs, lcs = compute(stringSeqs{right, left}, twosided, lim) - check(t, right, lcs, tx.lcs) - checkDiffs(t, right, diffs, left) - } -} - -func TestSpecialOld(t *testing.T) { // exercises lcs.fix - a := "golang.org/x/tools/intern" - b := "github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/intern" - diffs, lcs := compute(stringSeqs{a, b}, twosided, 4) - if !lcs.valid() { - t.Errorf("%d,%v", len(diffs), lcs) - } -} - -func TestRegressionOld001(t *testing.T) { - a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/aymanbagabas/go-udiff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n" - - b := "// Copyright 2019 The Go Authors. 
All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/aymanbagabas/go-udiff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n" - for i := 1; i < len(b); i++ { - diffs, lcs := compute(stringSeqs{a, b}, twosided, i) // 14 from gopls - if !lcs.valid() { - t.Errorf("%d,%v", len(diffs), lcs) - } - checkDiffs(t, a, diffs, b) - } -} - -func TestRegressionOld002(t *testing.T) { - a := "n\"\n)\n" - b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n" - for i := 1; i <= len(b); i++ { - diffs, lcs := compute(stringSeqs{a, b}, twosided, i) - if !lcs.valid() { - t.Errorf("%d,%v", len(diffs), lcs) - } - checkDiffs(t, a, diffs, b) - } -} - -func TestRegressionOld003(t *testing.T) { - a := "golang.org/x/hello v1.0.0\nrequire golang.org/x/unused v1" - b := "golang.org/x/hello v1" - for i := 1; i <= len(a); i++ { - diffs, lcs := compute(stringSeqs{a, b}, twosided, i) - if !lcs.valid() { - t.Errorf("%d,%v", len(diffs), lcs) - } - checkDiffs(t, a, diffs, b) - } -} - -func TestRandOld(t *testing.T) { - for i := 0; i < 1000; i++ { - // TODO(adonovan): use ASCII and bytesSeqs here? The use of - // non-ASCII isn't relevant to the property exercised by the test. - a := []rune(randstr("abω", 16)) - b := []rune(randstr("abωc", 16)) - seq := runesSeqs{a, b} - - const lim = 24 // large enough to get true lcs - _, forw := compute(seq, forward, lim) - _, back := compute(seq, backward, lim) - _, two := compute(seq, twosided, lim) - if lcslen(two) != lcslen(forw) || lcslen(forw) != lcslen(back) { - t.Logf("\n%v\n%v\n%v", forw, back, two) - t.Fatalf("%d forw:%d back:%d two:%d", i, lcslen(forw), lcslen(back), lcslen(two)) - } - if !two.valid() || !forw.valid() || !back.valid() { - t.Errorf("check failure") - } - } -} - -// TestDiffAPI tests the public API functions (Diff{Bytes,Strings,Runes}) -// to ensure at least minimal parity of the three representations. 
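The table-driven test that follows checks one easily missed detail: DiffStrings and DiffBytes report byte offsets, while DiffRunes reports rune offsets. A small illustrative sketch of that difference — a standalone program, not part of the test file, with the expected output taken from the test table below:

package main

import (
	"fmt"

	"github.com/antgroup/hugescm/modules/udiff/lcs"
)

func main() {
	a, b := "abcωdef", "abcΩdef" // ω and Ω each occupy two bytes in UTF-8

	fmt.Println(lcs.DiffStrings(a, b))               // [{3 5 3 5}]  byte offsets
	fmt.Println(lcs.DiffBytes([]byte(a), []byte(b))) // [{3 5 3 5}]  byte offsets
	fmt.Println(lcs.DiffRunes([]rune(a), []rune(b))) // [{3 4 3 4}]  rune offsets
}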
-func TestDiffAPI(t *testing.T) { - for _, test := range []struct { - a, b string - wantStrings, wantBytes, wantRunes string - }{ - {"abcXdef", "abcxdef", "[{3 4 3 4}]", "[{3 4 3 4}]", "[{3 4 3 4}]"}, // ASCII - {"abcωdef", "abcΩdef", "[{3 5 3 5}]", "[{3 5 3 5}]", "[{3 4 3 4}]"}, // non-ASCII - } { - - gotStrings := fmt.Sprint(DiffStrings(test.a, test.b)) - if gotStrings != test.wantStrings { - t.Errorf("DiffStrings(%q, %q) = %v, want %v", - test.a, test.b, gotStrings, test.wantStrings) - } - gotBytes := fmt.Sprint(DiffBytes([]byte(test.a), []byte(test.b))) - if gotBytes != test.wantBytes { - t.Errorf("DiffBytes(%q, %q) = %v, want %v", - test.a, test.b, gotBytes, test.wantBytes) - } - gotRunes := fmt.Sprint(DiffRunes([]rune(test.a), []rune(test.b))) - if gotRunes != test.wantRunes { - t.Errorf("DiffRunes(%q, %q) = %v, want %v", - test.a, test.b, gotRunes, test.wantRunes) - } - } -} - -func BenchmarkTwoOld(b *testing.B) { - tests := genBench("abc", 96) - for i := 0; i < b.N; i++ { - for _, tt := range tests { - _, two := compute(stringSeqs{tt.before, tt.after}, twosided, 100) - if !two.valid() { - b.Error("check failed") - } - } - } -} - -func BenchmarkForwOld(b *testing.B) { - tests := genBench("abc", 96) - for i := 0; i < b.N; i++ { - for _, tt := range tests { - _, two := compute(stringSeqs{tt.before, tt.after}, forward, 100) - if !two.valid() { - b.Error("check failed") - } - } - } -} - -func genBench(set string, n int) []struct{ before, after string } { - // before and after for benchmarks. 24 strings of length n with - // before and after differing at least once, and about 5% - var ans []struct{ before, after string } - for i := 0; i < 24; i++ { - // maybe b should have an approximately known number of diffs - a := randstr(set, n) - cnt := 0 - bb := make([]rune, 0, n) - for _, r := range a { - if rand.Float64() < .05 { - cnt++ - r = 'N' - } - bb = append(bb, r) - } - if cnt == 0 { - // avoid == shortcut - bb[n/2] = 'N' - } - ans = append(ans, struct{ before, after string }{a, string(bb)}) - } - return ans -} - -// This benchmark represents a common case for a diff command: -// large file with a single relatively small diff in the middle. -// (It's not clear whether this is representative of gopls workloads -// or whether it is important to gopls diff performance.) -// -// TODO(adonovan) opt: it could be much faster. For example, -// comparing a file against itself is about 10x faster than with the -// small deletion in the middle. Strangely, comparing a file against -// itself minus the last byte is faster still; I don't know why. -// There is much low-hanging fruit here for further improvement. 
-func BenchmarkLargeFileSmallDiff(b *testing.B) { - data, err := os.ReadFile("old.go") // large file - if err != nil { - log.Fatal(err) - } - - n := len(data) - - src := string(data) - dst := src[:n*49/100] + src[n*51/100:] // remove 2% from the middle - b.Run("string", func(b *testing.B) { - for i := 0; i < b.N; i++ { - compute(stringSeqs{src, dst}, twosided, len(src)+len(dst)) - } - }) - - srcBytes := []byte(src) - dstBytes := []byte(dst) - b.Run("bytes", func(b *testing.B) { - for i := 0; i < b.N; i++ { - compute(bytesSeqs{srcBytes, dstBytes}, twosided, len(srcBytes)+len(dstBytes)) - } - }) - - srcRunes := []rune(src) - dstRunes := []rune(dst) - b.Run("runes", func(b *testing.B) { - for i := 0; i < b.N; i++ { - compute(runesSeqs{srcRunes, dstRunes}, twosided, len(srcRunes)+len(dstRunes)) - } - }) -} diff --git a/modules/udiff/lcs/sequence.go b/modules/udiff/lcs/sequence.go deleted file mode 100644 index 2d72d26..0000000 --- a/modules/udiff/lcs/sequence.go +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package lcs - -// This file defines the abstract sequence over which the LCS algorithm operates. - -// sequences abstracts a pair of sequences, A and B. -type sequences interface { - lengths() (int, int) // len(A), len(B) - commonPrefixLen(ai, aj, bi, bj int) int // len(commonPrefix(A[ai:aj], B[bi:bj])) - commonSuffixLen(ai, aj, bi, bj int) int // len(commonSuffix(A[ai:aj], B[bi:bj])) -} - -type stringSeqs struct{ a, b string } - -func (s stringSeqs) lengths() (int, int) { return len(s.a), len(s.b) } -func (s stringSeqs) commonPrefixLen(ai, aj, bi, bj int) int { - return commonPrefixLenString(s.a[ai:aj], s.b[bi:bj]) -} -func (s stringSeqs) commonSuffixLen(ai, aj, bi, bj int) int { - return commonSuffixLenString(s.a[ai:aj], s.b[bi:bj]) -} - -// The explicit capacity in s[i:j:j] leads to more efficient code. - -type bytesSeqs struct{ a, b []byte } - -func (s bytesSeqs) lengths() (int, int) { return len(s.a), len(s.b) } -func (s bytesSeqs) commonPrefixLen(ai, aj, bi, bj int) int { - return commonPrefixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj]) -} -func (s bytesSeqs) commonSuffixLen(ai, aj, bi, bj int) int { - return commonSuffixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj]) -} - -type runesSeqs struct{ a, b []rune } - -func (s runesSeqs) lengths() (int, int) { return len(s.a), len(s.b) } -func (s runesSeqs) commonPrefixLen(ai, aj, bi, bj int) int { - return commonPrefixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj]) -} -func (s runesSeqs) commonSuffixLen(ai, aj, bi, bj int) int { - return commonSuffixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj]) -} - -// TODO(adonovan): optimize these functions using ideas from: -// - https://go.dev/cl/408116 common.go -// - https://go.dev/cl/421435 xor_generic.go - -// TODO(adonovan): factor using generics when available, -// but measure performance impact. - -// commonPrefixLen* returns the length of the common prefix of a[ai:aj] and b[bi:bj]. -func commonPrefixLenBytes(a, b []byte) int { - n := min(len(a), len(b)) - i := 0 - for i < n && a[i] == b[i] { - i++ - } - return i -} -func commonPrefixLenRunes(a, b []rune) int { - n := min(len(a), len(b)) - i := 0 - for i < n && a[i] == b[i] { - i++ - } - return i -} -func commonPrefixLenString(a, b string) int { - n := min(len(a), len(b)) - i := 0 - for i < n && a[i] == b[i] { - i++ - } - return i -} - -// commonSuffixLen* returns the length of the common suffix of a[ai:aj] and b[bi:bj]. 
-func commonSuffixLenBytes(a, b []byte) int { - n := min(len(a), len(b)) - i := 0 - for i < n && a[len(a)-1-i] == b[len(b)-1-i] { - i++ - } - return i -} -func commonSuffixLenRunes(a, b []rune) int { - n := min(len(a), len(b)) - i := 0 - for i < n && a[len(a)-1-i] == b[len(b)-1-i] { - i++ - } - return i -} -func commonSuffixLenString(a, b string) int { - n := min(len(a), len(b)) - i := 0 - for i < n && a[len(a)-1-i] == b[len(b)-1-i] { - i++ - } - return i -} - -func min(x, y int) int { - if x < y { - return x - } else { - return y - } -} diff --git a/modules/udiff/myers/diff.go b/modules/udiff/myers/diff.go deleted file mode 100644 index 3aab0ff..0000000 --- a/modules/udiff/myers/diff.go +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package myers implements the Myers diff algorithm. -package myers - -import ( - "strings" - - diff "github.com/antgroup/hugescm/modules/udiff" -) - -// Sources: -// https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/ -// https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2 - -// ComputeEdits returns the diffs of two strings using a simple -// line-based implementation, like [diff.Strings]. -// -// Deprecated: this implementation is moribund. However, when diffs -// appear in marker test expectations, they are the particular diffs -// produced by this implementation. The marker test framework -// asserts diff(orig, got)==wantDiff, but ideally it would compute -// got==apply(orig, wantDiff) so that the notation of the diff -// is immaterial. -func ComputeEdits(before, after string) []diff.Edit { - beforeLines := splitLines(before) - ops := operations(beforeLines, splitLines(after)) - - // Build a table mapping line number to offset. - lineOffsets := make([]int, 0, len(beforeLines)+1) - total := 0 - for i := range beforeLines { - lineOffsets = append(lineOffsets, total) - total += len(beforeLines[i]) - } - lineOffsets = append(lineOffsets, total) // EOF - - edits := make([]diff.Edit, 0, len(ops)) - for _, op := range ops { - start, end := lineOffsets[op.I1], lineOffsets[op.I2] - switch op.Kind { - case opDelete: - // Delete: before[I1:I2] is deleted. - edits = append(edits, diff.Edit{Start: start, End: end}) - case opInsert: - // Insert: after[J1:J2] is inserted at before[I1:I1]. - if content := strings.Join(op.Content, ""); content != "" { - edits = append(edits, diff.Edit{Start: start, End: end, New: content}) - } - } - } - return edits -} - -// opKind is used to denote the type of operation a line represents. -type opKind int - -const ( - opDelete opKind = iota // line deleted from input (-) - opInsert // line inserted into output (+) - opEqual // line present in input and output -) - -func (kind opKind) String() string { - switch kind { - case opDelete: - return "delete" - case opInsert: - return "insert" - case opEqual: - return "equal" - default: - panic("unknown opKind") - } -} - -type operation struct { - Kind opKind - Content []string // content from b - I1, I2 int // indices of the line in a - J1 int // indices of the line in b, J2 implied by len(Content) -} - -// operations returns the list of operations to convert a into b, consolidating -// operations for multiple lines and not including equal lines. 
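For reference, the deprecated myers implementation removed here is driven through ComputeEdits (defined above), which returns byte-offset diff.Edit values rather than the line-oriented operations it builds internally. An illustrative sketch — a standalone program, not from the repository, using the import path that diff_test.go below uses:

package main

import (
	"fmt"

	"github.com/antgroup/hugescm/modules/udiff/myers"
)

func main() {
	before := "hello\nworld\n"
	after := "hello\nthere\nworld\n"

	// ComputeEdits is line based: each Edit replaces before[Start:End]
	// (byte offsets) with Edit.New.
	for _, e := range myers.ComputeEdits(before, after) {
		fmt.Printf("replace before[%d:%d]=%q with %q\n",
			e.Start, e.End, before[e.Start:e.End], e.New)
	}
}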
-func operations(a, b []string) []*operation { - if len(a) == 0 && len(b) == 0 { - return nil - } - - trace, offset := shortestEditSequence(a, b) - snakes := backtrack(trace, len(a), len(b), offset) - - M, N := len(a), len(b) - - var i int - solution := make([]*operation, len(a)+len(b)) - - add := func(op *operation, i2, j2 int) { - if op == nil { - return - } - op.I2 = i2 - if op.Kind == opInsert { - op.Content = b[op.J1:j2] - } - solution[i] = op - i++ - } - x, y := 0, 0 - for _, snake := range snakes { - if len(snake) < 2 { - continue - } - var op *operation - // delete (horizontal) - for snake[0]-snake[1] > x-y { - if op == nil { - op = &operation{ - Kind: opDelete, - I1: x, - J1: y, - } - } - x++ - if x == M { - break - } - } - add(op, x, y) - op = nil - // insert (vertical) - for snake[0]-snake[1] < x-y { - if op == nil { - op = &operation{ - Kind: opInsert, - I1: x, - J1: y, - } - } - y++ - } - add(op, x, y) - op = nil - // equal (diagonal) - for x < snake[0] { - x++ - y++ - } - if x >= M && y >= N { - break - } - } - return solution[:i] -} - -// backtrack uses the trace for the edit sequence computation and returns the -// "snakes" that make up the solution. A "snake" is a single deletion or -// insertion followed by zero or diagonals. -func backtrack(trace [][]int, x, y, offset int) [][]int { - snakes := make([][]int, len(trace)) - d := len(trace) - 1 - for ; x > 0 && y > 0 && d > 0; d-- { - V := trace[d] - if len(V) == 0 { - continue - } - snakes[d] = []int{x, y} - - k := x - y - - var kPrev int - if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) { - kPrev = k + 1 - } else { - kPrev = k - 1 - } - - x = V[kPrev+offset] - y = x - kPrev - } - if x < 0 || y < 0 { - return snakes - } - snakes[d] = []int{x, y} - return snakes -} - -// shortestEditSequence returns the shortest edit sequence that converts a into b. -func shortestEditSequence(a, b []string) ([][]int, int) { - M, N := len(a), len(b) - V := make([]int, 2*(N+M)+1) - offset := N + M - trace := make([][]int, N+M+1) - - // Iterate through the maximum possible length of the SES (N+M). - for d := 0; d <= N+M; d++ { - copyV := make([]int, len(V)) - // k lines are represented by the equation y = x - k. We move in - // increments of 2 because end points for even d are on even k lines. - for k := -d; k <= d; k += 2 { - // At each point, we either go down or to the right. We go down if - // k == -d, and we go to the right if k == d. We also prioritize - // the maximum x value, because we prefer deletions to insertions. - var x int - if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) { - x = V[k+1+offset] // down - } else { - x = V[k-1+offset] + 1 // right - } - - y := x - k - - // Diagonal moves while we have equal contents. - for x < M && y < N && a[x] == b[y] { - x++ - y++ - } - - V[k+offset] = x - - // Return if we've exceeded the maximum values. - if x == M && y == N { - // Makes sure to save the state of the array before returning. - copy(copyV, V) - trace[d] = copyV - return trace, offset - } - } - - // Save the state of the array. - copy(copyV, V) - trace[d] = copyV - } - return nil, 0 -} - -func splitLines(text string) []string { - lines := strings.SplitAfter(text, "\n") - if lines[len(lines)-1] == "" { - lines = lines[:len(lines)-1] - } - return lines -} diff --git a/modules/udiff/myers/diff_test.go b/modules/udiff/myers/diff_test.go deleted file mode 100644 index ec5e2f6..0000000 --- a/modules/udiff/myers/diff_test.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package myers_test - -import ( - "testing" - - "github.com/antgroup/hugescm/modules/udiff/difftest" - "github.com/antgroup/hugescm/modules/udiff/myers" -) - -func TestDiff(t *testing.T) { - difftest.DiffTest(t, myers.ComputeEdits) -} diff --git a/modules/udiff/ndiff.go b/modules/udiff/ndiff.go deleted file mode 100644 index c45ec2a..0000000 --- a/modules/udiff/ndiff.go +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package udiff - -import ( - "bytes" - "unicode/utf8" - - "github.com/antgroup/hugescm/modules/udiff/lcs" -) - -// Strings computes the differences between two strings. -// The resulting edits respect rune boundaries. -func Strings(before, after string) []Edit { - if before == after { - return nil // common case - } - - if isASCII(before) && isASCII(after) { - // TODO(adonovan): opt: specialize diffASCII for strings. - return diffASCII([]byte(before), []byte(after)) - } - return diffRunes([]rune(before), []rune(after)) -} - -// Bytes computes the differences between two byte slices. -// The resulting edits respect rune boundaries. -func Bytes(before, after []byte) []Edit { - if bytes.Equal(before, after) { - return nil // common case - } - - if isASCII(before) && isASCII(after) { - return diffASCII(before, after) - } - return diffRunes(runes(before), runes(after)) -} - -func diffASCII(before, after []byte) []Edit { - diffs := lcs.DiffBytes(before, after) - - // Convert from LCS diffs. - res := make([]Edit, len(diffs)) - for i, d := range diffs { - res[i] = Edit{d.Start, d.End, string(after[d.ReplStart:d.ReplEnd])} - } - return res -} - -func diffRunes(before, after []rune) []Edit { - diffs := lcs.DiffRunes(before, after) - - // The diffs returned by the lcs package use indexes - // into whatever slice was passed in. - // Convert rune offsets to byte offsets. - res := make([]Edit, len(diffs)) - lastEnd := 0 - utf8Len := 0 - for i, d := range diffs { - utf8Len += runesLen(before[lastEnd:d.Start]) // text between edits - start := utf8Len - utf8Len += runesLen(before[d.Start:d.End]) // text deleted by this edit - res[i] = Edit{start, utf8Len, string(after[d.ReplStart:d.ReplEnd])} - lastEnd = d.End - } - return res -} - -// runes is like []rune(string(bytes)) without the duplicate allocation. -func runes(bytes []byte) []rune { - n := utf8.RuneCount(bytes) - runes := make([]rune, n) - for i := 0; i < n; i++ { - r, sz := utf8.DecodeRune(bytes) - bytes = bytes[sz:] - runes[i] = r - } - return runes -} - -// runesLen returns the length in bytes of the UTF-8 encoding of runes. -func runesLen(runes []rune) (len int) { - for _, r := range runes { - len += utf8.RuneLen(r) - } - return len -} - -// isASCII reports whether s contains only ASCII. -func isASCII[S string | []byte](s S) bool { - for i := 0; i < len(s); i++ { - if s[i] >= utf8.RuneSelf { - return false - } - } - return true -} diff --git a/modules/udiff/unified.go b/modules/udiff/unified.go deleted file mode 100644 index 268ad22..0000000 --- a/modules/udiff/unified.go +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package udiff - -import ( - "fmt" - "log" - "strings" -) - -// DefaultContextLines is the number of unchanged lines of surrounding -// context displayed by Unified. Use ToUnified to specify a different value. -const DefaultContextLines = 3 - -// Unified returns a unified diff of the old and new strings. -// The old and new labels are the names of the old and new files. -// If the strings are equal, it returns the empty string. -func Unified(oldLabel, newLabel, old, new string) string { - edits := Strings(old, new) - unified, err := ToUnified(oldLabel, newLabel, old, edits, DefaultContextLines) - if err != nil { - // Can't happen: edits are consistent. - log.Fatalf("internal error in diff.Unified: %v", err) - } - return unified -} - -// ToUnified applies the edits to content and returns a unified diff, -// with contextLines lines of (unchanged) context around each diff hunk. -// The old and new labels are the names of the content and result files. -// It returns an error if the edits are inconsistent; see ApplyEdits. -func ToUnified(oldLabel, newLabel, content string, edits []Edit, contextLines int) (string, error) { - u, err := toUnified(oldLabel, newLabel, content, edits, contextLines) - if err != nil { - return "", err - } - return u.String(), nil -} - -// unified represents a set of edits as a unified diff. -type unified struct { - // From is the name of the original file. - From string - // To is the name of the modified file. - To string - // Hunks is the set of edit Hunks needed to transform the file content. - Hunks []*hunk -} - -// Hunk represents a contiguous set of line edits to apply. -type hunk struct { - // The line in the original source where the hunk starts. - FromLine int - // The line in the original source where the hunk finishes. - ToLine int - // The set of line based edits to apply. - Lines []line -} - -// Line represents a single line operation to apply as part of a Hunk. -type line struct { - // Kind is the type of line this represents, deletion, insertion or copy. - Kind OpKind - // Content is the Content of this line. - // For deletion it is the line being removed, for all others it is the line - // to put in the output. - Content string -} - -// OpKind is used to denote the type of operation a line represents. -type OpKind int - -const ( - // Delete is the operation kind for a line that is present in the input - // but not in the output. - Delete OpKind = iota - // Insert is the operation kind for a line that is new in the output. - Insert - // Equal is the operation kind for a line that is the same in the input and - // output, often used to provide context around edited lines. - Equal -) - -// String returns a human readable representation of an OpKind. It is not -// intended for machine processing. -func (k OpKind) String() string { - switch k { - case Delete: - return "delete" - case Insert: - return "insert" - case Equal: - return "equal" - default: - panic("unknown operation kind") - } -} - -// toUnified takes a file contents and a sequence of edits, and calculates -// a unified diff that represents those edits. 
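For reference, ToUnified (whose internal helper toUnified follows) lets callers render a diff with a context width other than DefaultContextLines. An illustrative sketch — a standalone program, not from the repository, aliasing the package as diff the way myers/diff.go does:

package main

import (
	"fmt"

	diff "github.com/antgroup/hugescm/modules/udiff"
)

func main() {
	before := "a\nb\nc\nd\ne\n"
	after := "a\nb\nC\nd\ne\n"

	// Compute the edits first, then render with one line of context
	// instead of diff.DefaultContextLines (3).
	edits := diff.Strings(before, after)
	unified, err := diff.ToUnified("before.txt", "after.txt", before, edits, 1)
	if err != nil {
		panic(err) // edits produced by Strings are always consistent
	}
	fmt.Print(unified)
}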
-func toUnified(fromName, toName string, content string, edits []Edit, contextLines int) (unified, error) { - gap := contextLines * 2 - u := unified{ - From: fromName, - To: toName, - } - if len(edits) == 0 { - return u, nil - } - var err error - edits, err = lineEdits(content, edits) // expand to whole lines - if err != nil { - return u, err - } - lines := splitLines(content) - var h *hunk - last := 0 - toLine := 0 - for _, edit := range edits { - // Compute the zero-based line numbers of the edit start and end. - // TODO(adonovan): opt: compute incrementally, avoid O(n^2). - start := strings.Count(content[:edit.Start], "\n") - end := strings.Count(content[:edit.End], "\n") - if edit.End == len(content) && len(content) > 0 && content[len(content)-1] != '\n' { - end++ // EOF counts as an implicit newline - } - - switch { - case h != nil && start == last: - // direct extension - case h != nil && start <= last+gap: - // within range of previous lines, add the joiners - addEqualLines(h, lines, last, start) - default: - // need to start a new hunk - if h != nil { - // add the edge to the previous hunk - addEqualLines(h, lines, last, last+contextLines) - u.Hunks = append(u.Hunks, h) - } - toLine += start - last - h = &hunk{ - FromLine: start + 1, - ToLine: toLine + 1, - } - // add the edge to the new hunk - delta := addEqualLines(h, lines, start-contextLines, start) - h.FromLine -= delta - h.ToLine -= delta - } - last = start - for i := start; i < end; i++ { - h.Lines = append(h.Lines, line{Kind: Delete, Content: lines[i]}) - last++ - } - if edit.New != "" { - for _, content := range splitLines(edit.New) { - h.Lines = append(h.Lines, line{Kind: Insert, Content: content}) - toLine++ - } - } - } - if h != nil { - // add the edge to the final hunk - addEqualLines(h, lines, last, last+contextLines) - u.Hunks = append(u.Hunks, h) - } - return u, nil -} - -func splitLines(text string) []string { - lines := strings.SplitAfter(text, "\n") - if lines[len(lines)-1] == "" { - lines = lines[:len(lines)-1] - } - return lines -} - -func addEqualLines(h *hunk, lines []string, start, end int) int { - delta := 0 - for i := start; i < end; i++ { - if i < 0 { - continue - } - if i >= len(lines) { - return delta - } - h.Lines = append(h.Lines, line{Kind: Equal, Content: lines[i]}) - delta++ - } - return delta -} - -// String converts a unified diff to the standard textual form for that diff. -// The output of this function can be passed to tools like patch. -func (u unified) String() string { - if len(u.Hunks) == 0 { - return "" - } - b := new(strings.Builder) - fmt.Fprintf(b, "--- %s\n", u.From) - fmt.Fprintf(b, "+++ %s\n", u.To) - for _, hunk := range u.Hunks { - fromCount, toCount := 0, 0 - for _, l := range hunk.Lines { - switch l.Kind { - case Delete: - fromCount++ - case Insert: - toCount++ - default: - fromCount++ - toCount++ - } - } - fmt.Fprint(b, "@@") - if fromCount > 1 { - fmt.Fprintf(b, " -%d,%d", hunk.FromLine, fromCount) - } else if hunk.FromLine == 1 && fromCount == 0 { - // Match odd GNU diff -u behavior adding to empty file. - fmt.Fprintf(b, " -0,0") - } else { - fmt.Fprintf(b, " -%d", hunk.FromLine) - } - if toCount > 1 { - fmt.Fprintf(b, " +%d,%d", hunk.ToLine, toCount) - } else if hunk.ToLine == 1 && toCount == 0 { - // Match odd GNU diff -u behavior adding to empty file. 
- fmt.Fprintf(b, " +0,0") - } else { - fmt.Fprintf(b, " +%d", hunk.ToLine) - } - fmt.Fprint(b, " @@\n") - for _, l := range hunk.Lines { - switch l.Kind { - case Delete: - fmt.Fprintf(b, "-%s", l.Content) - case Insert: - fmt.Fprintf(b, "+%s", l.Content) - default: - fmt.Fprintf(b, " %s", l.Content) - } - if !strings.HasSuffix(l.Content, "\n") { - fmt.Fprintf(b, "\n\\ No newline at end of file\n") - } - } - } - return b.String() -}