diff --git a/.gitignore b/.gitignore index 529f7a6..da16254 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ bin/* local/* .DS_Store *.gop1 +*.tomlp1 *.rej /out/ /.vscode/ diff --git a/cmd/zeta/main.go b/cmd/zeta/main.go index a698aa5..3b74974 100644 --- a/cmd/zeta/main.go +++ b/cmd/zeta/main.go @@ -52,6 +52,7 @@ type App struct { MergeBase command.MergeBase `cmd:"merge-base" help:"Find optimal common ancestors for merge"` LsFiles command.LsFiles `cmd:"ls-files" help:"Show information about files in the index and the working tree"` HashObject command.HashObject `cmd:"hash-object" help:"Compute hash or create object"` + MergeFile command.MergeFile `cmd:"merge-file" help:"Run a three-way file merge"` Version command.Version `cmd:"version" help:"Display version information"` Debug bool `name:"debug" help:"Enable debug mode; analyze timing"` } diff --git a/modules/diferenco/diferenco.go b/modules/diferenco/diferenco.go index 1552d11..36babad 100644 --- a/modules/diferenco/diferenco.go +++ b/modules/diferenco/diferenco.go @@ -3,8 +3,10 @@ package diferenco import ( "context" "errors" + "fmt" "io" "slices" + "strings" ) // https://github.com/Wilfred/difftastic/wiki/Line-Based-Diffs @@ -36,27 +38,42 @@ const ( Patience ) +var ( + ErrUnsupportedAlgorithm = errors.New("unsupport algorithm") +) + +var ( + algorithmValueMap = map[string]Algorithm{ + "histogram": Histogram, + "onp": ONP, + "myers": Myers, + "patience": Patience, + "minimal": Minimal, + } + algorithmNameMap = map[Algorithm]string{ + Unspecified: "unspecified", + Histogram: "histogram", + ONP: "onp", + Myers: "myers", + Minimal: "minimal", + Patience: "patience", + } +) + func (a Algorithm) String() string { - switch a { - case Unspecified: - return "Unspecified" - case Histogram: - return "Histogram" - case Myers: - return "Myers" - case Minimal: - return "Minimal" - case ONP: - return "O(NP)" - case Patience: - return "Patience" + n, ok := algorithmNameMap[a] + if ok { + return n } - return "Unknown" 
+ return "unspecified" } -var ( - ErrUnsupportedAlgorithm = errors.New("unsupport algorithm") -) +func AlgorithmFromName(s string) (Algorithm, error) { + if a, ok := algorithmValueMap[strings.ToLower(s)]; ok { + return a, nil + } + return Unspecified, fmt.Errorf("algorithm '%s': %w", s, ErrUnsupportedAlgorithm) +} // commonPrefixLength returns the length of the common prefix of two T slices. func commonPrefixLength[E comparable](a, b []E) int { @@ -139,10 +156,10 @@ type Options struct { From, To *File S1, S2 string R1, R2 io.Reader - A Algorithm + A Algorithm // algorithm } -func diffInternal(ctx context.Context, L1, L2 []int, a Algorithm) ([]Change, error) { +func diffInternal[E comparable](ctx context.Context, L1, L2 []E, a Algorithm) ([]Change, error) { if a == Unspecified { switch { case len(L1) < 5000 && len(L2) < 5000: diff --git a/modules/diferenco/merge.go b/modules/diferenco/merge.go index d378927..f5b932a 100644 --- a/modules/diferenco/merge.go +++ b/modules/diferenco/merge.go @@ -16,8 +16,9 @@ package diferenco import ( "context" + "errors" "fmt" - "slices" + "io" "sort" "strings" ) @@ -68,114 +69,10 @@ const ( Sep2 = "=======" // Sep3 signifies the end of a conflict. Sep3 = ">>>>>>>" + // SepO origin content + SepO = "|||||||" ) -type candidate struct { - file1index int - file2index int - chain *candidate -} - -// Text diff algorithm following Hunt and McIlroy 1976. -// J. W. Hunt and M. D. 
McIlroy, An algorithm for differential file -// comparison, Bell Telephone Laboratories CSTR #41 (1976) -// http://www.cs.dartmouth.edu/~doug/ -func d3Lcs[E comparable](file1, file2 []E) *candidate { - var equivalenceClasses map[E][]int - var file2indices []int - - var candidates []*candidate - var line E - var c *candidate - var i, j, jX, r, s int - - equivalenceClasses = make(map[E][]int) - for j = 0; j < len(file2); j++ { - line = file2[j] - equivalenceClasses[line] = append(equivalenceClasses[line], j) - } - - candidates = append(candidates, &candidate{file1index: -1, file2index: -1, chain: nil}) - - for i = 0; i < len(file1); i++ { - line = file1[i] - file2indices = equivalenceClasses[line] // || [] - - r = 0 - c = candidates[0] - - for jX = 0; jX < len(file2indices); jX++ { - j = file2indices[jX] - - for s = r; s < len(candidates); s++ { - if (candidates[s].file2index < j) && ((s == len(candidates)-1) || (candidates[s+1].file2index > j)) { - break - } - } - - if s < len(candidates) { - newCandidate := &candidate{file1index: i, file2index: j, chain: candidates[s]} - if r == len(candidates) { - candidates = append(candidates, c) - } else { - candidates[r] = c - } - r = s + 1 - c = newCandidate - if r == len(candidates) { - break // no point in examining further (j)s - } - } - } - - if r == len(candidates) { - candidates = append(candidates, c) - } else { - if r > len(candidates) { - panic("out of range") - } else { - candidates[r] = c - } - } - } - - // At this point, we know the LCS: it's in the reverse of the - // linked-list through .chain of candidates[candidates.length - 1]. - - return candidates[len(candidates)-1] -} - -type diffIndicesResult struct { - file1 []int - file2 []int -} - -// We apply the LCS to give a simple representation of the -// offsets and lengths of mismatched chunks in the input -// files. This is used by diff3MergeIndices below. 
-func diffIndices[E comparable](file1, file2 []E) []*diffIndicesResult { - var result []*diffIndicesResult - tail1 := len(file1) - tail2 := len(file2) - - for candidate := d3Lcs(file1, file2); candidate != nil; candidate = candidate.chain { - mismatchLength1 := tail1 - candidate.file1index - 1 - mismatchLength2 := tail2 - candidate.file2index - 1 - tail1 = candidate.file1index - tail2 = candidate.file2index - - if mismatchLength1 != 0 || mismatchLength2 != 0 { - result = append(result, &diffIndicesResult{ - file1: []int{tail1 + 1, mismatchLength1}, - file2: []int{tail2 + 1, mismatchLength2}, - }) - } - } - - slices.Reverse(result) - return result -} - type hunk [5]int type hunkList []*hunk @@ -194,13 +91,18 @@ func (h hunkList) Less(i, j int) bool { return h[i][0] < h[j][0] } // Computer Science (FSTTCS), December 2007. // // (http://www.cis.upenn.edu/~bcpierce/papers/diff3-short.pdf) -func diff3MergeIndices[E comparable](a, o, b []E) [][]int { - m1 := diffIndices(o, a) - m2 := diffIndices(o, b) - +func diff3MergeIndices[E comparable](ctx context.Context, o, a, b []E, algo Algorithm) ([][]int, error) { + m1, err := diffInternal(ctx, o, a, algo) + if err != nil { + return nil, err + } + m2, err := diffInternal(ctx, o, b, algo) + if err != nil { + return nil, err + } var hunks []*hunk - addHunk := func(h *diffIndicesResult, side int) { - hunks = append(hunks, &hunk{h.file1[0], side, h.file1[1], h.file2[0], h.file2[1]}) + addHunk := func(h Change, side int) { + hunks = append(hunks, &hunk{h.P1, side, h.Del, h.P2, h.Ins}) } for i := 0; i < len(m1); i++ { addHunk(m1[i], 0) @@ -276,7 +178,7 @@ func diff3MergeIndices[E comparable](a, o, b []E) [][]int { } copyCommon(len(o)) - return result + return result, nil } // Conflict describes a merge conflict @@ -298,10 +200,13 @@ type Diff3MergeResult[E comparable] struct { // Diff3Merge applies the output of diff3MergeIndices to actually // construct the merged file; the returned result alternates // between 'ok' and 'conflict' 
blocks. -func Diff3Merge[E comparable](a, o, b []E, excludeFalseConflicts bool) []*Diff3MergeResult[E] { +func Diff3Merge[E comparable](ctx context.Context, o, a, b []E, algo Algorithm, excludeFalseConflicts bool) ([]*Diff3MergeResult[E], error) { var result []*Diff3MergeResult[E] files := [][]E{a, o, b} - indices := diff3MergeIndices(a, o, b) + indices, err := diff3MergeIndices(ctx, o, a, b, algo) + if err != nil { + return nil, err + } var okLines []E flushOk := func() { @@ -356,43 +261,178 @@ func Diff3Merge[E comparable](a, o, b []E, excludeFalseConflicts bool) []*Diff3M } flushOk() - return result + return result, nil +} + +const ( + // Only show the zealously minified conflicting lines of the local changes and the incoming (other) changes, + // hiding the base version entirely. + // + // ```text + // line1-changed-by-both + // <<<<<<< local + // line2-to-be-changed-in-incoming + // ======= + // line2-changed + // >>>>>>> incoming + // ``` + STYLE_DEFAULT = iota + // Show non-minimized hunks of local changes, the base, and the incoming (other) changes. + // + // This mode does not hide any information. + // + // ```text + // <<<<<<< local + // line1-changed-by-both + // line2-to-be-changed-in-incoming + // ||||||| 9a8d80c + // line1-to-be-changed-by-both + // line2-to-be-changed-in-incoming + // ======= + // line1-changed-by-both + // line2-changed + // >>>>>>> incoming + // ``` + STYLE_DIFF3 + // Like diff3, but will show *minimized* hunks of local change and the incoming (other) changes, + // as well as non-minimized hunks of the base. 
+ // + // ```text + // line1-changed-by-both + // <<<<<<< local + // line2-to-be-changed-in-incoming + // ||||||| 9a8d80c + // line1-to-be-changed-by-both + // line2-to-be-changed-in-incoming + // ======= + // line2-changed + // >>>>>>> incoming + // ``` + STYLE_ZEALOUS_DIFF3 +) + +var ( + styles = map[string]int{ + "merge": STYLE_DEFAULT, + "diff3": STYLE_DIFF3, + "zdiff3": STYLE_ZEALOUS_DIFF3, + } +) + +func ParseConflictStyle(s string) int { + if s, ok := styles[strings.ToLower(s)]; ok { + return s + } + return STYLE_DEFAULT +} + +type MergeOptions struct { + TextO, TextA, TextB string + RO, R1, R2 io.Reader // when if set + LabelO, LabelA, LabelB string + A Algorithm + Style int // Conflict Style +} + +func (opts *MergeOptions) ValidateOptions() error { + if opts == nil { + return errors.New("invalid merge options") + } + if opts.A == Unspecified { + opts.A = Histogram + } + if len(opts.LabelO) != 0 { + opts.LabelO = " " + opts.LabelO + } + if len(opts.LabelA) != 0 { + opts.LabelA = " " + opts.LabelA + } + if len(opts.LabelB) != 0 { + opts.LabelB = " " + opts.LabelB + } + return nil +} + +func (s *Sink) writeConflict(out io.Writer, opts *MergeOptions, conflict *Conflict[int]) { + if opts.Style == STYLE_DIFF3 { + fmt.Fprintf(out, "%s%s\n", Sep1, opts.LabelA) + s.WriteLine(out, conflict.a...) + fmt.Fprintf(out, "%s%s\n", SepO, opts.LabelO) + s.WriteLine(out, conflict.o...) + fmt.Fprintf(out, "%s\n", Sep2) + s.WriteLine(out, conflict.b...) + fmt.Fprintf(out, "%s%s\n", Sep3, opts.LabelB) + return + } + a, b := conflict.a, conflict.b + prefix := commonPrefixLength(a, b) + s.WriteLine(out, a[:prefix]...) + a = a[prefix:] + b = b[prefix:] + suffix := commonSuffixLength(a, b) + fmt.Fprintf(out, "%s%s\n", Sep1, opts.LabelA) + s.WriteLine(out, a[:len(a)-suffix]...) + + if opts.Style == STYLE_ZEALOUS_DIFF3 { + // Zealous Diff3 + fmt.Fprintf(out, "%s%s\n", SepO, opts.LabelO) + s.WriteLine(out, conflict.o...) 
+ } + + fmt.Fprintf(out, "%s\n", Sep2) + s.WriteLine(out, b[:len(b)-suffix]...) + fmt.Fprintf(out, "%s%s\n", Sep3, opts.LabelB) + if suffix != 0 { + s.WriteLine(out, b[len(b)-suffix:]...) + } } // Merge implements the diff3 algorithm to merge two texts into a common base. -func Merge(ctx context.Context, o, a, b string, labelO, labelA, labelB string) (string, bool, error) { +// +// Support multiple diff algorithms and multiple conflict styles +func Merge(ctx context.Context, opts *MergeOptions) (string, bool, error) { + if err := opts.ValidateOptions(); err != nil { + return "", false, err + } select { case <-ctx.Done(): return "", false, ctx.Err() default: } - if len(labelA) != 0 { - labelA = " " + labelA + s := NewSink(NEWLINE_RAW) + slicesO, err := s.parseLines(opts.RO, opts.TextO) + if err != nil { + return "", false, err + } + slicesA, err := s.parseLines(opts.R1, opts.TextA) + if err != nil { + return "", false, err } - if len(labelB) != 0 { - labelB = " " + labelB + slicesB, err := s.parseLines(opts.R2, opts.TextB) + if err != nil { + return "", false, err + } + regions, err := Diff3Merge(ctx, slicesO, slicesA, slicesB, opts.A, true) + if err != nil { + return "", false, err } - sink := NewSink(NEWLINE_RAW) - slicesO := sink.SplitLines(o) - slicesA := sink.SplitLines(a) - slicesB := sink.SplitLines(b) - regions := Diff3Merge(slicesA, slicesO, slicesB, true) out := &strings.Builder{} - out.Grow(max(len(o), len(a), len(b))) + out.Grow(max(len(opts.TextO), len(opts.TextA), len(opts.TextB))) var conflicts = false for _, r := range regions { if r.ok != nil { - sink.WriteLine(out, r.ok...) + s.WriteLine(out, r.ok...) continue } if r.conflict != nil { conflicts = true - fmt.Fprintf(out, "%s%s\n", Sep1, labelA) - sink.WriteLine(out, r.conflict.a...) - fmt.Fprintf(out, "%s\n", Sep2) - sink.WriteLine(out, r.conflict.b...) 
- fmt.Fprintf(out, "%s%s\n", Sep3, labelB) + s.writeConflict(out, opts, r.conflict) } } return out.String(), conflicts, nil } + +// DefaultMerge implements the diff3 algorithm to merge two texts into a common base. +func DefaultMerge(ctx context.Context, o, a, b string, labelO, labelA, labelB string) (string, bool, error) { + return Merge(ctx, &MergeOptions{TextO: o, TextA: a, TextB: b, LabelO: labelO, LabelA: labelA, LabelB: labelB, A: Histogram}) +} diff --git a/modules/diferenco/merge_test.go b/modules/diferenco/merge_test.go index 88b11d5..5bcef04 100644 --- a/modules/diferenco/merge_test.go +++ b/modules/diferenco/merge_test.go @@ -32,12 +32,26 @@ tomatoes wine ` - content, conflict, err := Merge(context.Background(), textO, textA, textB, "o.txt", "a.txt", "b.txt") + content, conflict, err := DefaultMerge(context.Background(), textO, textA, textB, "o.txt", "a.txt", "b.txt") if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return } fmt.Fprintf(os.Stderr, "%s\nconflicts: %v\n", content, conflict) + + content, conflict, err = Merge(context.Background(), &MergeOptions{TextO: textO, TextA: textA, TextB: textB, LabelO: "o.txt", LabelA: "a.txt", LabelB: "b.txt", Style: STYLE_ZEALOUS_DIFF3}) + if err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err) + return + } + fmt.Fprintf(os.Stderr, "ZEALOUS_DIFF3\n%s\nconflicts: %v\n", content, conflict) + + content, conflict, err = Merge(context.Background(), &MergeOptions{TextO: textO, TextA: textA, TextB: textB, LabelO: "o.txt", LabelA: "a.txt", LabelB: "b.txt", Style: STYLE_DIFF3}) + if err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err) + return + } + fmt.Fprintf(os.Stderr, "DIFF3\n%s\nconflicts: %v\n", content, conflict) } func TestMerge2(t *testing.T) { @@ -57,7 +71,7 @@ onions wine ` - content, conflict, err := Merge(context.Background(), textO, textA, textA, "o.txt", "a.txt", "b.txt") + content, conflict, err := DefaultMerge(context.Background(), textO, textA, textA, "o.txt", "a.txt", "b.txt") if err != nil { 
fmt.Fprintf(os.Stderr, "%s\n", err) return @@ -95,10 +109,25 @@ wine 77777 ` - content, conflict, err := Merge(context.Background(), textO, textA, textB, "o.txt", "a.txt", "b.txt") + content, conflict, err := DefaultMerge(context.Background(), textO, textA, textB, "o.txt", "a.txt", "b.txt") if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return } fmt.Fprintf(os.Stderr, "%s\nconflicts: %v\n", content, conflict) + + content, conflict, err = Merge(context.Background(), &MergeOptions{TextO: textO, TextA: textA, TextB: textB, LabelO: "o.txt", LabelA: "a.txt", LabelB: "b.txt", Style: STYLE_ZEALOUS_DIFF3}) + if err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err) + return + } + fmt.Fprintf(os.Stderr, "%s\nconflicts: %v\n", content, conflict) + + content, conflict, err = Merge(context.Background(), &MergeOptions{TextO: textO, TextA: textA, TextB: textB, LabelO: "o.txt", LabelA: "a.txt", LabelB: "b.txt", Style: STYLE_DIFF3}) + if err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err) + return + } + fmt.Fprintf(os.Stderr, "%s\nconflicts: %v\n", content, conflict) + } diff --git a/modules/zeta/object/text.go b/modules/diferenco/text.go similarity index 63% rename from modules/zeta/object/text.go rename to modules/diferenco/text.go index 1fdba8e..4a804a9 100644 --- a/modules/zeta/object/text.go +++ b/modules/diferenco/text.go @@ -1,7 +1,4 @@ -// Copyright ©️ Ant Group. All rights reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -package object +package diferenco import ( "bytes" @@ -23,43 +20,41 @@ import ( // #define MAX_XDIFF_SIZE (1024UL * 1024 * 1023) const ( - // MAX_DIFF_SIZE 100MiB - MAX_DIFF_SIZE = 100 * 1024 * 1024 + MAX_DIFF_SIZE = 100 << 20 // MAX_DIFF_SIZE 100MiB BINARY = "binary" - sniffLen = 8000 UTF8 = "UTF-8" + sniffLen = 8000 ) var ( - ErrNotTextContent = errors.New("not a text content") + ErrNonTextContent = errors.New("non-text content") ) -func textCharset(s string) string { +func checkCharset(s string) string { if _, charset, ok := strings.Cut(s, ";"); ok { return strings.TrimPrefix(strings.TrimSpace(charset), "charset=") } - return "UTF-8" + return UTF8 } -func resolveCharset(payload []byte) string { +func detectCharset(payload []byte) string { result := mime.DetectAny(payload) for p := result; p != nil; p = p.Parent() { if p.Is("text/plain") { - return textCharset(p.String()) + return checkCharset(p.String()) } } return BINARY } -// readText: Read all text content: automatically detect text encoding and convert to UTF-8, binary will return ErrNotTextContent -func readText(r io.Reader) (string, string, error) { +func readUnifiedText(r io.Reader) (string, string, error) { sniffBytes, err := streamio.ReadMax(r, sniffLen) if err != nil { return "", "", err } - charset := resolveCharset(sniffBytes) + charset := detectCharset(sniffBytes) if charset == BINARY { - return "", "", ErrNotTextContent + return "", "", ErrNonTextContent } reader := io.MultiReader(bytes.NewReader(sniffBytes), r) if strings.EqualFold(charset, UTF8) { @@ -75,7 +70,7 @@ func readText(r io.Reader) (string, string, error) { } buf, err := chardet.DecodeFromCharset(b.Bytes(), charset) if err != nil { - return "", "", ErrNotTextContent + return "", "", ErrNonTextContent } if len(buf) == 0 { return "", "", nil @@ -83,30 +78,31 @@ func readText(r io.Reader) (string, string, error) { return unsafe.String(unsafe.SliceData(buf), len(buf)), charset, nil } -func 
readTextUTF8(r io.Reader) (string, error) { +func readRawText(r io.Reader, size int) (string, error) { var b bytes.Buffer if _, err := b.ReadFrom(io.LimitReader(r, sniffLen)); err != nil { return "", err } if bytes.IndexByte(b.Bytes(), 0) != -1 { - return "", ErrNotTextContent + return "", ErrNonTextContent } + b.Grow(size) if _, err := b.ReadFrom(r); err != nil { return "", err } - return b.String(), nil + content := b.Bytes() + return unsafe.String(unsafe.SliceData(content), len(content)), nil } -// GetUnifiedText: Read all text content. -func GetUnifiedText(r io.Reader, size int64, codecvt bool) (string, string, error) { +func ReadUnifiedText(r io.Reader, size int64, textConv bool) (content string, charset string, err error) { if size > MAX_DIFF_SIZE { - return "", "", ErrNotTextContent + return "", "", ErrNonTextContent } - if codecvt { - return readText(r) + if textConv { + return readUnifiedText(r) } - s, err := readTextUTF8(r) - return s, UTF8, err + content, err = readRawText(r, int(size)) + return content, UTF8, err } func NewUnifiedReader(r io.Reader) (io.Reader, error) { @@ -114,7 +110,7 @@ func NewUnifiedReader(r io.Reader) (io.Reader, error) { if err != nil { return nil, err } - charset := resolveCharset(sniffBytes) + charset := detectCharset(sniffBytes) reader := io.MultiReader(bytes.NewReader(sniffBytes), r) // binary or UTF-8 not need convert if charset == BINARY || strings.EqualFold(charset, UTF8) { @@ -122,3 +118,14 @@ func NewUnifiedReader(r io.Reader) (io.Reader, error) { } return chardet.NewReader(reader, charset), nil } + +func NewTextReader(r io.Reader) (io.Reader, error) { + sniffBytes, err := streamio.ReadMax(r, sniffLen) + if err != nil { + return nil, err + } + if bytes.IndexByte(sniffBytes, 0) != -1 { + return nil, ErrNonTextContent + } + return io.MultiReader(bytes.NewReader(sniffBytes), r), nil +} diff --git a/modules/zeta/config/config.go b/modules/zeta/config/config.go index 171051b..6dd9e7a 100644 --- a/modules/zeta/config/config.go 
+++ b/modules/zeta/config/config.go @@ -165,12 +165,30 @@ func (t *Transport) Overwrite(o *Transport) { } } +type Diff struct { + Algorithm string `toml:"algorithm,omitempty"` +} + +func (d *Diff) Overwrite(o *Diff) { + d.Algorithm = overwrite(d.Algorithm, o.Algorithm) +} + +type Merge struct { + ConflictStyle string `toml:"conflictStyle,omitempty"` +} + +func (m *Merge) Overwrite(o *Merge) { + m.ConflictStyle = overwrite(m.ConflictStyle, o.ConflictStyle) +} + type Config struct { Core Core `toml:"core,omitempty"` User User `toml:"user,omitempty"` Fragment Fragment `toml:"fragment,omitempty"` HTTP HTTP `toml:"http,omitempty"` Transport Transport `toml:"transport,omitempty"` + Diff Diff `toml:"diff,omitempty"` + Merge Merge `toml:"merge,omitempty"` } // Overwrite: use local config overwrite config @@ -180,4 +198,6 @@ func (c *Config) Overwrite(co *Config) { c.Fragment.Overwrite(&co.Fragment) c.HTTP.Overwrite(&co.HTTP) c.Transport.Overwrite(&co.Transport) + c.Diff.Overwrite(&co.Diff) + c.Merge.Overwrite(&co.Merge) } diff --git a/modules/zeta/object/file.go b/modules/zeta/object/file.go index 6856461..6bd5937 100644 --- a/modules/zeta/object/file.go +++ b/modules/zeta/object/file.go @@ -62,6 +62,10 @@ func (f *File) OriginReader(ctx context.Context) (io.ReadCloser, int64, error) { return &readCloser{Reader: br.Contents, Closer: br}, br.Size, nil } +const ( + sniffLen = 8000 +) + func (f *File) Reader(ctx context.Context) (io.ReadCloser, bool, error) { if f.b == nil { return nil, false, io.ErrUnexpectedEOF @@ -89,7 +93,7 @@ func (f *File) UnifiedText(ctx context.Context, codecvt bool) (content string, e return "", err } defer r.Close() - content, _, err = GetUnifiedText(r, f.Size, codecvt) + content, _, err = diferenco.ReadUnifiedText(r, f.Size, codecvt) return content, err } diff --git a/modules/zeta/object/patch.go b/modules/zeta/object/patch.go index e512be5..d4349cb 100644 --- a/modules/zeta/object/patch.go +++ b/modules/zeta/object/patch.go @@ -26,7 +26,7 @@ type 
PatchOptions struct { } func sizeOverflow(f *File) bool { - return f != nil && f.Size > MAX_DIFF_SIZE + return f != nil && f.Size > diferenco.MAX_DIFF_SIZE } func fileStatName(from, to *File) string { @@ -64,14 +64,14 @@ func fileStatWithContext(ctx context.Context, opts *PatchOptions, c *Change) (*F return s, nil } fromContent, err := from.UnifiedText(ctx, opts.Textconv) - if plumbing.IsNoSuchObject(err) || err == ErrNotTextContent { + if plumbing.IsNoSuchObject(err) || err == diferenco.ErrNonTextContent { return s, nil } if err != nil { return nil, err } toContent, err := to.UnifiedText(ctx, opts.Textconv) - if plumbing.IsNoSuchObject(err) || err == ErrNotTextContent { + if plumbing.IsNoSuchObject(err) || err == diferenco.ErrNonTextContent { return s, nil } if err != nil { @@ -122,14 +122,14 @@ func filePatchWithContext(ctx context.Context, opts *PatchOptions, c *Change) (* return &diferenco.Unified{From: from.asFile(), To: to.asFile(), IsBinary: true}, nil } fromContent, err := from.UnifiedText(ctx, opts.Textconv) - if plumbing.IsNoSuchObject(err) || err == ErrNotTextContent { + if plumbing.IsNoSuchObject(err) || err == diferenco.ErrNonTextContent { return &diferenco.Unified{From: from.asFile(), To: to.asFile(), IsBinary: true}, nil } if err != nil { return nil, err } toContent, err := to.UnifiedText(ctx, opts.Textconv) - if plumbing.IsNoSuchObject(err) || err == ErrNotTextContent { + if plumbing.IsNoSuchObject(err) || err == diferenco.ErrNonTextContent { return &diferenco.Unified{From: from.asFile(), To: to.asFile(), IsBinary: true}, nil } if err != nil { diff --git a/pkg/command/command_diff.go b/pkg/command/command_diff.go index 5aacf81..0b5ad8b 100644 --- a/pkg/command/command_diff.go +++ b/pkg/command/command_diff.go @@ -61,22 +61,9 @@ func (c *Diff) Passthrough(paths []string) { c.passthroughArgs = append(c.passthroughArgs, paths...) 
} -var ( - diffAlgorithms = map[string]diferenco.Algorithm{ - "histogram": diferenco.Histogram, - "onp": diferenco.ONP, - "myers": diferenco.Myers, - "patience": diferenco.Patience, - "minimal": diferenco.Minimal, - } -) - func (c *Diff) checkAlgorithm() (diferenco.Algorithm, error) { if len(c.DiffAlgorithm) != 0 { - if a, ok := diffAlgorithms[c.DiffAlgorithm]; ok { - return a, nil - } - return diferenco.Unspecified, fmt.Errorf("unsupport algorithms %s'", c.DiffAlgorithm) + return diferenco.AlgorithmFromName(c.DiffAlgorithm) } if c.Histogram { return diferenco.Histogram, nil diff --git a/pkg/command/command_merge_file.go b/pkg/command/command_merge_file.go new file mode 100644 index 0000000..8a25f67 --- /dev/null +++ b/pkg/command/command_merge_file.go @@ -0,0 +1,127 @@ +package command + +import ( + "context" + "fmt" + "io" + "os" + + "github.com/antgroup/hugescm/modules/diferenco" + "github.com/antgroup/hugescm/pkg/zeta" +) + +type MergeFile struct { + Stdout bool `name:"stdout" short:"p" negatable:"" help:"Send results to standard output"` + ObjectID bool `name:"object-id" negatable:"" help:"Use object IDs instead of filenames"` + Diff3 bool `name:"diff3" negatable:"" help:"Use a diff3 based merge"` + ZDiff3 bool `name:"zdiff3" negatable:"" help:"Use a zealous diff3 based merge"` + DiffAlgorithm string `name:"diff-algorithm" help:"Choose a diff algorithm, supported: histogram|onp|myers|patience|minimal"` + L []string `name:":L" short:"L" help:"Set labels for file1/orig-file/file2"` + F1 string `arg:"" name:"0" help:"file1"` + O string `arg:"" name:"1" help:"orig-file"` + F2 string `arg:"" name:"2" help:"file2"` +} + +const ( + mergeFileSummaryFormat = `%szeta merge-file [] [-L [-L [-L ]]] ` +) + +func (c *MergeFile) Summary() string { + return fmt.Sprintf(mergeFileSummaryFormat, W("Usage: ")) +} + +func readText(p string, textConv bool) (string, error) { + fd, err := os.Open(p) + if err != nil { + return "", err + } + defer fd.Close() + si, err := fd.Stat() + if 
err != nil { + return "", err + } + content, _, err := diferenco.ReadUnifiedText(fd, si.Size(), textConv) + return content, err +} + +func (c *MergeFile) mergeExtra(g *Globals) error { + var a diferenco.Algorithm + var err error + if len(c.DiffAlgorithm) != 0 { + if a, err = diferenco.AlgorithmFromName(c.DiffAlgorithm); err != nil { + fmt.Fprintf(os.Stderr, "parse diff.algorithm error: %v\n", err) + return err + } + } + var style int + switch { + case c.Diff3: + style = diferenco.STYLE_DIFF3 + case c.ZDiff3: + style = diferenco.STYLE_ZEALOUS_DIFF3 + } + g.DbgPrint("algorithm: %s conflict style: %v", a, style) + textO, err := readText(c.O, false) + if err != nil { + fmt.Fprintf(os.Stderr, "merge-file: open error: %v\n", err) + return err + } + textA, err := readText(c.F1, false) + if err != nil { + fmt.Fprintf(os.Stderr, "merge-file: open error: %v\n", err) + return err + } + textB, err := readText(c.F2, false) + if err != nil { + fmt.Fprintf(os.Stderr, "merge-file: open error: %v\n", err) + return err + } + mergedText, conflict, err := diferenco.Merge(context.Background(), &diferenco.MergeOptions{ + TextO: textO, + TextA: textA, + TextB: textB, + LabelO: c.O, + LabelA: c.F1, + LabelB: c.F2, + A: a, + Style: style, + }) + if err != nil { + fmt.Fprintf(os.Stderr, "merge-file: merge error: %v\n", err) + return err + } + _, _ = io.WriteString(os.Stdout, mergedText) + if conflict { + return &zeta.ErrExitCode{ExitCode: 1, Message: "conflict"} + } + return nil +} + +func (c *MergeFile) Run(g *Globals) error { + if !c.ObjectID { + return c.mergeExtra(g) + } + r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ + Worktree: g.CWD, + Values: g.Values, + Verbose: g.Verbose, + }) + if err != nil { + return err + } + defer r.Close() + var style int + switch { + case c.Diff3: + style = diferenco.STYLE_DIFF3 + case c.ZDiff3: + style = diferenco.STYLE_ZEALOUS_DIFF3 + } + if err := r.MergeFile(context.Background(), &zeta.MergeFileOptions{O: c.O, A: c.F1, B: c.F2, Style: 
style, DiffAlgorithm: c.DiffAlgorithm, Stdout: c.Stdout}); err != nil { + if !zeta.IsExitCode(err, 1) { + diev("merge-file: error: %v", err) + } + return err + } + return nil +} diff --git a/pkg/tr/languages/zh-CN.toml b/pkg/tr/languages/zh-CN.toml index d5009d0..b69d02a 100644 --- a/pkg/tr/languages/zh-CN.toml +++ b/pkg/tr/languages/zh-CN.toml @@ -380,6 +380,14 @@ "Write the object into the object database" = "将对象写入对象数据库" "Read the object from stdin" = "从标准输入读取对象" "Process file as it were from this path" = "处理文件并假设其来自于此路径" +# merge-file +"Run a three-way file merge" = "运行三向文件合并" +"Send results to standard output" = "将结果发送到标准输出" +"Use object IDs instead of filenames" = "使用对象 ID 替换文件名" +"Use a diff3 based merge" = "使用基于 diff3 的合并" +"Use a zealous diff3 based merge" = "使用基于狂热 diff3(zealous diff3)的合并" +"Set labels for file1/orig-file/file2" = "为 文件1/初始文件/文件2 设置标签" + # Others "WARNING" = "警告" "not zeta repository" = "不是 zeta 存储库" diff --git a/pkg/zeta/cat.go b/pkg/zeta/cat.go index 0a2231a..7e9d563 100644 --- a/pkg/zeta/cat.go +++ b/pkg/zeta/cat.go @@ -13,6 +13,7 @@ import ( "os" "strings" + "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/zeta/backend" "github.com/antgroup/hugescm/modules/zeta/object" @@ -108,7 +109,7 @@ func (r *Repository) catBlob(ctx context.Context, w io.Writer, oid plumbing.Hash } reader := b.Contents if textconv { - if reader, err = object.NewUnifiedReader(b.Contents); err != nil { + if reader, err = diferenco.NewUnifiedReader(b.Contents); err != nil { return err } } diff --git a/pkg/zeta/merge_file.go b/pkg/zeta/merge_file.go new file mode 100644 index 0000000..76454f6 --- /dev/null +++ b/pkg/zeta/merge_file.go @@ -0,0 +1,131 @@ +package zeta + +import ( + "context" + "fmt" + "io" + "os" + "os/exec" + "strings" + + "github.com/antgroup/hugescm/modules/diferenco" + "github.com/antgroup/hugescm/pkg/zeta/odb" +) + +func (r *Repository) resolveMergeDriver() 
odb.MergeDriver { + if driverName, ok := os.LookupEnv(ENV_ZETA_MERGE_TEXT_DRIVER); ok { + switch driverName { + case "git": + if _, err := exec.LookPath("git"); err == nil { + r.DbgPrint("Use git merge-file as text merge driver") + return r.odb.ExternalMerge + } + case "diff3": + if _, err := exec.LookPath("diff3"); err == nil { + r.DbgPrint("Use diff3 as text merge driver") + return r.odb.Diff3Merge + } + default: + r.DbgPrint("Unsupport merge driver '%s'", driverName) + } + } + var diffAlgorithm diferenco.Algorithm + var err error + if len(r.Diff.Algorithm) != 0 { + if diffAlgorithm, err = diferenco.AlgorithmFromName(r.Diff.Algorithm); err != nil { + warn("diff: bad config: diff.algorithm value: %s", r.Diff.Algorithm) + } + } + mergeConflictStyle := diferenco.ParseConflictStyle(r.Merge.ConflictStyle) + return func(ctx context.Context, o, a, b, labelO, labelA, labelB string) (string, bool, error) { + return diferenco.Merge(ctx, &diferenco.MergeOptions{ + TextO: o, + TextA: a, + TextB: b, + LabelO: labelO, + LabelA: labelA, + LabelB: labelB, + A: diffAlgorithm, + Style: mergeConflictStyle, + }) + } +} + +type MergeFileOptions struct { + O, A, B string + Style int + DiffAlgorithm string + Stdout bool + TextConv bool +} + +func (opts *MergeFileOptions) diffAlgorithmFromName(defaultDiffAlgorithm string) diferenco.Algorithm { + if len(opts.DiffAlgorithm) != 0 { + if diffAlgorithm, err := diferenco.AlgorithmFromName(opts.DiffAlgorithm); err == nil { + return diffAlgorithm + } + warn("diff: bad --diff-algorithm value: %s", opts.DiffAlgorithm) + } + if len(defaultDiffAlgorithm) != 0 { + if diffAlgorithm, err := diferenco.AlgorithmFromName(defaultDiffAlgorithm); err == nil { + return diffAlgorithm + } + warn("diff: bad config: diff.algorithm value: %s", defaultDiffAlgorithm) + } + return diferenco.Unspecified +} + +func (r *Repository) MergeFile(ctx context.Context, opts *MergeFileOptions) error { + diffAlgorithm := opts.diffAlgorithmFromName(r.Diff.Algorithm) + 
r.DbgPrint("algorithm: %s conflict style: %v", diffAlgorithm, opts.Style) + o, err := r.Revision(ctx, opts.O) + if err != nil { + return err + } + textO, _, err := r.readMissingText(ctx, o, false) + if err != nil { + return err + } + a, err := r.Revision(ctx, opts.A) + if err != nil { + return err + } + textA, _, err := r.readMissingText(ctx, a, false) + if err != nil { + return err + } + b, err := r.Revision(ctx, opts.B) + if err != nil { + return err + } + textB, _, err := r.readMissingText(ctx, b, false) + if err != nil { + return err + } + merged, conflict, err := diferenco.Merge(ctx, &diferenco.MergeOptions{ + TextO: textO, + TextA: textA, + TextB: textB, + LabelO: o.String()[0:8], + LabelA: a.String()[0:8], + LabelB: b.String()[0:8], + A: diffAlgorithm, + Style: opts.Style, + }) + if err != nil { + return err + } + if opts.Stdout { + _, _ = io.WriteString(os.Stdout, merged) + } else { + oid, err := r.odb.HashTo(ctx, strings.NewReader(merged), int64(len(merged))) + if err != nil { + return err + } + _, _ = fmt.Fprintln(os.Stdout, oid.String()) + } + if conflict { + return &ErrExitCode{ExitCode: 1, Message: "conflict"} + } + return nil +} diff --git a/pkg/zeta/merge_tree.go b/pkg/zeta/merge_tree.go index 7049f96..8765303 100644 --- a/pkg/zeta/merge_tree.go +++ b/pkg/zeta/merge_tree.go @@ -9,7 +9,6 @@ import ( "errors" "fmt" "os" - "os/exec" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/plumbing" @@ -44,27 +43,7 @@ func (r *Repository) readMissingText(ctx context.Context, oid plumbing.Hash, tex return "", "", err } defer br.Close() - return object.GetUnifiedText(br.Contents, br.Size, textConv) -} - -func (r *Repository) resolveMergeDriver() odb.MergeDriver { - if driverName, ok := os.LookupEnv(ENV_ZETA_MERGE_TEXT_DRIVER); ok { - switch driverName { - case "git": - if _, err := exec.LookPath("git"); err == nil { - r.DbgPrint("Use git merge-file as text merge driver") - return r.odb.ExternalMerge - } - case "diff3": - if _, 
err := exec.LookPath("diff3"); err == nil { - r.DbgPrint("Use diff3 as text merge driver") - return r.odb.Diff3Merge - } - default: - r.DbgPrint("Unsupport merge driver '%s'", driverName) - } - } - return diferenco.Merge + return diferenco.ReadUnifiedText(br.Contents, br.Size, textConv) } func (o *MergeTreeOptions) formatJson(result *odb.MergeResult) { diff --git a/pkg/zeta/misc.go b/pkg/zeta/misc.go index 5475bc2..8c1752d 100644 --- a/pkg/zeta/misc.go +++ b/pkg/zeta/misc.go @@ -287,6 +287,16 @@ type ErrExitCode struct { Message string } +func IsExitCode(err error, i int) bool { + if err == nil { + return false + } + if e, ok := err.(*ErrExitCode); ok { + return e.ExitCode == i + } + return false +} + func (e *ErrExitCode) Error() string { return e.Message } diff --git a/pkg/zeta/odb/merge.go b/pkg/zeta/odb/merge.go index 4fc0a47..d131b42 100644 --- a/pkg/zeta/odb/merge.go +++ b/pkg/zeta/odb/merge.go @@ -380,7 +380,7 @@ func (d *ODB) mergeEntry(ctx context.Context, ch *ChangeEntry, opts *MergeOption M: opts.MergeDriver, G: opts.TextGetter, }) - if err == object.ErrNotTextContent { + if err == diferenco.ErrNonTextContent { result.Messages = append(result.Messages, tr.Sprintf("warning: Cannot merge binary files: %s (%s vs. %s)", ch.Path, opts.Branch1, opts.Branch2)) result.Conflicts = append(result.Conflicts, ch.makeConflict(CONFLICT_BINARY)) return &TreeEntry{Path: ch.Path, TreeEntry: ch.Our}, nil @@ -428,7 +428,7 @@ func (d *ODB) mergeEntry(ctx context.Context, ch *ChangeEntry, opts *MergeOption M: opts.MergeDriver, G: opts.TextGetter, }) - if err == object.ErrNotTextContent { + if err == diferenco.ErrNonTextContent { result.Messages = append(result.Messages, tr.Sprintf("warning: Cannot merge binary files: %s (%s vs. 
%s)", ch.Path, opts.Branch1, opts.Branch2)) result.Conflicts = append(result.Conflicts, ch.makeConflict(CONFLICT_BINARY)) return &TreeEntry{Path: ch.Path, TreeEntry: ch.Our}, nil @@ -487,7 +487,7 @@ func (d *ODB) unifiedText(ctx context.Context, oid plumbing.Hash, textConv bool) return "", "", err } defer br.Close() - return object.GetUnifiedText(br.Contents, br.Size, textConv) + return diferenco.ReadUnifiedText(br.Contents, br.Size, textConv) } // MergeTree: three way merge tree @@ -499,7 +499,7 @@ func (d *ODB) MergeTree(ctx context.Context, o, a, b *object.Tree, opts *MergeOp opts.Branch2 = "Branch2" } if opts.MergeDriver == nil { - opts.MergeDriver = diferenco.Merge // fallback + opts.MergeDriver = diferenco.DefaultMerge // fallback } if opts.TextGetter == nil { opts.TextGetter = d.unifiedText diff --git a/pkg/zeta/odb/merge_driver.go b/pkg/zeta/odb/merge_driver.go index 895ccb3..34dbb2c 100644 --- a/pkg/zeta/odb/merge_driver.go +++ b/pkg/zeta/odb/merge_driver.go @@ -9,8 +9,8 @@ import ( "strings" "github.com/antgroup/hugescm/modules/chardet" + "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/plumbing" - "github.com/antgroup/hugescm/modules/zeta/object" ) type MergeDriver func(ctx context.Context, o, a, b string, labelO, labelA, labelB string) (string, bool, error) @@ -47,7 +47,7 @@ func (d *ODB) mergeText(ctx context.Context, opts *mergeOptions) (*mergeTextResu if err != nil { return nil, err } - if !opts.Textconv || strings.EqualFold(charset, object.UTF8) { + if !opts.Textconv || strings.EqualFold(charset, diferenco.UTF8) { size := int64(len(mergedText)) oid, err := d.HashTo(ctx, strings.NewReader(mergedText), size) if err != nil { diff --git a/pkg/zeta/odb/merge_test.go b/pkg/zeta/odb/merge_test.go index 10d5550..2162aa3 100644 --- a/pkg/zeta/odb/merge_test.go +++ b/pkg/zeta/odb/merge_test.go @@ -175,7 +175,7 @@ tomatoes onions wine ` - s, conflict, err := diferenco.Merge(context.Background(), textO, textA, textB, "a.txt", 
"a.txt", "b.txt") + s, conflict, err := diferenco.DefaultMerge(context.Background(), textO, textA, textB, "a.txt", "a.txt", "b.txt") if err != nil { return } diff --git a/pkg/zeta/worktree_diff.go b/pkg/zeta/worktree_diff.go index 66603b0..eca083c 100644 --- a/pkg/zeta/worktree_diff.go +++ b/pkg/zeta/worktree_diff.go @@ -24,7 +24,7 @@ func (w *Worktree) openText(p string, size int64, textConv bool) (string, error) return "", err } defer fd.Close() - content, _, err := object.GetUnifiedText(fd, size, textConv) + content, _, err := diferenco.ReadUnifiedText(fd, size, textConv) return content, err } @@ -34,7 +34,7 @@ func (w *Worktree) openBlobText(ctx context.Context, oid plumbing.Hash, textConv return "", err } defer br.Close() - content, _, err := object.GetUnifiedText(br.Contents, br.Size, textConv) + content, _, err := diferenco.ReadUnifiedText(br.Contents, br.Size, textConv) return content, err } @@ -46,11 +46,11 @@ func (w *Worktree) readContent(ctx context.Context, p noder.Path, textConv bool) switch a := p.Last().(type) { case *filesystem.Node: f = &diferenco.File{Name: name, Hash: a.HashRaw().String(), Mode: uint32(a.Mode())} - if a.Size() > object.MAX_DIFF_SIZE { + if a.Size() > diferenco.MAX_DIFF_SIZE { return f, "", false, true, nil } content, err = w.openText(name, a.Size(), textConv) - if err == object.ErrNotTextContent { + if err == diferenco.ErrNonTextContent { return f, "", false, true, nil } return f, content, false, false, nil @@ -59,12 +59,12 @@ func (w *Worktree) readContent(ctx context.Context, p noder.Path, textConv bool) if a.IsFragments() { return f, "", true, false, err } - if a.Size() > object.MAX_DIFF_SIZE { + if a.Size() > diferenco.MAX_DIFF_SIZE { return f, "", false, true, nil } content, err = w.openBlobText(ctx, a.HashRaw(), textConv) // When the current repository uses an incomplete checkout mechanism, we treat these files as binary files, i.e. no differences can be calculated. 
- if err == object.ErrNotTextContent || plumbing.IsNoSuchObject(err) { + if err == diferenco.ErrNonTextContent || plumbing.IsNoSuchObject(err) { return f, "", false, true, nil } return f, content, false, false, nil @@ -73,11 +73,11 @@ func (w *Worktree) readContent(ctx context.Context, p noder.Path, textConv bool) if a.IsFragments() { return f, "", true, false, err } - if a.Size() > object.MAX_DIFF_SIZE { + if a.Size() > diferenco.MAX_DIFF_SIZE { return f, "", false, true, nil } content, err = w.openBlobText(ctx, a.HashRaw(), textConv) - if err == object.ErrNotTextContent || plumbing.IsNoSuchObject(err) { + if err == diferenco.ErrNonTextContent || plumbing.IsNoSuchObject(err) { return f, "", false, true, nil } return f, content, a.IsFragments(), false, nil @@ -336,8 +336,14 @@ func (w *Worktree) between(ctx context.Context, opts *DiffOptions) error { } func (w *Worktree) DiffContext(ctx context.Context, opts *DiffOptions) error { + if opts.Algorithm == diferenco.Unspecified && len(w.Diff.Algorithm) != 0 { + if a, err := diferenco.AlgorithmFromName(w.Diff.Algorithm); err != nil { + warn("diff: bad config, key: diff.algorithm value: %s", w.Diff.Algorithm) + } else { + opts.Algorithm = a + } + } if len(opts.From) != 0 && len(opts.To) != 0 { - return w.between(ctx, opts) } if len(opts.From) != 0 { diff --git a/utils/diffbug/difffix_test.go b/utils/diffbug/difffix_test.go index 0e9304c..081cf3b 100644 --- a/utils/diffbug/difffix_test.go +++ b/utils/diffbug/difffix_test.go @@ -42,7 +42,7 @@ func TestDiffText(t *testing.T) { } e := diferenco.NewUnifiedEncoder(os.Stderr) e.SetColor(color.NewColorConfig()) - e.Encode([]*diferenco.Unified{u}) + _ = e.Encode([]*diferenco.Unified{u}) } func TestRuneToString(t *testing.T) {