-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
schemadiff
: supporting textual diff (#15388)
Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
- Loading branch information
1 parent
2ddc0e8
commit f19b58e
Showing
7 changed files
with
1,113 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
/* | ||
Copyright 2024 The Vitess Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package schemadiff | ||
|
||
import ( | ||
"strings" | ||
) | ||
|
||
// TextualAnnotationType is an enum for the type of annotation that can be applied to a line of text. | ||
type TextualAnnotationType int | ||
|
||
const ( | ||
UnchangedTextualAnnotationType TextualAnnotationType = iota | ||
AddedTextualAnnotationType | ||
RemovedTextualAnnotationType | ||
) | ||
|
||
// AnnotatedText is a some text and its annotation type. The text is usually single-line, but it | ||
// can be multi-line, as in the case of partition specs. | ||
type AnnotatedText struct { | ||
text string | ||
typ TextualAnnotationType | ||
} | ||
|
||
// TextualAnnotations is a sequence of annotated texts. It is the annotated representation of a statement. | ||
type TextualAnnotations struct { | ||
texts []*AnnotatedText | ||
hasAnyChanges bool | ||
} | ||
|
||
func NewTextualAnnotations() *TextualAnnotations { | ||
return &TextualAnnotations{} | ||
} | ||
|
||
func (a *TextualAnnotations) Len() int { | ||
return len(a.texts) | ||
} | ||
|
||
func (a *TextualAnnotations) mark(text string, typ TextualAnnotationType) { | ||
a.texts = append(a.texts, &AnnotatedText{text: text, typ: typ}) | ||
if typ != UnchangedTextualAnnotationType { | ||
a.hasAnyChanges = true | ||
} | ||
} | ||
|
||
func (a *TextualAnnotations) MarkAdded(text string) { | ||
a.mark(text, AddedTextualAnnotationType) | ||
} | ||
|
||
func (a *TextualAnnotations) MarkRemoved(text string) { | ||
a.mark(text, RemovedTextualAnnotationType) | ||
} | ||
|
||
func (a *TextualAnnotations) MarkUnchanged(text string) { | ||
a.mark(text, UnchangedTextualAnnotationType) | ||
} | ||
|
||
// ByType returns the subset of annotations by given type. | ||
func (a *TextualAnnotations) ByType(typ TextualAnnotationType) (r []*AnnotatedText) { | ||
for _, text := range a.texts { | ||
if text.typ == typ { | ||
r = append(r, text) | ||
} | ||
} | ||
return r | ||
} | ||
|
||
func (a *TextualAnnotations) Added() (r []*AnnotatedText) { | ||
return a.ByType(AddedTextualAnnotationType) | ||
} | ||
|
||
func (a *TextualAnnotations) Removed() (r []*AnnotatedText) { | ||
return a.ByType(RemovedTextualAnnotationType) | ||
} | ||
|
||
// Export beautifies the annotated text and returns it as a string. | ||
func (a *TextualAnnotations) Export() string { | ||
textLines := make([]string, 0, len(a.texts)) | ||
for _, annotatedText := range a.texts { | ||
switch annotatedText.typ { | ||
case AddedTextualAnnotationType: | ||
annotatedText.text = "+" + annotatedText.text | ||
case RemovedTextualAnnotationType: | ||
annotatedText.text = "-" + annotatedText.text | ||
default: | ||
// text unchanged | ||
if a.hasAnyChanges { | ||
// If there is absolutely no change, we don't add a space anywhere | ||
annotatedText.text = " " + annotatedText.text | ||
} | ||
} | ||
textLines = append(textLines, annotatedText.text) | ||
} | ||
return strings.Join(textLines, "\n") | ||
} | ||
|
||
// annotatedStatement returns a new TextualAnnotations object that annotates the given statement with the given annotations. | ||
// The given annotations were created by the diffing algorithm, and represent the CanonicalString of some node. | ||
// However, the given statement is just some text, and we need to find the annotations (some of which may be multi-line) | ||
// inside our text, and return a per-line annotation. | ||
func annotatedStatement(stmt string, annotationType TextualAnnotationType, annotations *TextualAnnotations) *TextualAnnotations { | ||
stmtLines := strings.Split(stmt, "\n") | ||
result := NewTextualAnnotations() | ||
annotationLines := map[string]bool{} // single-line breakdown of all annotations | ||
for _, annotation := range annotations.ByType(annotationType) { | ||
// An annotated text could be multiline. Partition specs are such. | ||
lines := strings.Split(annotation.text, "\n") | ||
for _, line := range lines { | ||
line = strings.TrimSpace(line) | ||
if line != "" { | ||
annotationLines[line] = true | ||
} | ||
} | ||
} | ||
annotationLinesMutations := map[string](map[string]bool){} | ||
// Mutations are expected ways to find an annotation inside a `CREATE TABLE` statement. | ||
for annotationLine := range annotationLines { | ||
possibleMutations := map[string]bool{ | ||
annotationLine: true, | ||
") " + annotationLine: true, // e.g. ") ENGINE=InnoDB" | ||
") " + annotationLine + ",": true, // e.g. ") ENGINE=InnoDB,[\n ROW_FORMAT=COMPRESSED]" | ||
"(" + annotationLine + ")": true, // e.g. "(PARTITION p0 VALUES LESS THAN (10)) | ||
"(" + annotationLine + ",": true, // e.g. "(PARTITION p0 VALUES LESS THAN (10), | ||
annotationLine + ",": true, // e.g. "i int unsigned," | ||
annotationLine + ")": true, // e.g. "PARTITION p9 VALUES LESS THAN (90))" | ||
} | ||
annotationLinesMutations[annotationLine] = possibleMutations | ||
} | ||
for i := range stmtLines { | ||
lineAnnotated := false | ||
trimmedLine := strings.TrimSpace(stmtLines[i]) | ||
if trimmedLine == "" { | ||
continue | ||
} | ||
for annotationLine := range annotationLines { | ||
if lineAnnotated { | ||
break | ||
} | ||
possibleMutations := annotationLinesMutations[annotationLine] | ||
if possibleMutations[trimmedLine] { | ||
// Annotate this line! | ||
result.mark(stmtLines[i], annotationType) | ||
lineAnnotated = true | ||
// No need to match this annotation again | ||
delete(annotationLines, annotationLine) | ||
delete(possibleMutations, annotationLine) | ||
} | ||
} | ||
if !lineAnnotated { | ||
result.MarkUnchanged(stmtLines[i]) | ||
} | ||
} | ||
return result | ||
} | ||
|
||
// annotateAll blindly annotates all lines of the given statement with the given annotation type. | ||
func annotateAll(stmt string, annotationType TextualAnnotationType) *TextualAnnotations { | ||
stmtLines := strings.Split(stmt, "\n") | ||
result := NewTextualAnnotations() | ||
for _, line := range stmtLines { | ||
result.mark(line, annotationType) | ||
} | ||
return result | ||
} | ||
|
||
// unifiedAnnotated takes two annotations of from, to statements and returns a unified annotation. | ||
func unifiedAnnotated(from *TextualAnnotations, to *TextualAnnotations) *TextualAnnotations { | ||
unified := NewTextualAnnotations() | ||
fromIndex := 0 | ||
toIndex := 0 | ||
for fromIndex < from.Len() || toIndex < to.Len() { | ||
matchingLine := "" | ||
if fromIndex < from.Len() { | ||
fromLine := from.texts[fromIndex] | ||
if fromLine.typ == RemovedTextualAnnotationType { | ||
unified.MarkRemoved(fromLine.text) | ||
fromIndex++ | ||
continue | ||
} | ||
matchingLine = fromLine.text | ||
} | ||
if toIndex < to.Len() { | ||
toLine := to.texts[toIndex] | ||
if toLine.typ == AddedTextualAnnotationType { | ||
unified.MarkAdded(toLine.text) | ||
toIndex++ | ||
continue | ||
} | ||
if matchingLine == "" { | ||
matchingLine = toLine.text | ||
} | ||
} | ||
unified.MarkUnchanged(matchingLine) | ||
fromIndex++ | ||
toIndex++ | ||
} | ||
return unified | ||
} | ||
|
||
// annotatedDiff returns the annotated representations of the from and to entities, and their unified representation. | ||
func annotatedDiff(diff EntityDiff, entityAnnotations *TextualAnnotations) (from *TextualAnnotations, to *TextualAnnotations, unified *TextualAnnotations) { | ||
fromEntity, toEntity := diff.Entities() | ||
switch { | ||
case fromEntity == nil && toEntity == nil: | ||
// Should never get here. | ||
return nil, nil, nil | ||
case fromEntity == nil: | ||
// A new entity was created. | ||
from = NewTextualAnnotations() | ||
to = annotateAll(toEntity.Create().CanonicalStatementString(), AddedTextualAnnotationType) | ||
case toEntity == nil: | ||
// An entity was dropped. | ||
from = annotateAll(fromEntity.Create().CanonicalStatementString(), RemovedTextualAnnotationType) | ||
to = NewTextualAnnotations() | ||
case entityAnnotations == nil: | ||
// Entity was modified, and we have no prior info about entity annotations. Treat this is as a complete rewrite. | ||
from = annotateAll(fromEntity.Create().CanonicalStatementString(), RemovedTextualAnnotationType) | ||
to = annotateAll(toEntity.Create().CanonicalStatementString(), AddedTextualAnnotationType) | ||
default: | ||
// Entity was modified, and we have prior info about entity annotations. | ||
from = annotatedStatement(fromEntity.Create().CanonicalStatementString(), RemovedTextualAnnotationType, entityAnnotations) | ||
to = annotatedStatement(toEntity.Create().CanonicalStatementString(), AddedTextualAnnotationType, entityAnnotations) | ||
} | ||
return from, to, unifiedAnnotated(from, to) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/* | ||
Copyright 2024 The Vitess Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package schemadiff | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
|
||
"vitess.io/vitess/go/vt/sqlparser" | ||
) | ||
|
||
func TestAnnotateAll(t *testing.T) { | ||
stmt := `create table t( | ||
id int, | ||
name varchar(100), | ||
primary key(id) | ||
) engine=innodb` | ||
annotations := annotateAll(stmt, RemovedTextualAnnotationType) | ||
assert.Equal(t, 5, annotations.Len()) | ||
expect := `-create table t( | ||
- id int, | ||
- name varchar(100), | ||
- primary key(id) | ||
-) engine=innodb` | ||
assert.Equal(t, expect, annotations.Export()) | ||
} | ||
|
||
func TestUnifiedAnnotated(t *testing.T) { | ||
tcases := []struct { | ||
name string | ||
from string | ||
to string | ||
fromAnnotations *TextualAnnotations | ||
toAnnotations *TextualAnnotations | ||
expected string | ||
}{ | ||
{ | ||
"no change", | ||
"CREATE TABLE t1 (a int)", | ||
"CREATE TABLE t1 (a int)", | ||
&TextualAnnotations{}, | ||
&TextualAnnotations{}, | ||
"CREATE TABLE `t1` (\n\t`a` int\n)", | ||
}, | ||
{ | ||
"simple", | ||
"CREATE TABLE t1 (a int)", | ||
"CREATE TABLE t1 (a int, b int)", | ||
&TextualAnnotations{}, | ||
&TextualAnnotations{texts: []*AnnotatedText{{text: "`b` int", typ: AddedTextualAnnotationType}}}, | ||
" CREATE TABLE `t1` (\n \t`a` int\n+\t`b` int\n )", | ||
}, | ||
} | ||
parser := sqlparser.NewTestParser() | ||
|
||
for _, tcase := range tcases { | ||
t.Run(tcase.name, func(t *testing.T) { | ||
fromStmt, err := parser.ParseStrictDDL(tcase.from) | ||
require.NoError(t, err) | ||
annotatedFrom := annotatedStatement(sqlparser.CanonicalString(fromStmt), RemovedTextualAnnotationType, tcase.fromAnnotations) | ||
toStmt, err := parser.ParseStrictDDL(tcase.to) | ||
require.NoError(t, err) | ||
annotatedTo := annotatedStatement(sqlparser.CanonicalString(toStmt), AddedTextualAnnotationType, tcase.toAnnotations) | ||
unified := unifiedAnnotated(annotatedFrom, annotatedTo) | ||
export := unified.Export() | ||
assert.Equalf(t, tcase.expected, export, "from: %v, to: %v", annotatedFrom.Export(), annotatedTo.Export()) | ||
}) | ||
} | ||
} | ||
|
||
func TestUnifiedAnnotatedAll(t *testing.T) { | ||
stmt := `create table t( | ||
id int, | ||
name varchar(100), | ||
primary key(id) | ||
) engine=innodb` | ||
annotatedTo := annotateAll(stmt, AddedTextualAnnotationType) | ||
annotatedFrom := NewTextualAnnotations() | ||
unified := unifiedAnnotated(annotatedFrom, annotatedTo) | ||
expect := `+create table t( | ||
+ id int, | ||
+ name varchar(100), | ||
+ primary key(id) | ||
+) engine=innodb` | ||
assert.Equal(t, expect, unified.Export()) | ||
} |
Oops, something went wrong.