Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change diff.Difference to always return an edit-script #45

Merged
merged 1 commit into from
Sep 28, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions cmp/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,15 +377,23 @@ func (s *state) compareArray(vx, vy reflect.Value, t reflect.Type) {
s.curPath.push(step)

// Compute an edit-script for slices vx and vy.
eq, es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
step.xkey, step.ykey = ix, iy
return s.statelessCompare(vx.Index(ix), vy.Index(iy))
})

// Equal or no edit-script, so report entire slices as is.
if eq || es == nil {
// Report the entire slice as is if the arrays are of primitive kind,
// and the arrays are different enough.
isPrimitive := false
switch t.Elem().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
isPrimitive = true
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally, I'd hoist isPrimitive into a function of its own. Up to you.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to leave it as is for now. The sequence of grouping all of the Ints and Uints together seems to appear very often in the code:

$ rgrep "reflect\.(Int|Uint),"
compare.go:216:	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
compare.go:219:	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
compare.go:389:	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
compare.go:390:		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
internal/value/sort.go:47:	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
internal/value/sort.go:49:	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
internal/value/format.go:52:	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
internal/value/format.go:54:	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
internal/value/format.go:219:	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
internal/value/format.go:221:	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:

I may just make a helper function to quickly identify whether something is an Int or Uint regardless of size.

if isPrimitive && es.Dist() > (vx.Len()+vy.Len())/4 {
s.curPath.pop() // Pop first since we are reporting the whole slice
s.report(eq, vx, vy)
s.report(false, vx, vy)
return
}

Expand Down
51 changes: 51 additions & 0 deletions cmp/compare_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,37 @@ func TestDiff(t *testing.T) {
func comparerTests() []test {
const label = "Comparer"

type tarHeader struct {
Name string
Mode int64
Uid int
Gid int
Size int64
ModTime time.Time
Typeflag byte
Linkname string
Uname string
Gname string
Devmajor int64
Devminor int64
AccessTime time.Time
ChangeTime time.Time
Xattrs map[string]string
}

makeTarHeaders := func(tf byte) (hs []tarHeader) {
for i := 0; i < 5; i++ {
hs = append(hs, tarHeader{
Name: fmt.Sprintf("some/dummy/test/file%d", i),
Mode: 0664, Uid: i * 1000, Gid: i * 1000, Size: 1 << uint(i),
ModTime: now.Add(time.Duration(i) * time.Hour),
Uname: "user", Gname: "group",
Typeflag: tf,
})
}
return hs
}

return []test{{
label: label,
x: 1,
Expand Down Expand Up @@ -299,6 +330,26 @@ root:
:
-: &<nil>
+: <non-existent>`,
}, {
label: label,
x: makeTarHeaders('0'),
y: makeTarHeaders('\x00'),
wantDiff: `
{[]cmp_test.tarHeader}[0].Typeflag:
-: 0x30
+: 0x00
{[]cmp_test.tarHeader}[1].Typeflag:
-: 0x30
+: 0x00
{[]cmp_test.tarHeader}[2].Typeflag:
-: 0x30
+: 0x00
{[]cmp_test.tarHeader}[3].Typeflag:
-: 0x30
+: 0x00
{[]cmp_test.tarHeader}[4].Typeflag:
-: 0x30
+: 0x00`,
}, {
label: label,
x: make([]int, 1000),
Expand Down
18 changes: 4 additions & 14 deletions cmp/internal/diff/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ func (r Result) Similar() bool {
// Difference reports whether two lists of lengths nx and ny are equal
// given the definition of equality provided as f.
//
// This function may return a edit-script, which is a sequence of operations
// needed to convert one list into the other. If non-nil, the following
// invariants for the edit-script are maintained:
// This function returns an edit-script, which is a sequence of operations
// needed to convert one list into the other. The following invariants for
// the edit-script are maintained:
// • eq == (es.Dist()==0)
// • nx == es.LenX()
// • ny == es.LenY()
Expand All @@ -117,17 +117,7 @@ func (r Result) Similar() bool {
// produces an edit-script with a minimal Levenshtein distance). This algorithm
// favors performance over optimality. The exact output is not guaranteed to
// be stable and may change over time.
func Difference(nx, ny int, f EqualFunc) (eq bool, es EditScript) {
es = searchGraph(nx, ny, f)
st := es.stats()
eq = len(es) == st.NI
if !eq && st.NI < (nx+ny)/4 {
return eq, nil // Edit-script more distracting than helpful
}
return eq, es
}

func searchGraph(nx, ny int, f EqualFunc) EditScript {
func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// This algorithm is based on traversing what is known as an "edit-graph".
// See Figure 1 from "An O(ND) Difference Algorithm and Its Variations"
// by Eugene W. Myers. Since D can be as large as N itself, this is
Expand Down
40 changes: 17 additions & 23 deletions cmp/internal/diff/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,17 @@ func TestDifference(t *testing.T) {
y: " 1234567890",
want: "X.........Y",
}, {
x: "0123456789",
y: "9876543210",
x: "0 1 2 3 45 6 7 8 9 ",
y: " 9 8 7 6 54 3 2 1 0",
want: "XYXYXYXYX.YXYXYXYXY",
}, {
x: "0123456789",
y: "6725819034",
x: "0 1 2345678 9 ",
y: " 6 72 5 819034",
want: "XYXY.XX.XX.Y.YYY",
}, {
x: "FBQMOIGTLN72X90E4SP651HKRJUDA83CVZW",
y: "5WHXO10R9IVKZLCTAJ8P3NSEQM472G6UBDF",
x: "F B Q M O I G T L N72X90 E 4S P 651HKRJU DA 83CVZW",
y: " 5 W H XO10R9IV K ZLCTAJ8P3N SEQM4 7 2G6 UBD F ",
want: "XYXYXYXY.YYYY.YXYXY.YYYYYYY.XXXXXY.YY.XYXYY.XXXXXX.Y.XYXXXXXX",
}}

for _, tt := range tests {
Expand Down Expand Up @@ -333,26 +336,17 @@ func generateStrings(n int, px, py, pm float32, seed int64) (string, string) {
}

func testStrings(t *testing.T, x, y string) EditScript {
wantEq := x == y
eq, es := Difference(len(x), len(y), func(ix, iy int) Result {
es := Difference(len(x), len(y), func(ix, iy int) Result {
return compareByte(x[ix], y[iy])
})
if eq != wantEq {
t.Errorf("equality mismatch: got %v, want %v", eq, wantEq)
if es.LenX() != len(x) {
t.Errorf("es.LenX = %d, want %d", es.LenX(), len(x))
}
if es != nil {
if es.LenX() != len(x) {
t.Errorf("es.LenX = %d, want %d", es.LenX(), len(x))
}
if es.LenY() != len(y) {
t.Errorf("es.LenY = %d, want %d", es.LenY(), len(y))
}
if got := (es.Dist() == 0); got != wantEq {
t.Errorf("violation of equality invariant: got %v, want %v", got, wantEq)
}
if !validateScript(x, y, es) {
t.Errorf("invalid edit script: %v", es)
}
if es.LenY() != len(y) {
t.Errorf("es.LenY = %d, want %d", es.LenY(), len(y))
}
if !validateScript(x, y, es) {
t.Errorf("invalid edit script: %v", es)
}
return es
}
Expand Down