Source file src/cmd/vendor/golang.org/x/tools/internal/diff/lcs/old.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package lcs
     6  
     7  // TODO(adonovan): remove unclear references to "old" in this package.
     8  
     9  import (
    10  	"fmt"
    11  )
    12  
    13  // A Diff is a replacement of a portion of A by a portion of B.
    14  type Diff struct {
    15  	Start, End         int // offsets of portion to delete in A
    16  	ReplStart, ReplEnd int // offset of replacement text in B
    17  }
    18  
    19  // DiffBytes returns the differences between two byte sequences.
    20  // It does not respect rune boundaries.
    21  func DiffBytes(a, b []byte) []Diff { return diff(bytesSeqs{a, b}) }
    22  
    23  // DiffRunes returns the differences between two rune sequences.
    24  func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) }
    25  
    26  func diff(seqs sequences) []Diff {
    27  	// A limit on how deeply the LCS algorithm should search. The value is just a guess.
    28  	const maxDiffs = 100
    29  	diff, _ := compute(seqs, twosided, maxDiffs/2)
    30  	return diff
    31  }
    32  
    33  // compute computes the list of differences between two sequences,
    34  // along with the LCS. It is exercised directly by tests.
    35  // The algorithm is one of {forward, backward, twosided}.
    36  func compute(seqs sequences, algo func(*editGraph) lcs, limit int) ([]Diff, lcs) {
    37  	if limit <= 0 {
    38  		limit = 1 << 25 // effectively infinity
    39  	}
    40  	alen, blen := seqs.lengths()
    41  	g := &editGraph{
    42  		seqs:  seqs,
    43  		vf:    newtriang(limit),
    44  		vb:    newtriang(limit),
    45  		limit: limit,
    46  		ux:    alen,
    47  		uy:    blen,
    48  		delta: alen - blen,
    49  	}
    50  	lcs := algo(g)
    51  	diffs := lcs.toDiffs(alen, blen)
    52  	return diffs, lcs
    53  }
    54  
    55  // editGraph carries the information for computing the lcs of two sequences.
    56  type editGraph struct {
    57  	seqs   sequences
    58  	vf, vb label // forward and backward labels
    59  
    60  	limit int // maximal value of D
    61  	// the bounding rectangle of the current edit graph
    62  	lx, ly, ux, uy int
    63  	delta          int // common subexpression: (ux-lx)-(uy-ly)
    64  }
    65  
    66  // toDiffs converts an LCS to a list of edits.
    67  func (lcs lcs) toDiffs(alen, blen int) []Diff {
    68  	var diffs []Diff
    69  	var pa, pb int // offsets in a, b
    70  	for _, l := range lcs {
    71  		if pa < l.X || pb < l.Y {
    72  			diffs = append(diffs, Diff{pa, l.X, pb, l.Y})
    73  		}
    74  		pa = l.X + l.Len
    75  		pb = l.Y + l.Len
    76  	}
    77  	if pa < alen || pb < blen {
    78  		diffs = append(diffs, Diff{pa, alen, pb, blen})
    79  	}
    80  	return diffs
    81  }
    82  
    83  // --- FORWARD ---
    84  
    85  // fdone decides if the forward path has reached the upper right
    86  // corner of the rectangle. If so, it also returns the computed lcs.
    87  func (e *editGraph) fdone(D, k int) (bool, lcs) {
    88  	// x, y, k are relative to the rectangle
    89  	x := e.vf.get(D, k)
    90  	y := x - k
    91  	if x == e.ux && y == e.uy {
    92  		return true, e.forwardlcs(D, k)
    93  	}
    94  	return false, nil
    95  }
    96  
    97  // run the forward algorithm, until success or up to the limit on D.
    98  func forward(e *editGraph) lcs {
    99  	e.setForward(0, 0, e.lx)
   100  	if ok, ans := e.fdone(0, 0); ok {
   101  		return ans
   102  	}
   103  	// from D to D+1
   104  	for D := range e.limit {
   105  		e.setForward(D+1, -(D + 1), e.getForward(D, -D))
   106  		if ok, ans := e.fdone(D+1, -(D + 1)); ok {
   107  			return ans
   108  		}
   109  		e.setForward(D+1, D+1, e.getForward(D, D)+1)
   110  		if ok, ans := e.fdone(D+1, D+1); ok {
   111  			return ans
   112  		}
   113  		for k := -D + 1; k <= D-1; k += 2 {
   114  			// these are tricky and easy to get backwards
   115  			lookv := e.lookForward(k, e.getForward(D, k-1)+1)
   116  			lookh := e.lookForward(k, e.getForward(D, k+1))
   117  			if lookv > lookh {
   118  				e.setForward(D+1, k, lookv)
   119  			} else {
   120  				e.setForward(D+1, k, lookh)
   121  			}
   122  			if ok, ans := e.fdone(D+1, k); ok {
   123  				return ans
   124  			}
   125  		}
   126  	}
   127  	// D is too large
   128  	// find the D path with maximal x+y inside the rectangle and
   129  	// use that to compute the found part of the lcs
   130  	kmax := -e.limit - 1
   131  	diagmax := -1
   132  	for k := -e.limit; k <= e.limit; k += 2 {
   133  		x := e.getForward(e.limit, k)
   134  		y := x - k
   135  		if x+y > diagmax && x <= e.ux && y <= e.uy {
   136  			diagmax, kmax = x+y, k
   137  		}
   138  	}
   139  	return e.forwardlcs(e.limit, kmax)
   140  }
   141  
   142  // recover the lcs by backtracking from the farthest point reached
   143  func (e *editGraph) forwardlcs(D, k int) lcs {
   144  	var ans lcs
   145  	for x := e.getForward(D, k); x != 0 || x-k != 0; {
   146  		if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) {
   147  			// if (x-1,y) is labelled D-1, x--,D--,k--,continue
   148  			D, k, x = D-1, k-1, x-1
   149  			continue
   150  		} else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) {
   151  			// if (x,y-1) is labelled D-1, x, D--,k++, continue
   152  			D, k = D-1, k+1
   153  			continue
   154  		}
   155  		// if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue
   156  		y := x - k
   157  		ans = ans.prepend(x+e.lx-1, y+e.ly-1)
   158  		x--
   159  	}
   160  	return ans
   161  }
   162  
   163  // start at (x,y), go up the diagonal as far as possible,
   164  // and label the result with d
   165  func (e *editGraph) lookForward(k, relx int) int {
   166  	rely := relx - k
   167  	x, y := relx+e.lx, rely+e.ly
   168  	if x < e.ux && y < e.uy {
   169  		x += e.seqs.commonPrefixLen(x, e.ux, y, e.uy)
   170  	}
   171  	return x
   172  }
   173  
   174  func (e *editGraph) setForward(d, k, relx int) {
   175  	x := e.lookForward(k, relx)
   176  	e.vf.set(d, k, x-e.lx)
   177  }
   178  
   179  func (e *editGraph) getForward(d, k int) int {
   180  	x := e.vf.get(d, k)
   181  	return x
   182  }
   183  
   184  // --- BACKWARD ---
   185  
   186  // bdone decides if the backward path has reached the lower left corner
   187  func (e *editGraph) bdone(D, k int) (bool, lcs) {
   188  	// x, y, k are relative to the rectangle
   189  	x := e.vb.get(D, k)
   190  	y := x - (k + e.delta)
   191  	if x == 0 && y == 0 {
   192  		return true, e.backwardlcs(D, k)
   193  	}
   194  	return false, nil
   195  }
   196  
   197  // run the backward algorithm, until success or up to the limit on D.
   198  // (used only by tests)
   199  func backward(e *editGraph) lcs {
   200  	e.setBackward(0, 0, e.ux)
   201  	if ok, ans := e.bdone(0, 0); ok {
   202  		return ans
   203  	}
   204  	// from D to D+1
   205  	for D := range e.limit {
   206  		e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
   207  		if ok, ans := e.bdone(D+1, -(D + 1)); ok {
   208  			return ans
   209  		}
   210  		e.setBackward(D+1, D+1, e.getBackward(D, D))
   211  		if ok, ans := e.bdone(D+1, D+1); ok {
   212  			return ans
   213  		}
   214  		for k := -D + 1; k <= D-1; k += 2 {
   215  			// these are tricky and easy to get wrong
   216  			lookv := e.lookBackward(k, e.getBackward(D, k-1))
   217  			lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
   218  			if lookv < lookh {
   219  				e.setBackward(D+1, k, lookv)
   220  			} else {
   221  				e.setBackward(D+1, k, lookh)
   222  			}
   223  			if ok, ans := e.bdone(D+1, k); ok {
   224  				return ans
   225  			}
   226  		}
   227  	}
   228  
   229  	// D is too large
   230  	// find the D path with minimal x+y inside the rectangle and
   231  	// use that to compute the part of the lcs found
   232  	kmax := -e.limit - 1
   233  	diagmin := 1 << 25
   234  	for k := -e.limit; k <= e.limit; k += 2 {
   235  		x := e.getBackward(e.limit, k)
   236  		y := x - (k + e.delta)
   237  		if x+y < diagmin && x >= 0 && y >= 0 {
   238  			diagmin, kmax = x+y, k
   239  		}
   240  	}
   241  	if kmax < -e.limit {
   242  		panic(fmt.Sprintf("no paths when limit=%d?", e.limit))
   243  	}
   244  	return e.backwardlcs(e.limit, kmax)
   245  }
   246  
   247  // recover the lcs by backtracking
   248  func (e *editGraph) backwardlcs(D, k int) lcs {
   249  	var ans lcs
   250  	for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; {
   251  		if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) {
   252  			// D--, k--, x unchanged
   253  			D, k = D-1, k-1
   254  			continue
   255  		} else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) {
   256  			// D--, k++, x++
   257  			D, k, x = D-1, k+1, x+1
   258  			continue
   259  		}
   260  		y := x - (k + e.delta)
   261  		ans = ans.append(x+e.lx, y+e.ly)
   262  		x++
   263  	}
   264  	return ans
   265  }
   266  
   267  // start at (x,y), go down the diagonal as far as possible,
   268  func (e *editGraph) lookBackward(k, relx int) int {
   269  	rely := relx - (k + e.delta) // forward k = k + e.delta
   270  	x, y := relx+e.lx, rely+e.ly
   271  	if x > 0 && y > 0 {
   272  		x -= e.seqs.commonSuffixLen(0, x, 0, y)
   273  	}
   274  	return x
   275  }
   276  
   277  // convert to rectangle, and label the result with d
   278  func (e *editGraph) setBackward(d, k, relx int) {
   279  	x := e.lookBackward(k, relx)
   280  	e.vb.set(d, k, x-e.lx)
   281  }
   282  
   283  func (e *editGraph) getBackward(d, k int) int {
   284  	x := e.vb.get(d, k)
   285  	return x
   286  }
   287  
   288  // -- TWOSIDED ---
   289  
   290  func twosided(e *editGraph) lcs {
   291  	// The termination condition could be improved, as either the forward
   292  	// or backward pass could succeed before Myers' Lemma applies.
   293  	// Aside from questions of efficiency (is the extra testing cost-effective)
   294  	// this is more likely to matter when e.limit is reached.
   295  	e.setForward(0, 0, e.lx)
   296  	e.setBackward(0, 0, e.ux)
   297  
   298  	// from D to D+1
   299  	for D := range e.limit {
   300  		// just finished a backwards pass, so check
   301  		if got, ok := e.twoDone(D, D); ok {
   302  			return e.twolcs(D, D, got)
   303  		}
   304  		// do a forwards pass (D to D+1)
   305  		e.setForward(D+1, -(D + 1), e.getForward(D, -D))
   306  		e.setForward(D+1, D+1, e.getForward(D, D)+1)
   307  		for k := -D + 1; k <= D-1; k += 2 {
   308  			// these are tricky and easy to get backwards
   309  			lookv := e.lookForward(k, e.getForward(D, k-1)+1)
   310  			lookh := e.lookForward(k, e.getForward(D, k+1))
   311  			if lookv > lookh {
   312  				e.setForward(D+1, k, lookv)
   313  			} else {
   314  				e.setForward(D+1, k, lookh)
   315  			}
   316  		}
   317  		// just did a forward pass, so check
   318  		if got, ok := e.twoDone(D+1, D); ok {
   319  			return e.twolcs(D+1, D, got)
   320  		}
   321  		// do a backward pass, D to D+1
   322  		e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
   323  		e.setBackward(D+1, D+1, e.getBackward(D, D))
   324  		for k := -D + 1; k <= D-1; k += 2 {
   325  			// these are tricky and easy to get wrong
   326  			lookv := e.lookBackward(k, e.getBackward(D, k-1))
   327  			lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
   328  			if lookv < lookh {
   329  				e.setBackward(D+1, k, lookv)
   330  			} else {
   331  				e.setBackward(D+1, k, lookh)
   332  			}
   333  		}
   334  	}
   335  
   336  	// D too large. combine a forward and backward partial lcs
   337  	// first, a forward one
   338  	kmax := -e.limit - 1
   339  	diagmax := -1
   340  	for k := -e.limit; k <= e.limit; k += 2 {
   341  		x := e.getForward(e.limit, k)
   342  		y := x - k
   343  		if x+y > diagmax && x <= e.ux && y <= e.uy {
   344  			diagmax, kmax = x+y, k
   345  		}
   346  	}
   347  	if kmax < -e.limit {
   348  		panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit))
   349  	}
   350  	lcs := e.forwardlcs(e.limit, kmax)
   351  	// now a backward one
   352  	// find the D path with minimal x+y inside the rectangle and
   353  	// use that to compute the lcs
   354  	diagmin := 1 << 25 // infinity
   355  	for k := -e.limit; k <= e.limit; k += 2 {
   356  		x := e.getBackward(e.limit, k)
   357  		y := x - (k + e.delta)
   358  		if x+y < diagmin && x >= 0 && y >= 0 {
   359  			diagmin, kmax = x+y, k
   360  		}
   361  	}
   362  	if kmax < -e.limit {
   363  		panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit))
   364  	}
   365  	lcs = append(lcs, e.backwardlcs(e.limit, kmax)...)
   366  	// These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs)
   367  	ans := lcs.fix()
   368  	return ans
   369  }
   370  
   371  // Does Myers' Lemma apply?
   372  func (e *editGraph) twoDone(df, db int) (int, bool) {
   373  	if (df+db+e.delta)%2 != 0 {
   374  		return 0, false // diagonals cannot overlap
   375  	}
   376  	kmin := max(-df, -db+e.delta)
   377  	kmax := min(df, db+e.delta)
   378  	for k := kmin; k <= kmax; k += 2 {
   379  		x := e.vf.get(df, k)
   380  		u := e.vb.get(db, k-e.delta)
   381  		if u <= x {
   382  			// is it worth looking at all the other k?
   383  			for l := k; l <= kmax; l += 2 {
   384  				x := e.vf.get(df, l)
   385  				y := x - l
   386  				u := e.vb.get(db, l-e.delta)
   387  				v := u - l
   388  				if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux {
   389  					return l, true
   390  				}
   391  			}
   392  			return k, true
   393  		}
   394  	}
   395  	return 0, false
   396  }
   397  
   398  func (e *editGraph) twolcs(df, db, kf int) lcs {
   399  	// db==df || db+1==df
   400  	x := e.vf.get(df, kf)
   401  	y := x - kf
   402  	kb := kf - e.delta
   403  	u := e.vb.get(db, kb)
   404  	v := u - kf
   405  
   406  	// Myers proved there is a df-path from (0,0) to (u,v)
   407  	// and a db-path from (x,y) to (N,M).
   408  	// In the first case the overall path is the forward path
   409  	// to (u,v) followed by the backward path to (N,M).
   410  	// In the second case the path is the backward path to (x,y)
   411  	// followed by the forward path to (x,y) from (0,0).
   412  
   413  	// Look for some special cases to avoid computing either of these paths.
   414  	if x == u {
   415  		// "babaab" "cccaba"
   416  		// already patched together
   417  		lcs := e.forwardlcs(df, kf)
   418  		lcs = append(lcs, e.backwardlcs(db, kb)...)
   419  		return lcs.sort()
   420  	}
   421  
   422  	// is (u-1,v) or (u,v-1) labelled df-1?
   423  	// if so, that forward df-1-path plus a horizontal or vertical edge
   424  	// is the df-path to (u,v), then plus the db-path to (N,M)
   425  	if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 {
   426  		//  "aabbab" "cbcabc"
   427  		lcs := e.forwardlcs(df-1, u-1-v)
   428  		lcs = append(lcs, e.backwardlcs(db, kb)...)
   429  		return lcs.sort()
   430  	}
   431  	if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u {
   432  		//  "abaabb" "bcacab"
   433  		lcs := e.forwardlcs(df-1, u-(v-1))
   434  		lcs = append(lcs, e.backwardlcs(db, kb)...)
   435  		return lcs.sort()
   436  	}
   437  
   438  	// The path can't possibly contribute to the lcs because it
   439  	// is all horizontal or vertical edges
   440  	if u == 0 || v == 0 || x == e.ux || y == e.uy {
   441  		// "abaabb" "abaaaa"
   442  		if u == 0 || v == 0 {
   443  			return e.backwardlcs(db, kb)
   444  		}
   445  		return e.forwardlcs(df, kf)
   446  	}
   447  
   448  	// is (x+1,y) or (x,y+1) labelled db-1?
   449  	if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 {
   450  		// "bababb" "baaabb"
   451  		lcs := e.backwardlcs(db-1, kb+1)
   452  		lcs = append(lcs, e.forwardlcs(df, kf)...)
   453  		return lcs.sort()
   454  	}
   455  	if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x {
   456  		// "abbbaa" "cabacc"
   457  		lcs := e.backwardlcs(db-1, kb-1)
   458  		lcs = append(lcs, e.forwardlcs(df, kf)...)
   459  		return lcs.sort()
   460  	}
   461  
   462  	// need to compute another path
   463  	// "aabbaa" "aacaba"
   464  	lcs := e.backwardlcs(db, kb)
   465  	oldx, oldy := e.ux, e.uy
   466  	e.ux = u
   467  	e.uy = v
   468  	lcs = append(lcs, forward(e)...)
   469  	e.ux, e.uy = oldx, oldy
   470  	return lcs.sort()
   471  }
   472  

View as plain text