28 enum { minLengthToMatch = 3,
29 maxComplexity = 16 * 1024 * 1024 };
37 : text (t), start (s), length (len) {}
39 void incrementStart()
noexcept { ++text; ++start; --length; }
54 static void addDeletion (
TextDiff& td,
int index,
int length)
62 static void diffSkippingCommonStart (TextDiff& td, StringRegion a, StringRegion b)
69 if (ca != cb || ca == 0)
76 diffRecursively (td, a, b);
79 static void diffRecursively (TextDiff& td, StringRegion a, StringRegion b)
81 int indexA = 0, indexB = 0;
82 auto len = findLongestCommonSubstring (a.text, a.length, indexA,
83 b.text, b.length, indexB);
85 if (len >= minLengthToMatch)
87 if (indexA > 0 && indexB > 0)
88 diffSkippingCommonStart (td, StringRegion (a.text, a.start, indexA),
89 StringRegion (b.text, b.start, indexB));
91 addDeletion (td, b.start, indexA);
93 addInsertion (td, b.text, b.start, indexB);
95 diffRecursively (td, StringRegion (a.text + (indexA + len), a.start + indexA + len, a.length - indexA - len),
96 StringRegion (b.text + (indexB + len), b.start + indexB + len, b.length - indexB - len));
100 if (a.length > 0) addDeletion (td, b.start, a.length);
101 if (b.length > 0) addInsertion (td, b.text, b.start, b.length);
108 if (lenA == 0 || lenB == 0)
111 if (lenA * lenB > maxComplexity)
112 return findCommonSuffix (a, lenA, indexInA,
115 auto scratchSpace =
sizeof (int) * (2 + 2 * (
size_t) lenB);
117 if (scratchSpace < 4096)
119 auto* scratch = (
int*) alloca (scratchSpace);
120 return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
123 HeapBlock<int> scratch (scratchSpace);
124 return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
129 const size_t scratchSpace,
int*
const lines)
noexcept
131 zeromem (lines, scratchSpace);
134 auto* l1 = l0 + lenB + 1;
136 int loopsWithoutImprovement = 0;
139 for (
int i = 0; i < lenA; ++i)
141 auto ca = a.getAndAdvance();
144 for (
int j = 0; j < lenB; ++j)
146 if (ca != b2.getAndAdvance())
152 auto len = l0[j] + 1;
155 if (len > bestLength)
157 loopsWithoutImprovement = 0;
165 if (++loopsWithoutImprovement > 100)
171 indexInA -= bestLength - 1;
172 indexInB -= bestLength - 1;
183 while (length < lenA && length < lenB && *a == *b)
190 indexInA = lenA - length;
191 indexInB = lenB - length;
198 TextDiffHelpers::diffSkippingCommonStart (*
this, original, target);
216 return text.replaceSection (start, length, insertedText);
226 DiffTests() :
UnitTest (
"TextDiff class",
"Text") {}
230 juce_wchar buffer[500] = { 0 };
232 for (
int i = r.
nextInt (numElementsInArray (buffer) - 1); --i >= 0;)
238 buffer[i] = (juce_wchar) (1 + r.
nextInt (0x10ffff - 1));
240 while (! CharPointer_UTF16::canRepresent (buffer[i]));
243 buffer[i] = (juce_wchar) (
'a' + r.
nextInt (3));
246 return CharPointer_UTF32 (buffer);
249 void testDiff (
const String& a,
const String& b)
251 TextDiff diff (a, b);
252 auto result = diff.appliedTo (a);
253 expectEquals (result, b);
256 void runTest()
override
258 beginTest (
"TextDiff");
260 auto r = getRandom();
262 testDiff (String(), String());
263 testDiff (
"x", String());
264 testDiff (String(),
"x");
267 testDiff (
"xxx",
"x");
268 testDiff (
"x",
"xxx");
270 for (
int i = 1000; --i >= 0;)
272 auto s = createString (r);
273 testDiff (s, createString (r));
274 testDiff (s + createString (r), s + createString (r));
279static DiffTests diffTests;
Wraps a pointer to a null-terminated UTF-8 character string, and provides various methods to operate ...
A random number generator.
int nextInt() noexcept
Returns the next random 32 bit integer.
CharPointerType getCharPointer() const noexcept
Returns the character pointer currently being used to store this string.
int length() const noexcept
Returns the number of characters in the string.
bool isEmpty() const noexcept
Returns true if the string contains no characters.
CharPointer_UTF8 CharPointerType
This is the character encoding type used internally to store the string.
Calculates and applies a sequence of changes to convert one text string into another.
TextDiff(const String &original, const String &target)
Creates a set of diffs for converting the original string into the target.
Array< Change > changes
The list of changes required to perform the transformation.
String appliedTo(String text) const
Applies this sequence of changes to the original string, producing the target string that was specifi...
This is a base class for classes that perform a unit test.
Describes a change, which can be either an insertion or deletion.
String appliedTo(const String &original) const noexcept
Returns the result of applying this change to a string.
int length
If this change is a deletion, this specifies the number of characters to delete.
int start
Specifies the character index in a string at which text should be inserted or deleted.
String insertedText
If this change is a deletion, this string will be empty; otherwise, it'll be the text that should be ...
bool isDeletion() const noexcept
Returns true if this change is a deletion, or false for an insertion.