uses
lazUTF8;
// Moves BytePos back to the lead byte of the UTF-8 sequence that contains it.
// A position outside 1..Length(S) is left untouched, and the walk never goes
// below 1, so malformed input that starts with continuation bytes cannot
// trigger a read in front of the string.
procedure UTF8SeekLeadByte(const S: string; var BytePos: integer);
begin
  if (BytePos < 1) or (BytePos > Length(S)) then
    Exit;
  // Continuation bytes carry the bit pattern 10xxxxxx.
  while (BytePos > 1) and ((Ord(S[BytePos]) and $C0) = $80) do
    Dec(BytePos);
end;

// Compares Str1 and Str2 byte by byte, starting at the 1-based byte positions
// Start1 / Start2, and stops at the first pair of bytes that differ.
// The first index is 1, the last index is Length(Str).
//
//   Reverse = False: scans towards the end of the strings.
//   Reverse = True : scans towards the beginning of the strings.
//
// On return Start1 / Start2 hold the byte positions where the scan stopped.
// A position that is still inside its string is moved back to the lead byte
// of the UTF-8 character it falls into, so it never points into the middle
// of a multi-byte sequence. A position that ran off its string (Length + 1
// going forward, 0 going backward) is returned as is.
//
// If a starting position lies on the wrong side of the string for the chosen
// direction (below 1 going forward, above Length going backward) no bytes are
// compared at all.
procedure UTF8DiffBytePos(Str1, Str2: string; var Start1, Start2: integer; Reverse: boolean = False);
var
  Len1, Len2: integer;
begin
  Len1 := Length(Str1);
  Len2 := Length(Str2);
  if Reverse then
  begin
    // The lower bound is tested on every step; the upper bound only has to
    // hold for the starting positions because the indexes only decrease.
    if (Start1 <= Len1) and (Start2 <= Len2) then
      while (Start1 >= 1) and (Start2 >= 1) and (Str1[Start1] = Str2[Start2]) do
      begin
        Dec(Start1);
        Dec(Start2);
      end;
  end
  else
  begin
    // Mirror image of the branch above: the indexes only increase, so the
    // lower bound is checked once and the upper bound on every step.
    if (Start1 >= 1) and (Start2 >= 1) then
      while (Start1 <= Len1) and (Start2 <= Len2) and (Str1[Start1] = Str2[Start2]) do
      begin
        Inc(Start1);
        Inc(Start2);
      end;
  end;
  // The mismatch may have been found on a continuation byte; report the
  // start of the affected character instead.
  UTF8SeekLeadByte(Str1, Start1);
  UTF8SeekLeadByte(Str2, Start2);
end;
// Character-index front end for UTF8DiffBytePos.
// Start1 / Start2 are character indexes (first index is 1, last index is
// UTF8Length(Str)). They are converted to byte positions, handed to
// UTF8DiffBytePos together with Reverse, and the resulting byte positions
// are converted back to character indexes. A result that is not greater
// than 0 is returned unchanged.
// NOTE(review): UTF8CharToByteIndex presumably takes a 0-based character
// index and returns a 0-based byte offset (and a negative value when the
// index is out of range) - confirm against the LazUTF8 documentation.
procedure UTF8Diff(Str1, Str2: string; var Start1, Start2: integer; Reverse: boolean = False);
var
  Shift: integer;
begin
  // Forward scans subtract 1 before the lookup and add 1 afterwards;
  // reverse scans pass the index through unmodified.
  if Reverse then
    Shift := 0
  else
    Shift := 1;

  Start1 := UTF8CharToByteIndex(PChar(Str1), Length(Str1), Start1 - Shift) + Shift;
  Start2 := UTF8CharToByteIndex(PChar(Str2), Length(Str2), Start2 - Shift) + Shift;

  UTF8DiffBytePos(Str1, Str2, Start1, Start2, Reverse);

  // Byte position -> character index: count the characters in front of the
  // position and add one.
  if Start1 > 0 then
    Start1 := UTF8LengthFast(PChar(Str1), Start1 - 1) + 1;
  if Start2 > 0 then
    Start2 := UTF8LengthFast(PChar(Str2), Start2 - 1) + 1;
end;
// Console demo for UTF8Diff.
// 1. Forward comparison of two short mixed CJK/ASCII strings from character
//    positions 8 and 4.
// 2. The same comparison with Reverse = True.
// 3. A timing run on two long strings (50000 repetitions of a 20-character
//    pattern) that differ only in their final character.
// All results are written with writeln.
procedure Test;
var
  Str1: string = '一二三四五六七八九十1234567890一二三四五六七八九十1234567890';
  Str2: string = '五六七八九十1234567890一二三四1234567890';
  Pos1, Pos2: integer;
  i: integer;
  Stream: TStringStream;
  TickCount: QWord;
begin
  // Forward scan.
  Pos1 := 8;
  Pos2 := 4;
  UTF8Diff(Str1, Str2, Pos1, Pos2);
  writeln('Different Pos in Str1: ', Pos1);
  writeln('Different Pos in Str2: ', Pos2);
  writeln('----------');

  // Reverse scan from the same character positions.
  Pos1 := 8;
  Pos2 := 4;
  UTF8Diff(Str1, Str2, Pos1, Pos2, True);
  writeln('Different Pos in Str1: ', Pos1);
  writeln('Different Pos in Str2: ', Pos2);
  writeln('----------');

  // Build the long inputs; the stream is released even if writing or the
  // string concatenation raises.
  Stream := TStringStream.Create('');
  try
    for i := 1 to 50000 do
      Stream.WriteString('一二三四五六七八九十1234567890');
    Str1 := Stream.DataString + '尾';
    Str2 := Stream.DataString + '巴';
  finally
    Stream.Free;
  end;

  // Timed forward scan over the long inputs.
  Pos1 := 1;
  Pos2 := 1;
  TickCount := GetTickCount64;
  UTF8Diff(Str1, Str2, Pos1, Pos2);
  writeln('Time Used: ', GetTickCount64 - TickCount);
  writeln('Different Pos in Str1: ', Pos1);
  writeln('Different Pos in Str2: ', Pos2);
end;
{ TForm1 }
// Runs the UTF8Diff console demo. Presumably wired to the form's OnCreate
// event, so the demo executes once when the form is created - confirm in the
// form definition.
procedure TForm1.FormCreate(Sender: TObject);
begin
  Test();
end;