Ok,
Here is a UTF8StringReplace that will cope with variant char-width's correctly.
example:
edit1.text := utf8stringReplace('ii_abcDEfghIIjklmn_iI','İi','舒淇',[rfignorecase,rfReplaceAll]);
result:
舒淇_abcDEfgh舒淇jklmn_舒淇
type TUTF8Indexed = record
charWidths:array of byte;
rawData:array of DWord;
end;
function UTF8ToUTF8Indexed(const Src:string):TUTF8Indexed;
var
charLen:PtrInt;
p,pe:pchar;
cw,i:integer;
begin
charLen := UTF8Length(Src);
setlength(result.charWidths,charLen);
setlength(result.rawData,charlen);
//fill our raw data with 0's, so that compares will work when
//out UF8, have different sizes.
FillByte(result.rawData[0],sizeof(DWord),0);
p := @Src[1];
pe := @Src[length(src)];
i:=0;
while p <= pe do
begin
cw := UTF8CharacterLength(p);
result.charwidths[i] := cw;
move(p^,result.rawData[i],cw);
inc(p,cw);
inc(i);
end;
end;
function UTF8IndexedToUTF8(const Src:TUTF8Indexed):string;
var
uPos,iPos,len,cc:PtrInt;
begin
//first set our result to maximum size possible,
cc := length(Src.rawData);
setlength(result,4*cc);
uPos := 0;
for iPos := 0 to cc-1 do
begin
move(src.rawData[iPos],result[upos+1],src.charWidths[iPos]);
inc(upos,src.charWidths[ipos]);
end;
setlength(result,uPos);
end;
function UTF8StringReplace(const S, OldPattern, NewPattern: string; Flags: TReplaceFlags): string;
var
osrc,src,search:TUTF8Indexed;
outpos,lpTextLen,lpSearchLen,lpTextEnd,lpTextPos:PtrInt;
const
//constanst for string expansion, basically lets start at 128 bytes, then
//increase double each time and max out at 32K.
SizeResultStart=128;
SizeResultMaxInc=32*1024;
//
procedure NeedSize(s:PtrInt);
var
cl,ns:PtrInt;
begin
//use our constants to control string expansion
cl := length(result);
if cl>=s then exit;
ns := cl;
if ns > SizeResultMaxInc then ns := SizeResultMaxInc;
setlength(result,cl+ns);
end;
//
procedure CopyNextChar;
begin
NeedSize(outpos+src.charWidths[lpTextPos]);
move(osrc.rawData[lpTextPos],result[outpos+1],osrc.charWidths[lpTextPos]);
inc(outpos);
inc(lpTextPos);
end;
//
function AllEqual:boolean;
var
lp:PtrInt;
begin
result := false;
for lp := 1 to length(search.rawData)-1 do
begin
if search.RawData[lp] <> src.rawData[lpTextPos+lp] then exit;
end;
result := true;
end;
//
begin
if rfIgnoreCase in Flags then begin
//this is our ignore case, so lets lowercase search & text
osrc := UTF8ToUTF8Indexed(S);
src := UTF8ToUTF8Indexed(UTF8LowerCase(S));
//osrc and src should be same size, lets double check
if length(osrc.rawData)<>length(src.rawData) then
raise Exception.Create('Lengths not equal');
search := UTF8ToUTF8Indexed(UTF8LowerCase(OldPattern));
//if there is nothing in our search, then there is nothing to replace
if length(search.rawData) < 1 then begin
result := S;
exit;
end;
//ok mow have src & search in lowercase so our finds will work
setlength(result,SizeResultStart);
outpos := 0;
lpTextLen := length(src.charWidths);
lpSearchLen := length(search.charWidths);
lpTextEnd := lpTextLen-lpSearchLen+1;
lpTextPos := 0;
//let's find our first char match
while lpTextPos < lpTextEnd do
begin
if src.rawData[lpTextPos]=search.rawData[0] then
begin
//we have a match, are the rest equal.
if AllEqual then
begin
if length(newPattern)>0 then
begin
NeedSize(outpos+length(newPattern));
move(newPattern[1],result[outpos+1],length(newPattern));
inc(outpos,length(newPattern));
end;
inc(lpTextPos,length(search.rawData));
if rfReplaceAll in Flags then continue
else break;
end;
end;
//if we get here, there was no match, just copy char to output
CopyNextChar;
end;
//grab any remaining..
while lpTextPos < lpTextLen do CopyNextChar;
//resize result, to correct size.
setlength(result,outpos);
end else begin
//if were not bothered about case, the standard StringReplace should work.
result := StringReplace(S,OldPattern,NewPattern,Flags);
end;
end;