Forum > FPC development

CompareText improvement

(1/7) > >>

edgarrod71:
Hi, I'm always trying to improve the language I love, so I want to share a faster function for comparing strings, would you add it to sysstrh.inc? 


--- Code: Pascal  [+][-]window.onload = function(){var x1 = document.getElementById("main_content_section"); if (x1) { var x = document.getElementsByClassName("geshi");for (var i = 0; i < x.length; i++) { x[i].style.maxHeight='none'; x[i].style.height = Math.min(x[i].clientHeight+15,306)+'px'; x[i].style.resize = "vertical";}};} ---type  TFoT = (Falso, Verdadero); class function CompareText(const S1, S2: string): integer; inline;  // sysstrh.incvar  i, count, count1, count2: integer; Chr1, Chr2: byte;  P1, P2: PChar;begin  Count1 := Length(S1);         // 5  Count2 := Length(S2);         // 5  if (Count1>Count2) then       // 3    Count := Count2             // 3  else    Count := Count1;            // 2  i := 0;                       // 1 mov %eax, -0x18(%rbp)  if count>0 then               // 2 ____ tot 21     begin      P1 := @S1[1];             // 2      P2 := @S2[1];             // 2      while i < Count do        // 2        begin          Chr1 := byte(p1^);    // 3          Chr2 := byte(p2^);    // 3          if Chr1 <> Chr2 then  // 3            begin              if Chr1 in [97..122] then  // 4                dec(Chr1,32);            // 1 subb              if Chr2 in [97..122] then  // 4                dec(Chr2,32);            // 1              if Chr1 <> Chr2 then       // 3 Break inplicit jmp?                Break;            end;          Inc(P1); Inc(P2); Inc(I);      // 3        end;    end;  if i < Count then   // simply rest?    // 3    result := Chr1-Chr2                  // 5  else    result := count1-count2;             // 4   tot 64 asm instructionsend; Class function TextComp(const S1, S2: string): boolean; inline;var L1, L2: integer;  i: integer = 0;  P1, P2: PChar;  Chr1, Chr2: byte;begin  L1 := length(S1);                  // 5  L2 := length(S2);                  // 5  Result := L1 = L2;                 // 3  if (L1 > 0) and Result then begin  // 4 Tot 17    P1 := @S1[1];                    // 2    P2 := @S2[1];                    // 2    repeat      Chr1 := ord(P1^);              // 3      Chr2 := ord(P2^);              // 3      if Chr1 <> Chr2 then begin     // 3        if Chr1 in [$41..$5A] then   // 4          Chr1 := Chr1 or $20;       // 1 orb        if Chr2 in [$41..$5A] then   // 4          Chr2 := Chr2 or $20;       // 1        if Chr1 <> Chr2 then begin   // 3          Result := False;           // 1          Exit;                      // 1        end;      end;      inc(i); inc(P1); inc(P2);   // 3    until i >= L1;                // 3    tot 51  end;end; const Maxi = 10000000;var  S1: string = 'FrEe PaScAl and DELPHI are easier THAN C++';  S2: String = 'fReE pAsCaL AND delphi ARE EASIER than c++';  i, j: integer;  B: boolean;  t: longint;  S: String;initialization  writeln(Format('Comparing S1=''%s'' and S2=''%s''', [S1, S2]));   t := GetTickCount64();  for i := -Maxi to Maxi do    j := CompareText(S1, S2);  t := GetTickCount64() - t;  writeln(Format('CompareText: %d, %d ms', [j, t]));   t := GetTickCount64();  for i := -Maxi to Maxi do    B := TextComp(S1, S2);  t := GetTickCount64() - t;  WriteStr(S, TFoT(B));  writeln(Format('TextComp: %s, %d ms', [S, t]));   halt(0);end.
--- Benchmarks ---

--- Code: Text  [+][-]window.onload = function(){var x1 = document.getElementById("main_content_section"); if (x1) { var x = document.getElementsByClassName("geshi");for (var i = 0; i < x.length; i++) { x[i].style.maxHeight='none'; x[i].style.height = Math.min(x[i].clientHeight+15,306)+'px'; x[i].style.resize = "vertical";}};} ---Comparing S1='FrEe PaScAl and DELPHI are easier THAN C++' and S2='fReE pAsCaL AND delphi ARE EASIER than c++'CompareText: 0, 10117 msTextComp: Verdadero, 8839 ms
more than 10% faster!

if you add this function, please, mention my email somewhere: edgarrod71@gmail.com

marcov:
The standard comparetext on the Windows target supports casing of accents and other special letters.

Martin_fr:
Somewhere in LazUtils there are some similar functions "Compare....Fast"

The idea was to compare like you do, until you hit a unicode char > 127.
So when comparing terms from the English language you would benefit.

The problem is even that approach causes failures. (and I am not sure if they are currently fixed).

One issue is, that those comparisons could not be used for sorting (even if sort order did not matter). Because they were not transitive.
They may have reported that
- String2 goes AFTER String1
- String3 goes AFTER String2
- String3 goes BEFORE String1
And sorting can not fulfil the last statement.

Some of that was fix-able (but not sure if currently fixed).
Other issues (don't recall their nature) might not be fixable.

So those functions can be used for equality check, but not sorting/ordering.

Stefan Glienke:
If that CompareText is a copy from the RTL then there is more to be gained than just 10%  8-)

Comparing byte by byte will not get you anywhere performance-wise.

For reference: https://fastcode.sourceforge.net/challenge_content/CompareText.html

edgarrod71:
@Stefan Glienke 10% in everything is really an improvement.  When you get any discount 10% on expensive things you start to smile... ;)   

By the way, where is your attachment in toll lazarus version?

@Martin_fr  I know that, it's simply a replacement for CompareText or an option to it.

Navigation

[0] Message Index

[#] Next page

Go to full version