Recent

Author Topic: Contest: fastest IsAnagram function  (Read 18453 times)

Bart

  • Hero Member
  • *****
  • Posts: 5706
    • Bart en Mariska's Webstek
Re: Contest: fastest IsAnagram function
« Reply #60 on: October 24, 2024, 10:24:50 pm »
And also here is my approach to it:

This fails my validityest:
Code: [Select]
TestValidity for Warfly
FAIL: valid anagram rejected (with IgnoreSpaces=TRUE):
S1: "1234567890"
S2: "0 1 2 3 4 5 6 7 8 9 "
Validitycheck FAIL for Warfly
These are anagrams and your code rejects them.

Bart

MathMan

  • Sr. Member
  • ****
  • Posts: 473
Re: Contest: fastest IsAnagram function
« Reply #61 on: October 24, 2024, 10:27:36 pm »
Here's mine. I focussed on minimising memory access and providing better chances for parallelism on modern cores. However - my solution may fail if strings are longer than 255 byte.

And I agree with some others that the test strings are very short. I think martin_fr mentioned that first but also Warfley did.

Code: Pascal  [Select][+][-]
  1. type
  2.   tMap = array[ 0..255 ] of UInt8;
  3.   tFreq = array[ 0..127 ] of UInt8;
  4.  
  5. const
  6.   Mapping: array [ 0..255 ] of UInt8 =
  7.   (
  8.     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
  9.     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
  10.     $20,$21,$22,$23,$24,$25,$26,$27,$28,$29,$2a,$2b,$2c,$2d,$2e,$2f,
  11.     $30,$31,$32,$33,$34,$35,$36,$37,$38,$39,$3a,$3b,$3c,$3d,$3e,$3f,
  12.     $40,$41,$42,$43,$44,$45,$46,$47,$48,$49,$4a,$4b,$4c,$4d,$4e,$4f,
  13.     $50,$51,$52,$53,$54,$55,$56,$57,$58,$59,$5a,$5b,$5c,$5d,$5e,$5f,
  14.     $60,$41,$42,$43,$44,$45,$46,$47,$48,$49,$4a,$4b,$4c,$4d,$4e,$4f,
  15.     $50,$51,$52,$53,$54,$55,$56,$57,$58,$59,$5a,$7b,$7c,$7d,$7e,$7f,
  16.     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
  17.     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
  18.     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
  19.     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
  20.     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
  21.     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
  22.     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
  23.     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
  24.   );
  25.  
  26. procedure Histogram( Str: pUInt8; Count: UInt8; constref Map: tMap; var Freq: tFreq );
  27.  
  28. var
  29.   Flag: UInt8;
  30.  
  31.   Part: UInt64;
  32.  
  33. begin
  34.   Flag := UInt8( Count<8 );
  35.   while( Flag=0 ) do
  36.   begin
  37.     Part := Unaligned( pUInt64( Str )^ );
  38.  
  39.     Inc( Freq[ Map[ Part and $ff ] ] );
  40.     Inc( Freq[ Map[ ( Part shr 8 ) and $ff ] ] );
  41.     Inc( Freq[ Map[ ( Part shr 16 ) and $ff ] ] );
  42.     Inc( Freq[ Map[ ( Part shr 24 ) and $ff ] ] );
  43.     Inc( Freq[ Map[ ( Part shr 32 ) and $ff ] ] );
  44.     Inc( Freq[ Map[ ( Part shr 40 ) and $ff ] ] );
  45.     Inc( Freq[ Map[ ( Part shr 48 ) and $ff ] ] );
  46.     Inc( Freq[ Map[ Part shr 56 ] ] );
  47.  
  48.     Dec( Count, 8 );
  49.     Inc( Str, 8 );
  50.     Flag := Freq[ 0 ] + UInt8( Count<8 );
  51.   end;
  52.  
  53.   Flag := UInt8( Count=0 );
  54.   while( Flag=0 ) do
  55.   begin
  56.     Dec( Count );
  57.     Inc( Freq[ Map[ Str^ ] ] );
  58.     Inc( Str );
  59.     Flag := Freq[ 0 ] + UInt8( Count=0 );
  60.   end;
  61. end;
  62.  
  63. // ATTN: the following may fail, if Length( S )>255
  64. function IsAnagramMM(const S1, S2: string; IgnoreSpaces: boolean = True; ExceptionOnError: boolean = False): boolean;
  65.  
  66. var
  67.   Freq1: tFreq;
  68.   Freq2: tFreq;
  69.  
  70. begin
  71.   Freq1 := Default( tFreq );
  72.   Histogram( pUInt8( @S1[ 1 ] ), Length( S1 ), Mapping, Freq1 );
  73.  
  74.   if( Freq1[ 0 ]<>0 ) then
  75.     if ExceptionOnError then
  76.       raise ERangeError.Create( 'IsAnagram: illegal character in S1' )
  77.     else
  78.       Exit( FALSE );
  79.  
  80.   Freq2 := Default( tFreq );
  81.   Histogram( pUInt8( @S2[ 1 ] ), Length( S2 ), Mapping, Freq2 );
  82.  
  83.   if( Freq2[ 0 ]<>0 ) then
  84.     if ExceptionOnError then
  85.       raise ERangeError.Create( 'IsAnagram: illegal character in S2' )
  86.     else
  87.       Exit( FALSE );
  88.  
  89.   if( ( IgnoreSpaces=FALSE ) and ( Freq1[ $20 ]<>Freq2[ $20 ] ) ) then Exit( FALSE );
  90.  
  91.   Freq1[ $20 ] := 0;
  92.   Freq2[ $20 ] := 0;
  93.   Result := CompareDWord( Freq1[ $20 ], Freq2[ $20 ], ( SizeOf( tFreq )-$20 ) div 4 )=0;
  94. end;
  95.  

Bart

  • Hero Member
  • *****
  • Posts: 5706
    • Bart en Mariska's Webstek
Re: Contest: fastest IsAnagram function
« Reply #62 on: October 24, 2024, 10:33:51 pm »
And here is mine.
Sorry to say that it fails the validity check:
Code: [Select]
TestValidity for Martin
FAIL to detect invalid character #128
Validitycheck FAIL for Martin

Bart

Warfley

  • Hero Member
  • *****
  • Posts: 2040
Re: Contest: fastest IsAnagram function
« Reply #63 on: October 24, 2024, 10:43:05 pm »
And also here is my approach to it:

This fails my validityest:
Code: [Select]
TestValidity for Warfly
FAIL: valid anagram rejected (with IgnoreSpaces=TRUE):
S1: "1234567890"
S2: "0 1 2 3 4 5 6 7 8 9 "
Validitycheck FAIL for Warfly
These are anagrams and your code rejects them.

Bart
Yep you're right, was a typo :)
Edited in the original post

Bart

  • Hero Member
  • *****
  • Posts: 5706
    • Bart en Mariska's Webstek
Re: Contest: fastest IsAnagram function
« Reply #64 on: October 24, 2024, 10:58:04 pm »
Results so far:

Code: [Select]
C:\Users\Bart\LazarusProjecten\bugs\forum\anagram>anagram
Testing validity

TestValidity for Bart
Validitycheck OK for Bart

TestValidity for Bart2
Validitycheck OK for Bart2

TestValidity for Warfly
FAIL to detect invalid character #0
Validitycheck FAIL for Warfly

TestValidity for Fibonacci
FAIL: EAccessViolation: Access violation
Validitycheck FAIL for Fibonacci

TestValidity for Serge
Validitycheck OK for Serge

TestValidity for Zvoni
FAIL: EAccessViolation: Access violation
Validitycheck FAIL for Zvoni

TestValidity for Zvoni2
FAIL to detect invalid character #0
Validitycheck FAIL for Zvoni2

TestValidity for Alligator
FAIL to detect invalid character #0
Validitycheck FAIL for Alligator

TestValidity for SilverCoder
FAIL to detect invalid character #128
Validitycheck FAIL for SilverCoder

TestValidity for AVK
FAIL to detect invalid character #128
Validitycheck FAIL for AVK

TestValidity for Martin
FAIL to detect invalid character #128
Validitycheck FAIL for Martin

TestValidity for Dummy
FAIL: erroneously returned as anagram, but they are not (with IgnoreSpaces=True):
S1: "1234567890"
S2: "0 1 2 3 4 5 6 7 8 9!"
Validitycheck FAIL for Dummy



Testing speed
Bart           :   250
Bart2          :   250
Warfly         : Failed validity test
Fibonacci      : Failed validity test
Serge          :   156
Zvoni          : Failed validity test
Zvoni2         : Failed validity test
Alligator      : Failed validity test
SilverCoder    : Failed validity test
AVK            : Failed validity test
Martin         : Failed validity test
Dummy          : Failed validity test

Sorry Warfly: current failure is another one.

So far, only the code of ASerge passes the test.

"Dummy" is a deliberateley wrong implementation, just to test the validity procedure.
Bart2 = Bart, but using NatriveInt

Compiled on Win10-64 using fpc 3.2.2, cross compiled to 64-bit.

Relevant compiler options used:
Code: [Select]
-MObjFPC
-Scghi
-O2
-OoREGVAR
-Xs
-XX
-l
-vewnhibq

Bart

Bart

  • Hero Member
  • *****
  • Posts: 5706
    • Bart en Mariska's Webstek
Re: Contest: fastest IsAnagram function
« Reply #65 on: October 24, 2024, 11:02:09 pm »
B.t.w. the validity test tests:
  • Valid anagrams with IgnoreSpaces=True
  • Valid anagrams with IgnoreSpaces=False (using the same set as above, but removing all whitespace before testing)
  • Invalid anagrams with IgnoreSpaces=True
  • Invalid input (characters < #31 or > #127)

Bart

Warfley

  • Hero Member
  • *****
  • Posts: 2040
Re: Contest: fastest IsAnagram function
« Reply #66 on: October 24, 2024, 11:08:04 pm »
    • Invalid input (characters < #31 or > #127)

    Bart
    What kind of test do you do, because there is no description on what should happen on error, I assumed that error on input = undefined result (it's a great assumption for optimizing code :D). If you expect that error in input = False, then obviously my code will fail. But as far as reading your description in the first post, this does not seem to be a requirement

    Bart

    • Hero Member
    • *****
    • Posts: 5706
      • Bart en Mariska's Webstek
    Re: Contest: fastest IsAnagram function
    « Reply #67 on: October 24, 2024, 11:10:55 pm »
    OK, if input is invalid then depending on the value of ExceptionOnError, the function should either return False or raise an exception (see my sample code in the first post).

    IIRC then I also specified this in post #1.

    Bart

    Bart

    • Hero Member
    • *****
    • Posts: 5706
      • Bart en Mariska's Webstek
    Re: Contest: fastest IsAnagram function
    « Reply #68 on: October 24, 2024, 11:14:42 pm »
    @fibonacci:
    Your code crashes in the line:
    Code: Pascal  [Select][+][-]
    1. else if (Ch1 >= 33) and (Ch1 <> 96) then if Ch1 <= 122 then Inc(F1[Ch1 or $20]) else Inc(F1[Ch1]);
    where
    Code: [Select]
    S1 = " !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⌂"
    S2 = "⌂~}|{zyxw vutsrqpon mlkjihgfe dcba`_^]\ [ZYXWVUTS RQPONMLKJ IHGFEDCBA @?>=<;:98 76543210/ .-,+*)('& %$#"! "
    IgnoreSpaces=TRUE

    Bart

    Bart

    • Hero Member
    • *****
    • Posts: 5706
      • Bart en Mariska's Webstek
    Re: Contest: fastest IsAnagram function
    « Reply #69 on: October 24, 2024, 11:21:37 pm »
    Excerpt from my test program:
    Code: Pascal  [Select][+][-]
    1. function TestValidity(var AFuncRec: TFuncRec): Boolean;
    2. var
    3.   i: Integer;
    4.   S1, S2: String;
    5.   Ch: Char;
    6. begin
    7.   try
    8.     Result := False;
    9.     AFuncRec.IsValid := False;
    10.     writeln;
    11.     writeln('TestValidity for ',AfuncRec.Name);
    12.     for i := Low(ValidAnagrams) to High(ValidAnagrams) do
    13.     begin
    14.       if not AFuncRec.Func(ValidAnagrams[i].S1, ValidAnagrams[i].S2, True, False) then
    15.       begin
    16.         writeln('FAIL: valid anagram rejected (with IgnoreSpaces=TRUE):');
    17.         writeln('S1: "',ValidAnagrams[i].S1,'"');
    18.         writeln('S2: "',ValidAnagrams[i].S2,'"');
    19.         //writeln;
    20.         Exit;
    21.       end;
    22.     end;
    23.  
    24.     for i := Low(ValidAnagrams) to High(ValidAnagrams) do
    25.     begin
    26.       S1 := ValidAnagrams[i].S1;
    27.       S2 := ValidAnagrams[i].S2;
    28.       S1 := StringReplace(S1, #32, '', [rfReplaceAll]);
    29.       S2 := StringReplace(S2, #32, '', [rfReplaceAll]);
    30.       if not AFuncRec.Func(S1, S2, False, False) then
    31.       begin
    32.         writeln('FAIL: valid anagram rejected (with IgnoreSpaces=FALSE):');
    33.         writeln('S1: "',S1,'"');
    34.         writeln('S2: "',S2,'"');
    35.         //writeln;
    36.         Exit;
    37.       end;
    38.     end;
    39.  
    40.     for i := Low(InValidAnagrams) to High(InValidAnagrams) do
    41.     begin
    42.       if AFuncRec.Func(InValidAnagrams[i].S1, InValidAnagrams[i].S2, True, False) then
    43.       begin
    44.         writeln('FAIL: erroneously returned as anagram, but they are not (with IgnoreSpaces=True):');
    45.         writeln('S1: "',InValidAnagrams[i].S1,'"');
    46.         writeln('S2: "',InValidAnagrams[i].S2,'"');
    47.         //writeln;
    48.         Exit;
    49.       end;
    50.     end;
    51.  
    52.     for Ch := #0 to #31 do
    53.     begin
    54.       S1 := Ch;
    55.       S2 := Ch;
    56.       if AFuncRec.Func(S1, S2, True, False) then
    57.       begin
    58.         writeln('FAIL to detect invalid character #',Ord(Ch));
    59.         //writeln;
    60.         Exit;
    61.       end;
    62.     end;
    63.     for Ch := #128 to #255 do
    64.     begin
    65.       S1 := Ch;
    66.       S2 := Ch;
    67.       if AFuncRec.Func(S1, S2, True, False) then
    68.       begin
    69.         writeln('FAIL to detect invalid character #',Ord(Ch));
    70.         //writeln;
    71.         Exit;
    72.       end;
    73.     end;
    74.  
    75.     AFuncRec.IsValid := True;
    76.     Result := True;
    77.   except
    78.     on E: Exception do
    79.     begin
    80.       writeln('FAIL: ',E.ToString);
    81.       Result := FALSE;
    82.       //raise
    83.     end;
    84.   end
    85. end;

    Anagrams used for testing:
    Code: [Select]
    Valid Anagrams for testing
    Nr: 0
    S1=1234567890
    S2=0 1 2 3 4 5 6 7 8 9

    Nr: 1
    S1= !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⌂
    S2=⌂~}|{zyxw vutsrqpon mlkjihgfe dcba`_^]\ [ZYXWVUTS RQPONMLKJ IHGFEDCBA @?>=<;:98 76543210/ .-,+*)('& %$#"!


    Invalid Anagrams for testing
    Nr: 0
    S1=1234567890
    S2=0 1 2 3 4 5 6 7 8 9!

    Nr: 1
    S1= !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⌂
    S2=⌂~}|{zyxw vutsrqpon mlkjihgfe dcba`_^]\ [ZYXWVUTS RQPONMLKJ IHGFEDCBA @?>=<;:98 76543210/ .-,+*)('& %$#"!          !

    Bart

    silvercoder70

    • Full Member
    • ***
    • Posts: 200
      • Tim Coates
    Re: Contest: fastest IsAnagram function
    « Reply #70 on: October 24, 2024, 11:29:42 pm »
    May I ask what settings are used for compiling?

    Wondering if I should bother changing code.

    Second thought is... this might be a good idea as a periodic exercise. I used to be on a stock trading forum where there was a weekly context - no prize, except the feeling of being the winner or participant.
    🔥 Pascal Isn’t Dead -> See What It Can Do: @silvercoder70 on YouTube

    Warfley

    • Hero Member
    • *****
    • Posts: 2040
    Re: Contest: fastest IsAnagram function
    « Reply #71 on: October 24, 2024, 11:39:18 pm »
    OK, if input is invalid then depending on the value of ExceptionOnError, the function should either return False or raise an exception (see my sample code in the first post).

    IIRC then I also specified this in post #1.

    Bart
    Fixed it, but just quickly on my phone, hope it works. Will tomorrow look into this again

    Bart

    • Hero Member
    • *****
    • Posts: 5706
      • Bart en Mariska's Webstek
    Re: Contest: fastest IsAnagram function
    « Reply #72 on: October 24, 2024, 11:55:12 pm »
    May I ask what settings are used for compiling?
    I posted the commandline params for fpc.

    Bart

    silvercoder70

    • Full Member
    • ***
    • Posts: 200
      • Tim Coates
    Re: Contest: fastest IsAnagram function
    « Reply #73 on: October 25, 2024, 12:17:19 am »
    May I ask what settings are used for compiling?
    I posted the commandline params for fpc.

    Bart

    Ahh. Just found it. Thanks
    🔥 Pascal Isn’t Dead -> See What It Can Do: @silvercoder70 on YouTube

    avk

    • Hero Member
    • *****
    • Posts: 826
    Re: Contest: fastest IsAnagram function
    « Reply #74 on: October 25, 2024, 01:04:00 am »
    ...
    AVK            : Failed validity test
    ...

    Oops,  :-[, seems to be fixed:
    Code: Pascal  [Select][+][-]
    1. function IsAnagram_avk2(const s1, s2: string; aIgnoreSpaces: Boolean = True; aExceptionOnError: Boolean = False): Boolean;
    2. var
    3.   Counter: array[#32..#127] of Integer;
    4.   I, Len: Integer;
    5.   c: AnsiChar;
    6. begin
    7.   FillChar(Counter, SizeOf(Counter), 0);
    8.   I := 1;
    9.   Len := Length(s1)-3;
    10.   while I < Len do begin
    11.     if(DWord(Integer(s1[I])-Integer(32))>DWord(95))or(DWord(Integer(s1[I+1])-Integer(32))>DWord(95)) or
    12.       (DWord(Integer(s1[I+2])-Integer(32))>DWord(95))or(DWord(Integer(s1[I+3])-Integer(32))>DWord(95)) then
    13.       if aExceptionOnError then
    14.         raise ERangeError.Create('Illegal character in s1')
    15.       else
    16.         exit(False);
    17.     Inc(Counter[s1[I  ]]);
    18.     Inc(Counter[s1[I+1]]);
    19.     Inc(Counter[s1[I+2]]);
    20.     Inc(Counter[s1[I+3]]);
    21.     Inc(I, 4);
    22.   end;
    23.  
    24.   for I := I to Length(s1) do begin
    25.     if DWord(Integer(s1[I])-Integer(32)) > DWord(95) then
    26.       if aExceptionOnError then
    27.         raise ERangeError.Create('Illegal character in s1')
    28.       else
    29.         exit(False);
    30.     Inc(Counter[s1[I]]);
    31.   end;
    32.  
    33.   I := 1;
    34.   Len := Length(s2)-3;
    35.   while I < Len do begin
    36.     if (DWord(Integer(s2[I])-Integer(32))>DWord(95))or(DWord(Integer(s2[I+1])-Integer(32))>DWord(95)) or
    37.        (DWord(Integer(s2[I+2])-Integer(32))>DWord(95))or(DWord(Integer(s2[I+3])-Integer(32))>DWord(95)) then
    38.       if aExceptionOnError then
    39.         raise ERangeError.Create('Illegal character in s2')
    40.       else
    41.         exit(False);
    42.     Dec(Counter[s2[I  ]]);
    43.     Dec(Counter[s2[I+1]]);
    44.     Dec(Counter[s2[I+2]]);
    45.     Dec(Counter[s2[I+3]]);
    46.     Inc(I, 4);
    47.   end;
    48.  
    49.   for I := I to Length(s2) do begin
    50.     if DWord(Integer(s2[I])-Integer(32)) > DWord(95) then
    51.       if aExceptionOnError then
    52.         raise ERangeError.Create('Illegal character in s2')
    53.       else
    54.         exit(False);
    55.     Dec(Counter[s2[I]]);
    56.   end;
    57.  
    58.   for c := Chr(32 + Ord(aIgnoreSpaces)) to #127 do
    59.     if Counter[c] <> 0 then exit(False);
    60.   Result := True;
    61. end;
    62.  

     

    TinyPortal © 2005-2018