Lazarus

Programming => General => Topic started by: mse on January 11, 2018, 12:35:45 pm

Title: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: mse on January 11, 2018, 12:35:45 pm
This is a continuation of
http://forum.lazarus-ide.org/index.php/topic,39617.msg272572.html#msg272572
Some more numbers on 32 bit Linux:
Code: Pascal  [Select][+][-]
  1. {$ifndef fpc}
  2.  {$mode pascal}
  3. {$endif}
  4. {$ifdef fpc}
  5.  {$codepage utf8}
  6. {$endif}
  7. program test1;
  8.  
  9. uses
  10. {$ifndef fpc}
  11.  rtl_fpccompatibility;
  12. {$else}
  13.  cwstring,sysutils;
  14. {$endif}
  15.  
  16. type
  17. {$ifdef fpc} // under FPC, map the short type names used below onto FPC's own types
  18.  card8 = byte;
  19.  card16 = word;
  20.  char8 = ansichar;
  21.  char16 = unicodechar;
  22.  string8 = utf8string;
  23.  string16 = unicodestring;
  24. {$endif} // presumably MSElang already provides these names - TODO confirm
  25.  pcard8 = ^card8;
  26.  pcard16 = ^card16;
  27.  card16aty = array[0..0] of card16; // only used through pcard16aty to index raw memory (see conv3)
  28.  pcard16aty = ^card16aty;
  29.  card8aty = array[0..0] of card8; // only used through pcard8aty to index raw memory (see conv3)
  30.  pcard8aty = ^card8aty;
  31.  
  32.  testfuncty = function(const inp: string8): string16; // common signature of all timed converters
  33.  
  34. const
  35.  strlen = 10000; // length of the test input s1
  36.  loopcount = 20000; // conversions performed per timing run in test()
  37.  
  38. function conv1(const inp: string8): string16; // variant 1: plain assignment
  39. begin
  40.  result:= inp; // string8 -> string16 assignment; the conversion itself is left to the compiler/RTL
  41. end;
  42.  
  43. function conv2(const inp: string8): string16; // variant 2: per-character copy through 1-based string indexing
  44. var
  45.  i1: int32;
  46. begin
  47.  setlength(result,length(inp)); // one 16-bit unit per input byte
  48.  for i1:= 1 to length(result) do begin
  49.   card16(result[i1]):= card8(inp[i1]); // byte value copied as is, no UTF-8 decoding: assumes ASCII input (per the thread title)
  50.  end;
  51. end;
  52.  
  53. function conv3(const inp: string8): string16; // variant 3: per-character copy through 0-based pointer-cast arrays
  54. var
  55.  i1: int32;
  56. begin
  57.  setlength(result,length(inp)); // one 16-bit unit per input byte
  58.  for i1:= 0 to length(result)-1 do begin
  59.   pcard16aty(pointer(result))^[i1]:= pcard8aty(pointer(inp))^[i1];
  60.               //no uniquestring check (both strings are accessed through raw pointers)
  61.  end;
  62. end;
  63.  
  64. function conv4(const inp: string8): string16; // variant 4: per-character copy by walking raw pointers
  65. var
  66.  ps,pe: pcard8;
  67.  pd: pcard16;
  68. begin
  69.  setlength(result,length(inp)); // one 16-bit unit per input byte
  70.  ps:= pointer(inp); // first input byte
  71.  pe:= ps + length(result); // one past the last input byte
  72.  pd:= pointer(result); // first output unit
  73.  while ps < pe do begin
  74.   pd^:= ps^; // widen one byte to 16 bit
  75.   inc(ps);
  76.   inc(pd);
  77.  end;
  78. end;
  79.  
  80. var
  81.  s1: string8;
  82.  s2: string16;
  83.  
  84. procedure test(const alabel: string8; const afunc: testfuncty); // runs afunc(s1) loopcount times and prints the timing under alabel
  85. var
  86.  pass: int32;
  87.  started,finished: tdatetime;
  88. begin
  89.  s2:= '';
  90.  started:= now();
  91.  for pass:= 1 to loopcount do begin
  92.   s2:= afunc(s1);
  93.  end;
  94.  finished:= now();
  95.  if s1 = s2 then begin // last result compares equal to the input: report the elapsed time
  96.  {$ifdef fpc}
  97.   writeln(alabel,': ',(finished-started)*24*60*60:0:6,'s'); // tdatetime difference is in days, printed as seconds
  98.  {$else}
  99.   writeln(alabel,': ',(finished-started)*24*60*60,'s');
  100.  {$endif}
  101.  end
  102.  else begin
  103.   writeln(alabel,':****error****');
  104.  end;
  105. end;
  106.  
  107. var
  108.  idx: int32;
  109.  
  110. begin // program entry: print the compiler banner, build the test input, time each converter
  111.  {$ifdef fpc}
  112.   writeln('Free Pascal:');
  113.  {$else}
  114.   writeln('MSElang:');
  115.  {$endif}
  116.  setlength(s1,strlen);
  117.  for idx:= 1 to strlen do begin
  118.   s1[idx]:= char8(card8((idx+32) and $7f)); // masked to 7 bits, so every input byte is in 0..127
  119.  end;
  120.  test('conv1',@conv1);
  121.  test('conv2',@conv2);
  122.  test('conv3',@conv3);
  123.  test('conv4',@conv4);
  124. end.
  125.  
Free Pascal version 3.0.4, MSElang version 0.0
Compiled with -O-

Free Pascal:
conv1: 0.484000s
conv2: 0.628000s
conv3: 0.364000s
conv4: 0.459000s

MSElang:
conv1: 0.552745s
conv2: 0.553846s
conv3: 0.331735s
conv4: 0.533578s

Compiled with -O3:

Free Pascal:
conv1: 0.480000s
conv2: 0.491000s
conv3: 0.115000s
conv4: 0.107000s

MSElang:
conv1: 0.144055s
conv2: 0.163643s
conv3: 0.0907351s
conv4: 0.0754151s
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: fcu on January 11, 2018, 02:26:54 pm
Great!!! I think you have to launch a Patreon for this project ;)
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: engkin on January 12, 2018, 02:54:18 am
Looks interesting, but I think typically the test should be on pointer versions of these procedures. Something like:
Code: Pascal  [Select][+][-]
  1. procedure conv3(var src: pointer; Len: integer; var dst: pointer); // pointer-based conv3: the caller supplies both buffers
  2. var
  3.   inp: pcard8aty absolute src; // byte-array view of src
  4.   oup: pcard16aty absolute dst; // 16-bit-array view of dst
  5.  i1: int32;
  6. begin
  7.  for i1:= 0 to Len-1 do begin
  8.   oup^[i1]:= inp^[i1]; // widen each of the Len bytes to a 16-bit unit
  9.  end;
  10. end;
  11.  
  12. // Pointer-walking conv4: widens Len bytes at src into Len 16-bit units at dst.
  13. // The caller supplies both buffers.
  14. // src and dst stay "var" so existing callers compile unchanged, but the walk
  15. // runs on local copies. With the earlier "absolute" overlays, inc() advanced
  16. // the caller's own pointer variables (fatal if a string was passed as
  17. // pointer(s)).
  18. procedure conv4(var src: pointer;Len: integer; var dst: pointer);
  19. var
  20.  ps,pe: pcard8;
  21.  pd: pcard16;
  22. begin
  23.  ps:= src;
  24.  pd:= dst;
  25.  pe:= ps + Len; // one past the last input byte
  26.  while ps < pe do begin
  27.   pd^:= ps^;
  28.   inc(ps);
  29.   inc(pd);
  30.  end;
  31. end;
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: mse on January 12, 2018, 06:43:25 am
I don't understand, please explain.
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: AlexTP on January 12, 2018, 11:23:23 am
In my GetTickCount test (default Lazarus project options) I get the same time for conv3 and conv4.
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: engkin on January 13, 2018, 04:18:26 am
I don't understand, please explain.

I meant having dual versions like:
Code: Pascal  [Select][+][-]
  1. function UTF8Pos(const SearchForText, SearchInText: string; StartPos: SizeInt = 1): PtrInt;
  2. function UTF8PosP(SearchForText: PChar; SearchForTextLen: SizeInt;
  3.   SearchInText: PChar; SearchInTextLen: SizeInt): PChar;

or
Code: Pascal  [Select][+][-]
  1. function UTF8Length(const s: string): PtrInt; inline;
  2. function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt;
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: engkin on January 13, 2018, 04:22:36 am
In my GetTickCount test (default Lazarus proj opt) i have same time of conv3/conv4.
"(default Lazarus proj opt)" might not be optimized.

Did you reach the speed you were aiming for?
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: engkin on January 14, 2018, 04:32:36 am
I decided to try some assembly code:
Code: Pascal  [Select][+][-]
  1. {$AsmMode intel}
  2. // Widens Len bytes at src into Len 16-bit units at dst, last element first.
  3. // i386 only: src, Len and dst are used directly as registers (nostackframe,
  4. // register calling convention assumed) and edi serves as scratch.
  5. // The index starts at Len-1 and the loop is skipped when Len <= 0. It used to
  6. // start at Len, which copied Len+1 elements and dereferenced src/dst even
  7. // when Len was 0.
  8. procedure conv5(src: pointer; Len: integer; dst: pointer);assembler;nostackframe;
  9. label lblLoop, lblDone;
  10. asm
  11.   push edi { Temp }
  12.  
  13.   dec Len { Index of the last element; negative when there is nothing to copy }
  14.   js lblDone
  15.  
  16. align 4
  17. lblLoop:
  18.   movzx di, byte ptr [src+Len]
  19.   mov word ptr [dst+Len*2], di
  20.  
  21.   dec Len { Copy in reverse order }
  22.   jns lblLoop
  23.  
  24. lblDone:
  25.   pop edi { Restore Temp }
  26. end;

Most likely this should be close enough to the limit.
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: mse on January 14, 2018, 09:23:57 am
Code: Pascal  [Select][+][-]
  1. {$ifdef fpc}
  2. {$AsmMode intel}
  3. // Widens Len bytes at src into Len 16-bit units at dst, last element first.
  4. // i386 only: src, Len and dst are used directly as registers (nostackframe,
  5. // register calling convention assumed) and edi serves as scratch.
  6. // The index starts at Len-1 and the loop is skipped when Len <= 0. It used to
  7. // start at Len, which copied Len+1 elements and dereferenced nil pointers for
  8. // an empty string.
  9. procedure conv5a(src: pointer; Len: integer; dst: pointer);assembler;nostackframe;
  10. label lblLoop, lblDone;
  11. asm
  12.   push edi { Temp }
  13.  
  14.   dec Len { Index of the last element; negative when there is nothing to copy }
  15.   js lblDone
  16.  
  17. align 4
  18. lblLoop:
  19.   movzx di, byte ptr [src+Len]
  20.   mov word ptr [dst+Len*2], di
  21.  
  22.   dec Len { Copy in reverse order }
  23.   jns lblLoop
  24.  
  25. lblDone:
  26.   pop edi { Restore Temp }
  27. end;
  28.  
  29. // string8 -> string16 wrapper around conv5a, giving the assembler routine the
  30. // same function signature as the other conv* variants.
  31. function conv5(const inp: string8): string16;
  32. begin
  33.  setlength(result,length(inp)); // one 16-bit unit per input byte
  34.  conv5a(pointer(inp),length(inp),pointer(result));
  35. end;
  36. {$endif}
  37.  
Code: [Select]
Free Pascal:
conv1: 0.487000s
conv2: 0.513000s
conv3: 0.117000s
conv4: 0.106000s
conv5: 0.092000s

MSElang:
conv1: 0.157672s
conv2: 0.170412s
conv3: 0.0998492s
conv4: 0.0773759s

Can you try to write the assembler code directly into above conv5()?
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: engkin on January 15, 2018, 07:31:17 pm
Code: [Select]
Free Pascal:
conv1: 0.487000s
conv2: 0.513000s
conv3: 0.117000s
conv4: 0.106000s
conv5: 0.092000s

MSElang:
conv1: 0.157672s
conv2: 0.170412s
conv3: 0.0998492s
conv4: 0.0773759s
The figures are impressive. What assembly do you have for MSElang conv4?

Can you try to write the assembler code directly into above conv5()?
It defeats the purpose, as calling:
Code: Pascal  [Select][+][-]
  1.  s2 := conv5(s1);
causes FPC to use fpc_unicodestr_assign
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: mse on January 15, 2018, 08:06:59 pm
The figures are impressive. What assembly do you have for MSElang conv4?
Code: [Select]
# Compiler output for conv4 (32-bit x86, AT&T syntax).
# Outline: read length(inp), call __mla__setlengthstring16 on the result, then
# run a count-down loop that widens one byte to one 16-bit unit per iteration.
.align 16, 0x90
.type conv4,@function
conv4:                                  # @conv4
.Lfunc_begin107:
.cfi_startproc
# BB#0:
pushl %ebx
.Ltmp430:
.cfi_def_cfa_offset 8
pushl %edi
.Ltmp431:
.cfi_def_cfa_offset 12
pushl %esi
.Ltmp432:
.cfi_def_cfa_offset 16
subl $16, %esp
.Ltmp433:
.cfi_def_cfa_offset 32
.Ltmp434:
.cfi_offset %esi, -16
.Ltmp435:
.cfi_offset %edi, -12
.Ltmp436:
.cfi_offset %ebx, -8
# esi = second stack argument: the source pointer read by the copy loop below
movl 36(%esp), %esi
# ebx = first stack argument: address of the result string variable
movl 32(%esp), %ebx
xorl %edi, %edi
# eax = length of the source: 0 for a nil pointer, otherwise the dword at -4
testl %esi, %esi
movl $0, %eax
je .LBB107_2
# BB#1:
movl -4(%esi), %eax
.LBB107_2:                              # %__mla__lengthstring.exit
# call __mla__setlengthstring16 with (address of result, length) on the stack
movl %eax, 4(%esp)
movl %ebx, (%esp)
calll __mla__setlengthstring16
# eax = data pointer of the resized result; edi = its length (stays 0 if nil)
movl (%ebx), %eax
testl %eax, %eax
je .LBB107_4
# BB#3:
movl -4(%eax), %edi
.LBB107_4:                              # %__mla__lengthstring.exit14
# skip the loop unless the end pointer (esi + edi) lies above the start pointer
leal (%esi,%edi), %ecx
cmpl %esi, %ecx
jbe .LBB107_6
.align 16, 0x90
.LBB107_5:                              # %.lr.ph
                                        # =>This Inner Loop Header: Depth=1
# copy loop: zero-extend one source byte, store it as a word, advance both
# pointers, and count edi down to zero
movzbl (%esi), %ecx
movw %cx, (%eax)
incl %esi
addl $2, %eax
decl %edi
jne .LBB107_5
.LBB107_6:                              # %._crit_edge
addl $16, %esp
popl %esi
popl %edi
popl %ebx
retl
.Lfunc_end107:
.size conv4, .Lfunc_end107-conv4
.cfi_endproc
Quote
Can you try to write the assembler code directly into above conv5()?
It defeats the purpose, as calling:
Code: Pascal  [Select][+][-]
  1.  s2 := conv5(s1);
causes FPC to use fpc_unicodestr_assign
It should not, because AFAIK "result" for strings is a hidden "var" variable.
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: engkin on January 16, 2018, 01:13:42 am
Quote
Can you try to write the assembler code directly into above conv5()?
It defeats the purpose, as calling:
Code: Pascal  [Select][+][-]
  1.  s2 := conv5(s1);
causes FPC to use fpc_unicodestr_assign
It should not, because AFAIK "result" for strings is a hidden "var" variable.
When using a global variable (s2):
Code: ASM  [Select][+][-]
  1. # Temp -48,4 allocated
  2. # [211] s2 := convIt(s1);  { s2 is a global variable }
  3.         leal    -48(%ebp),%edx
  4.         movl    U_$P$TEST1_$$_S1,%eax
  5.         call    P$TEST1_$$_CONVIT$UTF8STRING$$UNICODESTRING
  6.         movl    -48(%ebp),%edx
  7.         movl    $U_$P$TEST1_$$_S2,%eax
  8.         call    fpc_unicodestr_assign
  9.         # Temp -48,4 released

When using a local variable (ls):
Code: ASM  [Select][+][-]
  1. # [212] ls := convIt(s1);  { ls is a local variable }
  2.         leal    -4(%ebp),%edx
  3.         movl    U_$P$TEST1_$$_S1,%eax
  4.         call    P$TEST1_$$_CONVIT$UTF8STRING$$UNICODESTRING

What do you think?
Title: Re: Performance comparison of different versions of conversion ASCII -> utf-16
Post by: mse on January 16, 2018, 08:15:17 am
You are right, see attachment. I asked for moving your assembler code to conv5 because the comparison is more fair without an additional procedure call.

TinyPortal © 2005-2018