Lazarus

Free Pascal => General => Topic started by: BobDog on September 25, 2020, 01:22:33 am

Title: Quicksort a string array (Case insensitive or standard)
Post by: BobDog on September 25, 2020, 01:22:33 am
This sorts a million elements up or down and is set for case insensitive.
Not necessarily Lazarus, I don't have it anyway.
Tested 3.0.4 64 bits.
Code: Pascal  [Select][+][-]
  1.  
  2.  
  3.     program Quicksortstrings;
  4.   //{$rangeChecks on}   // OK tested already
  5.  
  6.  {$macro on}
  7.  
  8.  {$define caseinsensitive}      // comment out for case sensitive sort
  9. Uses
  10. SysUtils,DateUtils,strutils ; {for timer and midstr only, not needed for quicksort}
  11.  
  12.  { =============  QUICKSORT =========== }
  13.   type
  14.  direction =(up,down);
  15.   var
  16.   u : Array[0..255] of byte;  {look up table for speed}
  17.  
  18.  
  19. Procedure QuickSort(Var arr:array of ansistring;Start:longint;Finish:longint;d:direction);
  20.   Function lessthan(const a:ansistring;const b:ansiString):integer;
  21.     Var
  22.     n,lim,lena,lenb:longint;
  23.       begin
  24.       lena:=length(a);
  25.       lenb:=length(b);
  26.       if (lena<lenb) then lim:=lena  else lim:=lenb ;
  27.     For n :=1 To lim do
  28.     begin
  29.         If u[ord(a[n])] < u[ord(b[n])] Then exit (-1);
  30.         If u[ord(a[n])] > u[ord(b[n])] Then exit (0);
  31.         end ;
  32.     exit( 0 ) ;
  33. End;
  34.  
  35.  Function morethan(const a:ansistring;const b:ansiString):integer;
  36.     Var
  37.     n,lim,lena,lenb:longint;
  38.       begin
  39.       lena:=length(a);
  40.       lenb:=length(b);
  41.       if (lena<lenb) then lim:=lena else lim:=lenb ;
  42.     For n :=1 To lim do
  43.     begin
  44.         If u[ord(a[n])] > u[ord(b[n])] Then exit (-1);
  45.         If u[ord(a[n])] < u[ord(b[n])] Then exit (0);
  46.         end ;
  47.     exit( 0 ) ;
  48. End;
  49. Var
  50.   i,j: longint;
  51.   x,temp: ansistring;
  52. Begin
  53.   i := Start ;
  54.   j := finish;
  55.   x := arr[(Start+finish) Div 2];
  56.   While I <= J Do
  57.     Begin
  58.     if d = up then
  59.     begin
  60.     While lessthan(arr[I], X)=-1 do
  61.       I+=1;
  62.     While morethan(arr[J], X)=-1 do
  63.        J-=1;
  64.        end;
  65.  
  66.        if d = down then
  67.     begin
  68.     While morethan(arr[I], X)=-1 do
  69.       I+=1;
  70.     While lessthan(arr[J], X)=-1 do
  71.        J-=1;
  72.        end;
  73.  
  74.       If I<=J Then
  75.         Begin
  76.           temp := arr[j];
  77.           arr[j] := arr[i];
  78.           arr[i] := temp;
  79.           I+= 1;
  80.           J-= 1;
  81.         End;
  82.     End;
  83.   If J > Start Then QuickSort(arr,Start,J,d);
  84.   If I < Finish Then QuickSort(arr,I,Finish,d);
  85. End;
  86.  
  87. procedure setlookuparray(var u:array of byte); // must run
  88. var x,r:integer;
  89. begin
  90.  for x:=0 to 255 do
  91.     begin
  92.    r:=x;
  93.    {$ifdef caseinsensitive}
  94.    if (r<91) and (r>64) then  r:=r+32;
  95.    {$endif}
  96.    u[x]:=r;
  97.    end;
  98. end;
  99.  { ===============  END QUICKSORT=======}
  100.  
  101.   { try it out}
  102.   function range(f:longint;l:longint):longint ;
  103. begin
  104.     range:=  random(1000000) mod (l-f+1) + f ;
  105. end;
  106.  
  107. function mixcases:byte;
  108. var i:integer;
  109. begin
  110.  i:=range(97,122);
  111.  if random(10)<5 then i:=i-32;
  112.  exit(i)
  113. end;
  114.  
  115. var
  116.    a:array of ansistring;
  117.    l,i,maxlen:longint;
  118.    StartTime: TTime;
  119.    Elapsed: Int64;
  120.  
  121.   begin
  122.   setlookuparray(u); // must do
  123.    maxlen:=1000000;
  124.  
  125.    writeln('Creating a mixed case string array of ',maxlen,' elements');
  126.    setlength(a,maxlen);
  127. {$define  w:= char(mixcases)}
  128.    for l:=0 to maxlen-1 do
  129.    begin
  130.    for i:=1 to 50 do a[l]+=w;
  131.    a[l]:=midstr(a[l],1,range(10,49)); // make different lengths
  132.    end;
  133.  
  134.      writeln('strings created, now sorting');
  135.      writeln;
  136.       StartTime := Time;
  137.    Quicksort(a,0,maxlen-1,up);
  138.      Elapsed := MillisecondsBetween(Time, StartTime);
  139.  
  140.    for l:=0 to 20  do writeln(a[l]);
  141.    writeln('. . .');
  142.    writeln('. . .');
  143.    for l:=length(a)-20 to length(a)-1  do writeln(a[l]);
  144.  
  145.     writeln;
  146.    writeln('Time taken ',Elapsed,'  milliseconds');
  147.    readln;
  148.  
  149.  
  150.   end.
  151.  
  152.  
Or using the built in MSVCRT sort in windows
I have not tested libc.so Linux, (don't have Linux)
Code: Pascal  [Select][+][-]
  1.  
  2.  program MSVCRTsort;
  3.   //{$rangeChecks on}   // OK tested already
  4.  
  5.  {$macro on}
  6.  
  7.  Uses
  8. SysUtils,DateUtils,strutils ; {for timer and midstr only, not needed for quicksort}
  9.  
  10.   {=====  start sort ====}
  11. // for Linux  external libc.so (I think)
  12.  function  qsort(T:pointer;size:longint;szdata:integer;p:pointer):integer ; cdecl external 'msvcrt.dll' name 'qsort';
  13.  
  14.    type
  15.  direction =(up,down);
  16.  casetype=(CaseSensitive,CaseInsensitive);
  17.  asp=^ansistring;
  18.  
  19.   var
  20.   u : Array[0..255] of byte;  {look up table for speed}
  21.   ct:casetype=CaseInsensitive;
  22.  
  23.  Function lessthan(const a:ansistring;const b:ansiString):integer;
  24.     Var
  25.     n,lim,lena,lenb:longint;
  26.       begin
  27.       lena:=length(a);
  28.       lenb:=length(b);
  29.       if (lena<lenb) then lim:=lena  else lim:=lenb ;
  30.     For n :=1 To lim do
  31.     begin
  32.         If u[ord(a[n])] < u[ord(b[n])] Then exit (-1);
  33.         If u[ord(a[n])] > u[ord(b[n])] Then exit (0);
  34.         end ;
  35.     exit( 0 ) ;
  36. End;
  37.  
  38.  Function morethan(const a:ansistring;const b:ansiString):integer;
  39.     Var
  40.     n,lim,lena,lenb:longint;
  41.       begin
  42.       lena:=length(a);
  43.       lenb:=length(b);
  44.       if (lena<lenb) then lim:=lena else lim:=lenb ;
  45.     For n :=1 To lim do
  46.     begin
  47.         If u[ord(a[n])] > u[ord(b[n])] Then exit (-1);
  48.         If u[ord(a[n])] < u[ord(b[n])] Then exit (0);
  49.         end ;
  50.     exit( 0 ) ;
  51. End;
  52.  
  53.  function callbackU( const n1:asp; const n2:asp):integer;
  54.  begin
  55.  
  56.  if (ct=CaseInsensitive) then
  57.  begin
  58.    if lessthan(n1^,n2^)=-1 then exit(-1);
  59.    if morethan(n1^,n2^)=-1 then exit(1) ;
  60.    end
  61.  
  62.    else
  63.    begin
  64.    if n1^<n2^ then exit(-1);
  65.     if n1^>n2^ then exit(1);
  66.     end ;
  67.  
  68.    exit(0);
  69.  end;
  70.  
  71.     function callbackD(const n1:asp;const n2:asp):integer;
  72.  begin
  73.  
  74.  if (ct=CaseInsensitive) then
  75.  begin
  76.    if lessthan(n1^,n2^)=-1 then exit(1);
  77.    if morethan(n1^,n2^)=-1 then exit(-1);
  78.    end
  79.    else
  80.  
  81.    begin
  82.     if n1^<n2^ then exit(1);
  83.     if n1^>n2^ then exit(-1);
  84.     end;
  85.  
  86.    exit(0);
  87.  end;
  88.  
  89.  procedure csort(var a:array of ansistring;lower:longint;upper:longint;d:direction;ctype :casetype);
  90.  
  91.   procedure setlookuparray(var u:array of byte); // must run
  92. var x,r:integer;
  93. begin
  94.  for x:=0 to 255 do
  95.     begin
  96.    r:=x;
  97.    if (r<91) and (r>64) then  r:=r+32;
  98.    u[x]:=r;
  99.    end;
  100. end;
  101.  
  102. const
  103.  done:integer=0;
  104.  begin
  105.   if done=0 then
  106.   begin
  107.    setlookuparray(u);done:=1;
  108.    end;
  109.    ct:=ctype;
  110.   If d=up   Then qsort( @a[Lower],((Upper)-(Lower)+1),sizeof(ansistring),@callbackU);
  111.   If d=down Then qsort( @a[Lower],((Upper)-(Lower)+1),sizeof(ansistring),@callbackD);
  112.  end;
  113.  {=====  end sort ====}
  114.  
  115.  
  116.   { try it out}
  117.   function range(f:longint;l:longint):longint ;
  118. begin
  119.     range:=  random(1000000) mod (l-f+1) + f ;
  120. end;
  121.  
  122. function mixcases:byte;
  123. var i:integer;
  124. begin
  125.  i:=range(97,122);
  126.  if random(10)<5 then i:=i-32;
  127.  exit(i)
  128. end;
  129.  
  130. var
  131.    a:array of ansistring;
  132.    l,i,maxlen:longint;
  133.    StartTime: TTime;
  134.    Elapsed: Int64;
  135.  
  136.   begin
  137.  
  138.    maxlen:=1000000;
  139.  
  140.    writeln('Creating a mixed case string array of ',maxlen,' elements');
  141.    setlength(a,maxlen);
  142.    {$define  w:= char(mixcases)}
  143.    for l:=0 to maxlen-1 do
  144.    begin
  145.    for i:=1 to 50 do a[l]+=w;
  146.    a[l]:=midstr(a[l],1,range(10,49)); // make different lengths
  147.    end;
  148.  
  149.      writeln('strings created, now sorting  ');
  150.      writeln;
  151.  
  152.       StartTime := Time;
  153.  
  154.    csort(a,0,length(a),down,CaseInsensitive);  //<< ------- method
  155.  
  156.      Elapsed := MillisecondsBetween(Time, StartTime);
  157.  
  158.    for l:=0 to 20  do writeln(a[l]);
  159.    writeln('. . .');
  160.    writeln('. . .');
  161.    for l:=length(a)-20 to length(a)-1  do writeln(a[l]);
  162.  
  163.     writeln;
  164.    writeln('Time taken ',Elapsed,'  milliseconds, ',ct);
  165.  
  166.    readln;
  167.  
  168.   end.
  169.  
  170.  
  171.  

Auld Lang Syne
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: winni on September 25, 2020, 01:53:45 am
Hi!

If you are interested in another Quicksort implementation then
have a look in the unit Grids, used by the TStringGrid.

There is the procedureTCustomGrid.Sort which contains a procedure Quicksort
for UTF8 strings.

Keep on hackin

Winni


 

Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: BobDog on September 25, 2020, 02:12:54 pm

Thanks for testing winni
I cannot find the unit grid in the fp 3.0.4.
I only have the freepascal distribution, I have no Lazarus.
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: rvk on September 25, 2020, 02:34:11 pm
I cannot find the unit grid in the fp 3.0.4.
I only have the freepascal distribution, I have no Lazarus.
You could always check out the source of lazarus just to browse through it (you could even do that online).

Here is the implementation in trunk.
Code: Pascal  [Select][+][-]
  1. procedure TCustomGrid.Sort(ColSorting: Boolean; index, IndxFrom, IndxTo: Integer);
  2.   procedure QuickSort(L,R: Integer);
  3.   var
  4.     I,J: Integer;
  5.     P{,Q}: Integer;
  6.   begin
  7.     repeat
  8.       I:=L;
  9.       J:=R;
  10.       P:=(L+R) div 2;
  11.       repeat
  12.         if ColSorting then begin
  13.           while DoCompareCells(index, P, index, I)>0 do I:=I+1;
  14.           while DoCompareCells(index, P, index, J)<0 do J:=J-1;
  15.         end else begin
  16.           while DoCompareCells(P, index, I, index)>0 do I:=I+1;
  17.           while DoCompareCells(P, index, J, index)<0 do J:=J-1;
  18.         end;
  19.         if I<=J then begin
  20.  
  21.           if I<>J then
  22.             if not FStrictSort or
  23.               (ColSorting     and (DoCompareCells(index, I, index, J)<>0)) or
  24.               (not ColSorting and (DoCompareCells(I, index, J, index)<>0))
  25.             then
  26.               DoOPExchangeColRow(not ColSorting, I,J);
  27.  
  28.           if P=I then
  29.             P:=J
  30.           else if P=J then
  31.             P:=I;
  32.  
  33.           I:=I+1;
  34.           J:=J-1;
  35.         end;
  36.       until I>J;
  37.  
  38.       if L<J then
  39.         QuickSort(L,J);
  40.  
  41.       L:=I;
  42.     until I>=R;
  43.   end;
  44. begin
  45.   if RowCount>FixedRows then begin
  46.     CheckIndex(ColSorting, Index);
  47.     CheckIndex(not ColSorting, IndxFrom);
  48.     CheckIndex(not ColSorting, IndxTo);
  49.     BeginUpdate;
  50.     QuickSort(IndxFrom, IndxTo);
  51.     EndUpdate;
  52.   end;
  53. end;
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: BobDog on September 26, 2020, 12:22:13 pm

I have added another built-in sort, the C sort, msvcrt.dll /( libc.so --linux which I cannot test).
msvcrt.dll is a well tested, well oiled and well optimised windows dll, I think it has been around forever.
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: avk on September 26, 2020, 03:51:46 pm
It makes sense to declare the comparator parameters as const, then things will go faster.
You might also be interested to take a look, here (https://github.com/avk959/LGenerics/blob/master/lgenerics/LGArrayHelpers.pas) is a pretty fast library that implements some sorting algorithms (although it uses generics and cannot be compiled using FPC-3.0.4).
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: BobDog on September 26, 2020, 04:32:37 pm

Thanks avk.
I didn't know that trick with const parameters, it makes a difference.
I have adjusted the code.
Thanks for the link.

Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: 440bx on September 26, 2020, 07:12:12 pm
msvcrt.dll is a well tested, well oiled and well optimised windows dll, I think it has been around forever.
Just FYI, instead of depending on msvcrt.dll, you can use the qsort found in NTDLL.  It's the one I use for sorting and, I can say from experience, it is fast.
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: avk on September 26, 2020, 09:31:48 pm
I remember there was already some competition between LGenerics and NtDll some time ago.
I suppose NtDll will lose in the task of sorting an array of Pascal strings.
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: 440bx on September 27, 2020, 04:19:03 pm
I suppose NtDll will lose in the task of sorting an array of Pascal strings.
Even if it does it won't be by much.  LGenerics has a definite advantage in 32bit when sorting elements that fit in a register, in 64 bit, it's likely NtDll's sort will either match LGenerics or pull ahead by a small margin.

The advantage of using NtDll is that it makes the executable smaller and use less memory since NtDll (unlike msvcrt.dll) is always loaded.
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: avk on September 27, 2020, 04:49:05 pm
I believe the 64-bit version of LGenerics is about 1.5 times faster in sorting a string array than QSort. And its advantage is that it is written in Pascal and therefore available on almost any platform.
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: 440bx on September 27, 2020, 04:53:26 pm
I believe the 64-bit version of LGenerics is about 1.5 times faster in sorting a string array than QSort.
I don't recall the sample program in the "sorting and counting" being 1.5 times faster than the Windows API implementation.

As far as portability, that's a valid point but, using msvcrt.dll isn't portable either.  IOW, leaving portability out of the picture, the OP is better off using NTDLL's qsort than msvcrt's.
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: jamie on September 27, 2020, 05:12:51 pm
I've found an index list that points to a record containing reference of where in the list it is currently and where it should be after the sort.

 so at the end all you have is a created list that is fixed and if you happen to know the number entries ahead of time  you can pre-allocated it. But basically you use the list afterwards to index the entries which gives the pointer address to the string/ word..

 One could also regenerate a new list this way, it would be linear outcome..
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: avk on September 27, 2020, 05:38:54 pm
I don't recall the sample program in the "sorting and counting" being 1.5 times faster than the Windows API implementation.

It seems, besides sorting, there was also file mapping against simple ReadLn/WriteLn. However, if you are interested, I could make a demo.

I've found an index list that points to a record containing reference of where in the list it is currently and where it should be after the sort.

 so at the end all you have is a created list that is fixed and if you happen to know the number entries ahead of time  you can pre-allocated it. But basically you use the list afterwards to index the entries which gives the pointer address to the string/ word..

 One could also regenerate a new list this way, it would be linear outcome..


@jamie, it would be interesting to see what this gives.
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: 440bx on September 27, 2020, 06:08:15 pm
It seems, besides sorting, there was also file mapping against simple ReadLn/WriteLn.
True.  There were a number of optimizations that affected the total elapsed time.  It wasn't just the sort implementation.

Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: avk on September 27, 2020, 07:30:26 pm
There is another funny question. How many Pascal libraries do you know that are faster than the corresponding C library?
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: Thaddy on September 27, 2020, 07:45:00 pm
By algorithm ? Almost all string handling.... C cant't handle strings very well and C++ has no built in support for real strings that contain zero's, which makes a quick escape but not a real deal....Specifically string reads are much faster in Pascal, because length is stored...
Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: avk on September 27, 2020, 08:07:13 pm
By algorithm ? ...

No, in terms of implementation efficiency.

Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: 440bx on September 27, 2020, 09:57:06 pm
There is another funny question. How many Pascal libraries do you know that are faster than the corresponding C library?
What are the applicable constraints to that question ?... given the _same_ algorithm, it's more likely that the C implementation will be faster simply because C compilers tend to optimize better than Pascal compilers. That's only because there is more effort put into C than in Pascal.  A Pascal compiler can be as good at optimizing as a C compiler but, code optimization does not seem to be the focus of Pascal compiler writers.

Title: Re: Quicksort a string array (Case insensitive or standard)
Post by: Thaddy on September 28, 2020, 07:45:25 am
In the case of FPC vs C it is also a matter of platform: x86_64 and i386 have more optimizations than other platforms.
Another note is that C often has more aggresive default settimgs, whereas FPC is conservative by default.
So you must also try and test with the same platform and CPU/FPU instruction sets. This can make a big difference here, e.g like copy operations using SSEx instead of on the CPU. The former is what C often selects, whereas FPC selects the CPU by default but CAN use SSEx)
TinyPortal © 2005-2018