### Bookstore

 Computer Math and Games in Pascal (preview) Lazarus Handbook

### Author Topic: Quicksort a string array (Case insensitive or standard)  (Read 1445 times)

#### BobDog

• Jr. Member
• Posts: 68
##### Quicksort a string array (Case insensitive or standard)
« on: September 25, 2020, 01:22:33 am »
This sorts a million elements up or down and is set for case insensitive.
Not necessarily Lazarus, I don't have it anyway.
Tested 3.0.4 64 bits.
Code: Pascal  [Select][+][-]
1.
2.
3.     program Quicksortstrings;
4.   //{\$rangeChecks on}   // OK tested already
5.
6.  {\$macro on}
7.
8.  {\$define caseinsensitive}      // comment out for case sensitive sort
9. Uses
10. SysUtils,DateUtils,strutils ; {for timer and midstr only, not needed for quicksort}
11.
12.  { =============  QUICKSORT =========== }
13.   type
14.  direction =(up,down);
15.   var
16.   u : Array[0..255] of byte;  {look up table for speed}
17.
18.
19. Procedure QuickSort(Var arr:array of ansistring;Start:longint;Finish:longint;d:direction);
20.   Function lessthan(const a:ansistring;const b:ansiString):integer;
21.     Var
22.     n,lim,lena,lenb:longint;
23.       begin
24.       lena:=length(a);
25.       lenb:=length(b);
26.       if (lena<lenb) then lim:=lena  else lim:=lenb ;
27.     For n :=1 To lim do
28.     begin
29.         If u[ord(a[n])] < u[ord(b[n])] Then exit (-1);
30.         If u[ord(a[n])] > u[ord(b[n])] Then exit (0);
31.         end ;
32.     exit( 0 ) ;
33. End;
34.
35.  Function morethan(const a:ansistring;const b:ansiString):integer;
36.     Var
37.     n,lim,lena,lenb:longint;
38.       begin
39.       lena:=length(a);
40.       lenb:=length(b);
41.       if (lena<lenb) then lim:=lena else lim:=lenb ;
42.     For n :=1 To lim do
43.     begin
44.         If u[ord(a[n])] > u[ord(b[n])] Then exit (-1);
45.         If u[ord(a[n])] < u[ord(b[n])] Then exit (0);
46.         end ;
47.     exit( 0 ) ;
48. End;
49. Var
50.   i,j: longint;
51.   x,temp: ansistring;
52. Begin
53.   i := Start ;
54.   j := finish;
55.   x := arr[(Start+finish) Div 2];
56.   While I <= J Do
57.     Begin
58.     if d = up then
59.     begin
60.     While lessthan(arr[I], X)=-1 do
61.       I+=1;
62.     While morethan(arr[J], X)=-1 do
63.        J-=1;
64.        end;
65.
66.        if d = down then
67.     begin
68.     While morethan(arr[I], X)=-1 do
69.       I+=1;
70.     While lessthan(arr[J], X)=-1 do
71.        J-=1;
72.        end;
73.
74.       If I<=J Then
75.         Begin
76.           temp := arr[j];
77.           arr[j] := arr[i];
78.           arr[i] := temp;
79.           I+= 1;
80.           J-= 1;
81.         End;
82.     End;
83.   If J > Start Then QuickSort(arr,Start,J,d);
84.   If I < Finish Then QuickSort(arr,I,Finish,d);
85. End;
86.
87. procedure setlookuparray(var u:array of byte); // must run
88. var x,r:integer;
89. begin
90.  for x:=0 to 255 do
91.     begin
92.    r:=x;
93.    {\$ifdef caseinsensitive}
94.    if (r<91) and (r>64) then  r:=r+32;
95.    {\$endif}
96.    u[x]:=r;
97.    end;
98. end;
99.  { ===============  END QUICKSORT=======}
100.
101.   { try it out}
102.   function range(f:longint;l:longint):longint ;
103. begin
104.     range:=  random(1000000) mod (l-f+1) + f ;
105. end;
106.
107. function mixcases:byte;
108. var i:integer;
109. begin
110.  i:=range(97,122);
111.  if random(10)<5 then i:=i-32;
112.  exit(i)
113. end;
114.
115. var
116.    a:array of ansistring;
117.    l,i,maxlen:longint;
118.    StartTime: TTime;
119.    Elapsed: Int64;
120.
121.   begin
122.   setlookuparray(u); // must do
123.    maxlen:=1000000;
124.
125.    writeln('Creating a mixed case string array of ',maxlen,' elements');
126.    setlength(a,maxlen);
127. {\$define  w:= char(mixcases)}
128.    for l:=0 to maxlen-1 do
129.    begin
130.    for i:=1 to 50 do a[l]+=w;
131.    a[l]:=midstr(a[l],1,range(10,49)); // make different lengths
132.    end;
133.
134.      writeln('strings created, now sorting');
135.      writeln;
136.       StartTime := Time;
137.    Quicksort(a,0,maxlen-1,up);
138.      Elapsed := MillisecondsBetween(Time, StartTime);
139.
140.    for l:=0 to 20  do writeln(a[l]);
141.    writeln('. . .');
142.    writeln('. . .');
143.    for l:=length(a)-20 to length(a)-1  do writeln(a[l]);
144.
145.     writeln;
146.    writeln('Time taken ',Elapsed,'  milliseconds');
148.
149.
150.   end.
151.
152.
Or using the built in MSVCRT sort in windows
I have not tested libc.so Linux, (don't have Linux)
Code: Pascal  [Select][+][-]
1.
2.  program MSVCRTsort;
3.   //{\$rangeChecks on}   // OK tested already
4.
5.  {\$macro on}
6.
7.  Uses
8. SysUtils,DateUtils,strutils ; {for timer and midstr only, not needed for quicksort}
9.
10.   {=====  start sort ====}
11. // for Linux  external libc.so (I think)
12.  function  qsort(T:pointer;size:longint;szdata:integer;p:pointer):integer ; cdecl external 'msvcrt.dll' name 'qsort';
13.
14.    type
15.  direction =(up,down);
16.  casetype=(CaseSensitive,CaseInsensitive);
17.  asp=^ansistring;
18.
19.   var
20.   u : Array[0..255] of byte;  {look up table for speed}
21.   ct:casetype=CaseInsensitive;
22.
23.  Function lessthan(const a:ansistring;const b:ansiString):integer;
24.     Var
25.     n,lim,lena,lenb:longint;
26.       begin
27.       lena:=length(a);
28.       lenb:=length(b);
29.       if (lena<lenb) then lim:=lena  else lim:=lenb ;
30.     For n :=1 To lim do
31.     begin
32.         If u[ord(a[n])] < u[ord(b[n])] Then exit (-1);
33.         If u[ord(a[n])] > u[ord(b[n])] Then exit (0);
34.         end ;
35.     exit( 0 ) ;
36. End;
37.
38.  Function morethan(const a:ansistring;const b:ansiString):integer;
39.     Var
40.     n,lim,lena,lenb:longint;
41.       begin
42.       lena:=length(a);
43.       lenb:=length(b);
44.       if (lena<lenb) then lim:=lena else lim:=lenb ;
45.     For n :=1 To lim do
46.     begin
47.         If u[ord(a[n])] > u[ord(b[n])] Then exit (-1);
48.         If u[ord(a[n])] < u[ord(b[n])] Then exit (0);
49.         end ;
50.     exit( 0 ) ;
51. End;
52.
53.  function callbackU( const n1:asp; const n2:asp):integer;
54.  begin
55.
56.  if (ct=CaseInsensitive) then
57.  begin
58.    if lessthan(n1^,n2^)=-1 then exit(-1);
59.    if morethan(n1^,n2^)=-1 then exit(1) ;
60.    end
61.
62.    else
63.    begin
64.    if n1^<n2^ then exit(-1);
65.     if n1^>n2^ then exit(1);
66.     end ;
67.
68.    exit(0);
69.  end;
70.
71.     function callbackD(const n1:asp;const n2:asp):integer;
72.  begin
73.
74.  if (ct=CaseInsensitive) then
75.  begin
76.    if lessthan(n1^,n2^)=-1 then exit(1);
77.    if morethan(n1^,n2^)=-1 then exit(-1);
78.    end
79.    else
80.
81.    begin
82.     if n1^<n2^ then exit(1);
83.     if n1^>n2^ then exit(-1);
84.     end;
85.
86.    exit(0);
87.  end;
88.
89.  procedure csort(var a:array of ansistring;lower:longint;upper:longint;d:direction;ctype :casetype);
90.
91.   procedure setlookuparray(var u:array of byte); // must run
92. var x,r:integer;
93. begin
94.  for x:=0 to 255 do
95.     begin
96.    r:=x;
97.    if (r<91) and (r>64) then  r:=r+32;
98.    u[x]:=r;
99.    end;
100. end;
101.
102. const
103.  done:integer=0;
104.  begin
105.   if done=0 then
106.   begin
107.    setlookuparray(u);done:=1;
108.    end;
109.    ct:=ctype;
110.   If d=up   Then qsort( @a[Lower],((Upper)-(Lower)+1),sizeof(ansistring),@callbackU);
111.   If d=down Then qsort( @a[Lower],((Upper)-(Lower)+1),sizeof(ansistring),@callbackD);
112.  end;
113.  {=====  end sort ====}
114.
115.
116.   { try it out}
117.   function range(f:longint;l:longint):longint ;
118. begin
119.     range:=  random(1000000) mod (l-f+1) + f ;
120. end;
121.
122. function mixcases:byte;
123. var i:integer;
124. begin
125.  i:=range(97,122);
126.  if random(10)<5 then i:=i-32;
127.  exit(i)
128. end;
129.
130. var
131.    a:array of ansistring;
132.    l,i,maxlen:longint;
133.    StartTime: TTime;
134.    Elapsed: Int64;
135.
136.   begin
137.
138.    maxlen:=1000000;
139.
140.    writeln('Creating a mixed case string array of ',maxlen,' elements');
141.    setlength(a,maxlen);
142.    {\$define  w:= char(mixcases)}
143.    for l:=0 to maxlen-1 do
144.    begin
145.    for i:=1 to 50 do a[l]+=w;
146.    a[l]:=midstr(a[l],1,range(10,49)); // make different lengths
147.    end;
148.
149.      writeln('strings created, now sorting  ');
150.      writeln;
151.
152.       StartTime := Time;
153.
154.    csort(a,0,length(a),down,CaseInsensitive);  //<< ------- method
155.
156.      Elapsed := MillisecondsBetween(Time, StartTime);
157.
158.    for l:=0 to 20  do writeln(a[l]);
159.    writeln('. . .');
160.    writeln('. . .');
161.    for l:=length(a)-20 to length(a)-1  do writeln(a[l]);
162.
163.     writeln;
164.    writeln('Time taken ',Elapsed,'  milliseconds, ',ct);
165.
167.
168.   end.
169.
170.
171.

Auld Lang Syne
« Last Edit: September 26, 2020, 04:21:38 pm by BobDog »

#### winni

• Hero Member
• Posts: 1992
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #1 on: September 25, 2020, 01:53:45 am »
Hi!

If you are interested in another Quicksort implementation then
have a look in the unit Grids, used by the TStringGrid.

There is the procedureTCustomGrid.Sort which contains a procedure Quicksort
for UTF8 strings.

Keep on hackin

Winni

#### BobDog

• Jr. Member
• Posts: 68
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #2 on: September 25, 2020, 02:12:54 pm »

Thanks for testing winni
I cannot find the unit grid in the fp 3.0.4.
I only have the freepascal distribution, I have no Lazarus.

#### rvk

• Hero Member
• Posts: 4386
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #3 on: September 25, 2020, 02:34:11 pm »
I cannot find the unit grid in the fp 3.0.4.
I only have the freepascal distribution, I have no Lazarus.
You could always check out the source of lazarus just to browse through it (you could even do that online).

Here is the implementation in trunk.
Code: Pascal  [Select][+][-]
1. procedure TCustomGrid.Sort(ColSorting: Boolean; index, IndxFrom, IndxTo: Integer);
2.   procedure QuickSort(L,R: Integer);
3.   var
4.     I,J: Integer;
5.     P{,Q}: Integer;
6.   begin
7.     repeat
8.       I:=L;
9.       J:=R;
10.       P:=(L+R) div 2;
11.       repeat
12.         if ColSorting then begin
13.           while DoCompareCells(index, P, index, I)>0 do I:=I+1;
14.           while DoCompareCells(index, P, index, J)<0 do J:=J-1;
15.         end else begin
16.           while DoCompareCells(P, index, I, index)>0 do I:=I+1;
17.           while DoCompareCells(P, index, J, index)<0 do J:=J-1;
18.         end;
19.         if I<=J then begin
20.
21.           if I<>J then
22.             if not FStrictSort or
23.               (ColSorting     and (DoCompareCells(index, I, index, J)<>0)) or
24.               (not ColSorting and (DoCompareCells(I, index, J, index)<>0))
25.             then
26.               DoOPExchangeColRow(not ColSorting, I,J);
27.
28.           if P=I then
29.             P:=J
30.           else if P=J then
31.             P:=I;
32.
33.           I:=I+1;
34.           J:=J-1;
35.         end;
36.       until I>J;
37.
38.       if L<J then
39.         QuickSort(L,J);
40.
41.       L:=I;
42.     until I>=R;
43.   end;
44. begin
45.   if RowCount>FixedRows then begin
46.     CheckIndex(ColSorting, Index);
47.     CheckIndex(not ColSorting, IndxFrom);
48.     CheckIndex(not ColSorting, IndxTo);
49.     BeginUpdate;
50.     QuickSort(IndxFrom, IndxTo);
51.     EndUpdate;
52.   end;
53. end;

#### BobDog

• Jr. Member
• Posts: 68
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #4 on: September 26, 2020, 12:22:13 pm »

I have added another built-in sort, the C sort, msvcrt.dll /( libc.so --linux which I cannot test).
msvcrt.dll is a well tested, well oiled and well optimised windows dll, I think it has been around forever.

#### avk

• Sr. Member
• Posts: 324
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #5 on: September 26, 2020, 03:51:46 pm »
It makes sense to declare the comparator parameters as const, then things will go faster.
You might also be interested to take a look, here is a pretty fast library that implements some sorting algorithms (although it uses generics and cannot be compiled using FPC-3.0.4).

#### BobDog

• Jr. Member
• Posts: 68
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #6 on: September 26, 2020, 04:32:37 pm »

Thanks avk.
I didn't know that trick with const parameters, it makes a difference.
I have adjusted the code.
Thanks for the link.

#### 440bx

• Hero Member
• Posts: 2047
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #7 on: September 26, 2020, 07:12:12 pm »
msvcrt.dll is a well tested, well oiled and well optimised windows dll, I think it has been around forever.
Just FYI, instead of depending on msvcrt.dll, you can use the qsort found in NTDLL.  It's the one I use for sorting and, I can say from experience, it is fast.
FPC v3.0.4 and Lazarus 1.8.2 on Windows 7 64bit.

#### avk

• Sr. Member
• Posts: 324
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #8 on: September 26, 2020, 09:31:48 pm »
I remember there was already some competition between LGenerics and NtDll some time ago.
I suppose NtDll will lose in the task of sorting an array of Pascal strings.

#### 440bx

• Hero Member
• Posts: 2047
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #9 on: September 27, 2020, 04:19:03 pm »
I suppose NtDll will lose in the task of sorting an array of Pascal strings.
Even if it does it won't be by much.  LGenerics has a definite advantage in 32bit when sorting elements that fit in a register, in 64 bit, it's likely NtDll's sort will either match LGenerics or pull ahead by a small margin.

The advantage of using NtDll is that it makes the executable smaller and use less memory since NtDll (unlike msvcrt.dll) is always loaded.
FPC v3.0.4 and Lazarus 1.8.2 on Windows 7 64bit.

#### avk

• Sr. Member
• Posts: 324
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #10 on: September 27, 2020, 04:49:05 pm »
I believe the 64-bit version of LGenerics is about 1.5 times faster in sorting a string array than QSort. And its advantage is that it is written in Pascal and therefore available on almost any platform.

#### 440bx

• Hero Member
• Posts: 2047
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #11 on: September 27, 2020, 04:53:26 pm »
I believe the 64-bit version of LGenerics is about 1.5 times faster in sorting a string array than QSort.
I don't recall the sample program in the "sorting and counting" being 1.5 times faster than the Windows API implementation.

As far as portability, that's a valid point but, using msvcrt.dll isn't portable either.  IOW, leaving portability out of the picture, the OP is better off using NTDLL's qsort than msvcrt's.
FPC v3.0.4 and Lazarus 1.8.2 on Windows 7 64bit.

#### jamie

• Hero Member
• Posts: 3784
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #12 on: September 27, 2020, 05:12:51 pm »
I've found an index list that points to a record containing reference of where in the list it is currently and where it should be after the sort.

so at the end all you have is a created list that is fixed and if you happen to know the number entries ahead of time  you can pre-allocated it. But basically you use the list afterwards to index the entries which gives the pointer address to the string/ word..

One could also regenerate a new list this way, it would be linear outcome..
The only true wisdom is knowing you know nothing

#### avk

• Sr. Member
• Posts: 324
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #13 on: September 27, 2020, 05:38:54 pm »
I don't recall the sample program in the "sorting and counting" being 1.5 times faster than the Windows API implementation.

It seems, besides sorting, there was also file mapping against simple ReadLn/WriteLn. However, if you are interested, I could make a demo.

I've found an index list that points to a record containing reference of where in the list it is currently and where it should be after the sort.

so at the end all you have is a created list that is fixed and if you happen to know the number entries ahead of time  you can pre-allocated it. But basically you use the list afterwards to index the entries which gives the pointer address to the string/ word..

One could also regenerate a new list this way, it would be linear outcome..

@jamie, it would be interesting to see what this gives.

#### 440bx

• Hero Member
• Posts: 2047
##### Re: Quicksort a string array (Case insensitive or standard)
« Reply #14 on: September 27, 2020, 06:08:15 pm »
It seems, besides sorting, there was also file mapping against simple ReadLn/WriteLn.
True.  There were a number of optimizations that affected the total elapsed time.  It wasn't just the sort implementation.

FPC v3.0.4 and Lazarus 1.8.2 on Windows 7 64bit.