Recent

Author Topic: What is more (memory) efficient  (Read 4522 times)

ALLIGATOR

  • Sr. Member
  • ****
  • Posts: 329
  • I use FPC [main] 💪🐯💪
Re: What is more (memory) efficient
« Reply #30 on: February 05, 2025, 07:09:04 pm »
Then the size I get (x86_64 and i386) is 8 bytes.

Wow! Cool! (I suspected it should be possible  :D but I don't work with unions)
I may seem rude - please don't take it personally

JdeHaan

  • Full Member
  • ***
  • Posts: 171
Re: What is more (memory) efficient
« Reply #31 on: February 05, 2025, 07:37:36 pm »
Thanks @Zoran, I'll explore that option.

Warfley

  • Hero Member
  • *****
  • Posts: 2037
Re: What is more (memory) efficient
« Reply #32 on: February 05, 2025, 08:02:29 pm »
Ok, now with random access  :P
Not random array access, but random field access. E.g. you have a record with 10 fields, and you access them "randomly" (not random index-wise, of course, but with complex code that accesses the fields in no particular order).

Your code only ever accesses the IntVal field, not even the Typ field.

If I just change your code slightly, to also consider the Typ field (and initialize the data so the branches are not trivially predictable):
Code: Pascal  [Select][+][-]
  1. program test;
  2. {$mode delphi}
  3. {$optimization on}
  4.  
  5. uses SysUtils;
  6.  
  7. type
  8.   TValueType = (vtBoolean, vtInteger, vtReal); // tag that selects which variant field is meaningful
  9.   PValueNotPacked = ^TValueNotPacked;
  10.   TValueNotPacked = record // default alignment; the posted run reports SizeOf = 16
  11.     case Typ: TValueType of // Typ is a stored tag field, followed by overlapping variant fields
  12.       vtBoolean:  (BoolVal: Boolean);
  13.       vtInteger:  (IntVal: Int64);
  14.       vtReal:     (RealVal: Double);
  15.   end;
  16.   PValuePacked = ^TValuePacked;
  17.   TValuePacked = packed record // same fields without padding; the posted run reports SizeOf = 9
  18.     case Typ: TValueType of
  19.       vtBoolean:  (BoolVal: Boolean);
  20.       vtInteger:  (IntVal: Int64);
  21.       vtReal:     (RealVal: Double);
  22.   end;
  23.  
  24. const
  25.   size = 512*1024*1024; // number of array elements (not bytes) allocated by every bench_* procedure
  26.  
  27. procedure bench_notpacked1; // times an indexed (arr[i]) pass over an array of TValueNotPacked
  28. var
  29.   arr_notpacked: array of TValueNotPacked;
  30.   i: NativeInt; s: NativeInt = 0; // fix: s was accumulated with "+=" without ever being initialised
  31.   t: TDateTime;
  32. begin
  33.   Write(SizeOf(TValueNotPacked), ', '); // first column of the result line: record size in bytes
  34.   SetLength(arr_notpacked, size);
  35.   for i:=0 to size-1 do // untimed setup: give every element a tag and a payload
  36.   begin
  37.     arr_notpacked[i].Typ:=TValueType(Random(ord(High(TValueType)))); // Avoids Real, because Doubles have more overhead than ordinal types
  38.     arr_notpacked[i].RealVal:=Random; // payload is written as a Double; BoolVal/IntVal later read the same overlapping bytes
  39.   end;
  40.   t:=Now; // timed region starts here
  41.   begin
  42.     for i:=0 to size-1 do
  43.       case arr_notpacked[i].Typ of // the tag read decides which variant field is read next
  44.       vtBoolean: s += ord(arr_notpacked[i].BoolVal);
  45.       vtInteger: s += arr_notpacked[i].IntVal;
  46.       vtReal: s += Trunc(arr_notpacked[i].RealVal);
  47.       end;
  48.   end;
  49.   WriteLn('Not packed 1: ',(Now-t)*MSecsPerDay:0:0, ' ms.'); // elapsed wall-clock time in milliseconds
  50.   SetLength(arr_notpacked, 0); // release the array before the next benchmark allocates its own
  51. end;
  52.  
  53. procedure bench_notpacked2; // times a pointer-walk pass over an array of TValueNotPacked
  54. var
  55.   arr_notpacked: array of TValueNotPacked;
  56.   s: NativeInt = 0; // fix: s was accumulated with "+=" without ever being initialised
  57.   t: TDateTime;
  58.   p, pend: PValueNotPacked;
  59.   i: NativeInt;
  60. begin
  61.   Write(SizeOf(TValueNotPacked), ', '); // first column of the result line: record size in bytes
  62.   SetLength(arr_notpacked, size);
  63.   for i:=0 to size-1 do // untimed setup: give every element a tag and a payload
  64.   begin
  65.     arr_notpacked[i].Typ:=TValueType(Random(ord(High(TValueType)))); // Avoids Real, because Doubles have more overhead than ordinal types
  66.     arr_notpacked[i].RealVal:=Random; // payload is written as a Double; BoolVal/IntVal later read the same overlapping bytes
  67.   end;
  68.   t:=Now; // timed region starts here
  69.   begin
  70.     p:=@arr_notpacked[0];
  71.     pend:=@arr_notpacked[size-1]; // address of the last element, hence the inclusive "<=" below
  72.     while p<=pend do
  73.     begin
  74.       case p^.Typ of // the tag read decides which variant field is read next
  75.       vtBoolean: s += ord(p^.BoolVal);
  76.       vtInteger: s += p^.IntVal;
  77.       vtReal: s += Trunc(p^.RealVal);
  78.       end;
  79.       inc(p); // typed pointer: advances by one whole record
  80.     end;
  81.   end;
  82.   WriteLn('Not packed 2: ',(Now-t)*MSecsPerDay:0:0, ' ms.'); // elapsed wall-clock time in milliseconds
  83.   SetLength(arr_notpacked, 0); // release the array before the next benchmark allocates its own
  84. end;
  85.  
  86. procedure bench_packed1; // times an indexed (arr[i]) pass over an array of TValuePacked
  87. var
  88.   arr_packed: array of TValuePacked;
  89.   i: NativeInt; s: NativeInt = 0; // fix: s was accumulated with "+=" without ever being initialised
  90.   t: TDateTime;
  91. begin
  92.   Write(SizeOf(TValuePacked), ', '); // first column of the result line: record size in bytes
  93.   SetLength(arr_packed, size);
  94.   for i:=0 to size-1 do // untimed setup: give every element a tag and a payload
  95.   begin
  96.     arr_packed[i].Typ:=TValueType(Random(ord(High(TValueType)))); // Avoids Real, because Doubles have more overhead than ordinal types
  97.     arr_packed[i].RealVal:=Random; // payload is written as a Double; BoolVal/IntVal later read the same overlapping bytes
  98.   end;
  99.   t:=Now; // timed region starts here
  100.   begin
  101.     for i:=0 to size-1 do
  102.       case arr_packed[i].Typ of // the tag read decides which variant field is read next
  103.       vtBoolean: s += ord(arr_packed[i].BoolVal);
  104.       vtInteger: s += arr_packed[i].IntVal;
  105.       vtReal: s += Trunc(arr_packed[i].RealVal);
  106.       end;
  107.   end;
  108.   WriteLn('Packed 1: ',(Now-t)*MSecsPerDay:0:0, ' ms.'); // elapsed wall-clock time in milliseconds
  109.   SetLength(arr_packed, 0); // release the array before the next benchmark allocates its own
  110. end;
  111.  
  112. procedure bench_packed2; // times a pointer-walk pass over an array of TValuePacked
  113. var
  114.   arr_packed: array of TValuePacked;
  115.   s: NativeInt = 0; // fix: s was accumulated with "+=" without ever being initialised
  116.   t: TDateTime;
  117.   p, pend: PValuePacked;
  118.   i: NativeInt;
  119. begin
  120.   Write(SizeOf(TValuePacked), ', '); // first column of the result line: record size in bytes
  121.   SetLength(arr_packed, size);
  122.   for i:=0 to size-1 do // untimed setup: give every element a tag and a payload
  123.   begin
  124.     arr_packed[i].Typ:=TValueType(Random(ord(High(TValueType)))); // Avoids Real, because Doubles have more overhead than ordinal types
  125.     arr_packed[i].RealVal:=Random; // payload is written as a Double; BoolVal/IntVal later read the same overlapping bytes
  126.   end;
  127.   t:=Now; // timed region starts here
  128.   p:=@arr_packed[0];
  129.   pend:=@arr_packed[size-1]; // address of the last element, hence the inclusive "<=" below
  130.   while p<=pend do
  131.   begin
  132.     case p^.Typ of // the tag read decides which variant field is read next
  133.     vtBoolean: s += ord(p^.BoolVal);
  134.     vtInteger: s += p^.IntVal;
  135.     vtReal: s += Trunc(p^.RealVal);
  136.     end;
  137.     inc(p); // typed pointer: advances by one whole record
  138.   end;
  139.   WriteLn('Packed 2: ',(Now-t)*MSecsPerDay:0:0, ' ms.'); // elapsed wall-clock time in milliseconds
  140.   SetLength(arr_packed, 0); // release the array before the program continues
  141. end;
  142.  
  143. begin // runs the four benchmarks one after another; each prints "<SizeOf>, <label>: <ms> ms."
  144.   bench_notpacked1;
  145.   bench_notpacked2;
  146.   bench_packed1;
  147.   bench_packed2;
  148.   ReadLn; // wait for Enter before exiting
  149. end.
The difference in timing shrinks already:
Code: Text  [Select][+][-]
  1. 16, Not packed 1: 2656 ms.
  2. 16, Not packed 2: 2377 ms.
  3. 9, Packed 1: 2516 ms.
  4. 9, Packed 2: 2312 ms.
While a change in relative speed difference was to be expected, because the code does now much more, even the absolute speed difference is now much smaller, where before it was consistently between 100-200ms on my system, it's now more around 50ms, with pointer arithmetic on not packed always outperforming array access on packed.
Adding more fields with different alignment, which are accessed in the for loop will most likely change the results further to the point where the performance advantage of packed will have swapped

A benchmark is only as good as its transferability to real-world code. Sure, you can show a big difference in your benchmark, but how realistic is it that you create a record with multiple fields but only ever access a single one of these fields in a deterministic manner?

Martin_fr

  • Administrator
  • Hero Member
  • *
  • Posts: 12011
  • Debugger - SynEdit - and more
    • wiki
Re: What is more (memory) efficient
« Reply #33 on: February 05, 2025, 08:12:57 pm »
It may be that Intel does not suffer much from non-alignment. And/Or that the CPU pre-fetching is faster than the execution of the code. So that the data is always retrieved that early.
In that case having to load less memory into the cache will be helpful.

I tried one of the benchmarks https://forum.lazarus.freepascal.org/index.php/topic,70108.msg546065.html#msg546065 but changed it from using the int value to using the double value. In that case packed and unpacked were at the same speed for me. So likely the floating point unit benefits from aligned data, and took more time fetching the non aligned data. But the extra time taken equalled out with the savings of less memory to be loaded into the cache.

Martin_fr

  • Administrator
  • Hero Member
  • *
  • Posts: 12011
  • Debugger - SynEdit - and more
    • wiki
Re: What is more (memory) efficient
« Reply #34 on: February 05, 2025, 08:19:21 pm »
And, btw, if I make both records equal size (by removing "typ") "case  TValueType of" and removing "packed" => the packed run is still faster by a tiny bit (550 <> 520). Though -despite showing up on all of many runs - this is within error margin.

But that just goes to show: never trust a benchmark. (Not that I follow my own advice.)

ALLIGATOR

  • Sr. Member
  • ****
  • Posts: 329
  • I use FPC [main] 💪🐯💪
Re: What is more (memory) efficient
« Reply #35 on: February 05, 2025, 08:37:32 pm »
A benchmark is only as good as its transferability to real-world code. Sure, you can show a big difference in your benchmark, but how realistic is it that you create a record with multiple fields but only ever access a single one of these fields in a deterministic manner?

That's why I answered the topic starter in one of the first replies, that he should test on his data and algorithm and his range of possible hardware 💁‍♂️
I may seem rude - please don't take it personally

ALLIGATOR

  • Sr. Member
  • ****
  • Posts: 329
  • I use FPC [main] 💪🐯💪
Re: What is more (memory) efficient
« Reply #36 on: February 05, 2025, 08:53:48 pm »
Code: Text  [Select][+][-]
  1. 16, Not packed 1: 2656 ms.
  2. 16, Not packed 2: 2377 ms.
  3. 9, Packed 1: 2516 ms.
  4. 9, Packed 2: 2312 ms.
And yes, packed still won by a little in this particular test )

But it implies data processing - it affects of course, but as they say, while the processor goes to RAM for data, 100 cycles will pass, during this time you can calculate and make a conditional jump )

The processor is much faster than RAM in the current times

---
In fact, this whole task comes down to cache operation, you don't have to declare any packed structures, but just declare one array and iterate over it with different steps but the same number of iterations. For example 1,3,4,7,8,8,12,31,32,33,36,63,64,65,100,160 and so on.

And you can also split this range into two: into aligned steps (for example 4 or 8 or 16) and unaligned steps (3,7,15 or 5,9,17).

Here you go... for now
I may seem rude - please don't take it personally

ALLIGATOR

  • Sr. Member
  • ****
  • Posts: 329
  • I use FPC [main] 💪🐯💪
Re: What is more (memory) efficient
« Reply #37 on: February 05, 2025, 09:18:55 pm »
Not random array access, but random field access. E.g. you have a record with 10 fields, and you access them "randomly" (not random index-wise, of course, but with complex code that accesses the fields in no particular order).

Another benchmark where everything is randomized (as you like  :P) and access to fields and access to array elements )

Code: Pascal  [Select][+][-]
  1. program test;
  2. {$mode delphi}
  3. {$optimization on}
  4.  
  5. uses SysUtils;
  6.  
  7. type
  8.   TValueType = 0..7; // tag: one value per variant field below
  9.   PValueNotPacked = ^TValueNotPacked;
  10.   TValueNotPacked = record // default alignment; the posted run reports SizeOf = 16
  11.     case Typ: TValueType of // Typ is a stored tag field; all variant fields overlap each other
  12.       0:  (Bool_: Boolean);
  13.       1:  (Int32_: Int32); // fix: was declared Int64, contradicting the field name and the Int32 accumulator
  14.       2:  (Int64_: Int64);
  15.       3:  (Int8_: Int8);
  16.       4:  (Int16: Int16);
  17.       5:  (Real_: Real);
  18.       6:  (Double_: Double);
  19.       7:  (UInt8_: UInt8);
  20.   end; // the 8-byte variants (Int64_, Double_) remain, so the Int32 fix leaves the record size unchanged
  21.   PValuePacked = ^TValuePacked;
  22.   TValuePacked = packed record // same fields without padding; the posted run reports SizeOf = 9
  23.     case Typ: TValueType of
  24.       0:  (Bool_: Boolean);
  25.       1:  (Int32_: Int32); // fix: was declared Int64, contradicting the field name and the Int32 accumulator
  26.       2:  (Int64_: Int64);
  27.       3:  (Int8_: Int8);
  28.       4:  (Int16: Int16);
  29.       5:  (Real_: Real);
  30.       6:  (Double_: Double);
  31.       7:  (UInt8_: UInt8);
  32.   end;
  33.  
  34. const
  35.   size = 128*1024*1024; // number of array elements (not bytes); also the modulus used by next_arr
  36.  
  37. function next_arr(x: NativeUInt): NativeUInt; inline; // maps x to an array index in 0..size-1
  38. begin
  39.   Result := (x * 1103515245 + 12345) mod size; // pure function of x (no carried state): consecutive x values give indices a constant stride apart, modulo size
  40. end;
  41.  
  42. function next_typ(x: NativeUInt): NativeUInt; inline; // maps x to a variant selector in 0..7
  43. begin
  44.   Result := (x * 1103515245 + 12345) mod 8; // multiplier mod 8 = 5 and increment mod 8 = 1, so this equals (5*x + 1) mod 8: a fixed cycle of length 8 over consecutive x
  45. end;
  46.  
  47.  
  48. procedure bench_notpacked; // times scattered reads of different variant fields of TValueNotPacked
  49. var
  50.   arr: array of TValueNotPacked;
  51.   i: NativeInt;
  52.   t: TDateTime;
  53.   sb: Boolean = False;  // fix: every accumulator below was read uninitialised by its first update
  54.   si32: Int32 = 0;
  55.   si64: Int64 = 0;
  56.   si8: Int8 = 0;
  57.   si16: Int16 = 0;
  58.   sr: Real = 0;         // stack garbage in a float accumulator may be NaN or denormal
  59.   sd: Double = 0;
  60.   su8: UInt8 = 0;
  61. begin
  62.   Write(SizeOf(TValueNotPacked), ', '); // first column of the result line: record size in bytes
  63.   SetLength(arr, size); // the elements are never written afterwards; SetLength leaves new elements zeroed
  64.   t:=Now; // timed region starts here. NOTE(review): this may be the first touch of the array's memory - confirm
  65.   for i:=0 to size-1 do
  66.     case next_typ(i) of // selector and index are both derived from the loop counter
  67.       0: sb   := arr[next_arr(i)].Bool_  or sb;
  68.       1: si32 := arr[next_arr(i)].Int32_  + si32;
  69.       2: si64 := arr[next_arr(i)].Int64_  + si64;
  70.       3: si8  := arr[next_arr(i)].Int8_   + si8;
  71.       4: si16 := arr[next_arr(i)].Int16   + si16;
  72.       5: sr   := arr[next_arr(i)].Real_   + sr;
  73.       6: sd   := arr[next_arr(i)].Double_ + sd;
  74.       7: su8  := arr[next_arr(i)].UInt8_  + su8;
  75.     end;
  76.   WriteLn('Not packed: ',(Now-t)*MSecsPerDay:0:0, ' ms.'); // elapsed wall-clock time in milliseconds
  77. end;
  78.  
  79. procedure bench_packed; // times scattered reads of different variant fields of TValuePacked
  80. var
  81.   arr: array of TValuePacked;
  82.   i: NativeInt;
  83.   t: TDateTime;
  84.   sb: Boolean = False;  // fix: every accumulator below was read uninitialised by its first update
  85.   si32: Int32 = 0;
  86.   si64: Int64 = 0;
  87.   si8: Int8 = 0;
  88.   si16: Int16 = 0;
  89.   sr: Real = 0;         // stack garbage in a float accumulator may be NaN or denormal
  90.   sd: Double = 0;
  91.   su8: UInt8 = 0;
  92. begin
  93.   Write(SizeOf(TValuePacked), ', '); // first column of the result line: record size in bytes
  94.   SetLength(arr, size); // the elements are never written afterwards; SetLength leaves new elements zeroed
  95.   t:=Now; // timed region starts here. NOTE(review): this may be the first touch of the array's memory - confirm
  96.   for i:=0 to size-1 do
  97.     case next_typ(i) of // selector and index are both derived from the loop counter
  98.       0: sb   := arr[next_arr(i)].Bool_  or sb;
  99.       1: si32 := arr[next_arr(i)].Int32_  + si32;
  100.       2: si64 := arr[next_arr(i)].Int64_  + si64;
  101.       3: si8  := arr[next_arr(i)].Int8_   + si8;
  102.       4: si16 := arr[next_arr(i)].Int16   + si16;
  103.       5: sr   := arr[next_arr(i)].Real_   + sr;
  104.       6: sd   := arr[next_arr(i)].Double_ + sd;
  105.       7: su8  := arr[next_arr(i)].UInt8_  + su8;
  106.     end;
  107.   WriteLn('Packed: ',(Now-t)*MSecsPerDay:0:0, ' ms.'); // elapsed wall-clock time in milliseconds
  108. end;
  109.  
  110. begin // runs the unpacked benchmark first, then the packed one
  111.   bench_notpacked;
  112.   bench_packed;
  113.   ReadLn; // wait for Enter before exiting
  114. end.
  115.  
i7-8750H, ddr4

Code: Pascal  [Select][+][-]
  1. 16, Not packed: 2555 ms.
  2. 9, Packed: 1733 ms.
  3.  
I may seem rude - please don't take it personally

Warfley

  • Hero Member
  • *****
  • Posts: 2037
Re: What is more (memory) efficient
« Reply #38 on: February 05, 2025, 09:48:36 pm »
Another benchmark where everything is randomized (as you like  :P) and access to fields and access to array elements )
Because it's a variant record, you access the same field, just under different names. Let's change this to actually be different fields with different alignment:
Code: Pascal  [Select][+][-]
  1.   TValue[Not]Packed = [packed] record
  2.     Typ: TValueType;
  3.     Bool_: Boolean;
  4.     Int32_: Int64;
  5.     Int64_: Int64;
  6.     Int8_: Int8;
  7.     Int16: Int16;
  8.     Real_: Real;
  9.     Double_: Double;
  10.     UInt8_: UInt8;
  11.   end;
Now packed is slower:
Code: Text  [Select][+][-]
  1. 56, Not packed: 6453 ms.
  2. 38, Packed: 6518 ms.
  3.  

There are certainly cases where you don't need alignment, especially on an x64 (on ARM it's a whole different story tho). But Compilers try to optimize the general case, and generally speaking you have records that contain more than just one field
« Last Edit: February 05, 2025, 09:53:12 pm by Warfley »

ALLIGATOR

  • Sr. Member
  • ****
  • Posts: 329
  • I use FPC [main] 💪🐯💪
Re: What is more (memory) efficient
« Reply #39 on: February 05, 2025, 10:18:37 pm »
Now packed is slower:
Code: Text  [Select][+][-]
  1. 56, Not packed: 6453 ms.
  2. 38, Packed: 6518 ms.
  3.  

On my hardware) :
Code: Pascal  [Select][+][-]
  1. 56, Not packed: 4119 ms.
  2. 38, Packed: 3785 ms.
  3.  
I may seem rude - please don't take it personally

440bx

  • Hero Member
  • *****
  • Posts: 6017
Re: What is more (memory) efficient
« Reply #40 on: February 06, 2025, 06:14:07 am »
I believe it's important to keep in mind that the benchmark results may _not_ be an indication of the effects of aligning data but of how well (or poorly) the compiler optimizes a specific case.

FPC v3.2.2 and Lazarus v4.0rc3 on Windows 7 SP1 64bit.

TRon

  • Hero Member
  • *****
  • Posts: 4377
Re: What is more (memory) efficient
« Reply #41 on: February 06, 2025, 06:58:46 am »
Besides that, not everyone has the same processor, and the benchmarks shown so far are not real-world situations but optimized benchmarks tailored to do the one thing it's good at.

Such small benchmarks on modern processors tell absolutely nothing; in real life they act more like a steam engine and reach optimal performance only after a certain amount of time.
Today is tomorrow's yesterday.

ALLIGATOR

  • Sr. Member
  • ****
  • Posts: 329
  • I use FPC [main] 💪🐯💪
Re: What is more (memory) efficient
« Reply #42 on: February 06, 2025, 07:37:55 am »
I tried to implement an algorithm to test the processor cache performance depending on the “record size”, or in other words the data step/size

But the test results do not satisfy me, earlier on the research charts I saw a completely different picture, there were certain drawdowns at the boundaries of cache sizes of different levels, I don't see this in myself...

I am posting the code, maybe someone will be interested (and I am also interested if someone will find a possible error in the code, why my expectation does not coincide with reality, it is desirable, of course, to confirm your words with references/test programs and not to rely on “my inner feeling”  :)).

Code: Pascal  [Select][+][-]
  1. program test;
  2. {$mode objfpc}
  3. {$optimization on}
  4.  
  5. uses SysUtils;
  6.  
  7. type
  8.   TMyArray = array of NativeInt; // buffer that the benchmarks stride through
  9.   TBenchProc = procedure (const arr: TMyArray; step: NativeInt); // common signature of bench_read / bench_write / bench_readwrite
  10.  
  11. const
  12.   step_over = 10; // slack added on top of 128*1024 when computing max_step
  13.   max_step = 128*1024 + step_over; // largest stride the bench_* procedures accept; they Halt(1) above it
  14.   steps_count = 16*1024*1024; // target number of element accesses per benchmark run
  15.   size = 2*1024*1024*1024 div sizeof(TMyArray[0]); // element count: 2 GiB divided by the element size
  16.  
  17. procedure bench_read(const arr: TMyArray; step: NativeInt); // times about steps_count reads of arr, "step" elements apart
  18. var
  19.   i, dummy: NativeInt; // dummy only receives the loaded value; it is never read back
  20.   c: NativeInt = 0; // number of accesses performed so far
  21.   timer: TDateTime;
  22. begin
  23.   if (step<1) or (step>max_step) then Halt(1); // fix: step=0 divides by zero below and step<0 indexes outside arr
  24.   timer:=Now;
  25.   while c < steps_count do // one iteration = one pass towards the end of the array
  26.   begin
  27.     if (size div step)>(steps_count-c) then
  28.       i:=size - (steps_count-c)*step // fewer accesses remain than a full pass holds: start near the end
  29.     else
  30.       i:=0; // otherwise sweep the array from the beginning
  31.     while (i < size) do // bound checked once per 16 accesses; the caller must pad arr beyond size
  32.     begin
  33.       dummy := arr[i]; inc(i, step);
  34.       dummy := arr[i]; inc(i, step);
  35.       dummy := arr[i]; inc(i, step);
  36.       dummy := arr[i]; inc(i, step);
  37.       dummy := arr[i]; inc(i, step);
  38.       dummy := arr[i]; inc(i, step);
  39.       dummy := arr[i]; inc(i, step);
  40.       dummy := arr[i]; inc(i, step);
  41.       dummy := arr[i]; inc(i, step);
  42.       dummy := arr[i]; inc(i, step);
  43.       dummy := arr[i]; inc(i, step);
  44.       dummy := arr[i]; inc(i, step);
  45.       dummy := arr[i]; inc(i, step);
  46.       dummy := arr[i]; inc(i, step);
  47.       dummy := arr[i]; inc(i, step);
  48.       dummy := arr[i]; inc(i, step);
  49.       inc(c, 16); // account for the 16 unrolled accesses above
  50.     end;
  51.   end;
  52.   WriteLn('step: ', step: 8, ', time: ', (Now-timer)*MSecsPerDay:0:0, ' ms'); // elapsed wall-clock time in milliseconds
  53. end;
  54.  
  55. procedure bench_write(const arr: TMyArray; step: NativeInt); // times about steps_count writes to arr, "step" elements apart
  56. var
  57.   i: NativeInt;
  58.   c: NativeInt = 0; // number of accesses performed so far
  59.   timer: TDateTime;
  60.   zero: NativeInt = 0; // the value stored into every visited element
  61. begin
  62.   if (step<1) or (step>max_step) then Halt(1); // fix: step=0 divides by zero below and step<0 indexes outside arr
  63.   timer:=Now;
  64.   while c < steps_count do // one iteration = one pass towards the end of the array
  65.   begin
  66.     if (size div step)>(steps_count-c) then
  67.       i:=size - (steps_count-c)*step // fewer accesses remain than a full pass holds: start near the end
  68.     else
  69.       i:=0; // otherwise sweep the array from the beginning
  70.     while (i < size) do // bound checked once per 16 accesses; the caller must pad arr beyond size
  71.     begin
  72.       arr[i]:=zero; inc(i, step);
  73.       arr[i]:=zero; inc(i, step);
  74.       arr[i]:=zero; inc(i, step);
  75.       arr[i]:=zero; inc(i, step);
  76.       arr[i]:=zero; inc(i, step);
  77.       arr[i]:=zero; inc(i, step);
  78.       arr[i]:=zero; inc(i, step);
  79.       arr[i]:=zero; inc(i, step);
  80.       arr[i]:=zero; inc(i, step);
  81.       arr[i]:=zero; inc(i, step);
  82.       arr[i]:=zero; inc(i, step);
  83.       arr[i]:=zero; inc(i, step);
  84.       arr[i]:=zero; inc(i, step);
  85.       arr[i]:=zero; inc(i, step);
  86.       arr[i]:=zero; inc(i, step);
  87.       arr[i]:=zero; inc(i, step);
  88.       inc(c, 16); // account for the 16 unrolled accesses above
  89.     end;
  90.   end;
  91.   WriteLn('step: ', step: 8, ', time: ', (Now-timer)*MSecsPerDay:0:0, ' ms'); // elapsed wall-clock time in milliseconds
  92. end;
  93.  
  94. procedure bench_readwrite(const arr: TMyArray; step: NativeInt); // times about steps_count alternating writes and reads of arr, "step" elements apart
  95. var
  96.   i: NativeInt;
  97.   c: NativeInt = 0; // number of accesses performed so far
  98.   timer: TDateTime;
  99.   zero: NativeInt = 0; // carried value: stored by each write, replaced by each read
  100. begin
  101.   if (step<1) or (step>max_step) then Halt(1); // fix: step=0 divides by zero below and step<0 indexes outside arr
  102.   timer:=Now;
  103.   while c < steps_count do // one iteration = one pass towards the end of the array
  104.   begin
  105.     if (size div step)>(steps_count-c) then
  106.       i:=size - (steps_count-c)*step // fewer accesses remain than a full pass holds: start near the end
  107.     else
  108.       i:=0; // otherwise sweep the array from the beginning
  109.     while (i < size) do // bound checked once per 16 accesses; the caller must pad arr beyond size
  110.     begin
  111.       arr[i]:=zero; inc(i, step); // write ...
  112.       zero:=arr[i]; inc(i, step); // ... then read the next strided element, so each write stores the value just read
  113.       arr[i]:=zero; inc(i, step);
  114.       zero:=arr[i]; inc(i, step);
  115.       arr[i]:=zero; inc(i, step);
  116.       zero:=arr[i]; inc(i, step);
  117.       arr[i]:=zero; inc(i, step);
  118.       zero:=arr[i]; inc(i, step);
  119.       arr[i]:=zero; inc(i, step);
  120.       zero:=arr[i]; inc(i, step);
  121.       arr[i]:=zero; inc(i, step);
  122.       zero:=arr[i]; inc(i, step);
  123.       arr[i]:=zero; inc(i, step);
  124.       zero:=arr[i]; inc(i, step);
  125.       arr[i]:=zero; inc(i, step);
  126.       zero:=arr[i]; inc(i, step);
  127.       inc(c, 16); // account for the 16 unrolled accesses above
  128.     end;
  129.   end;
  130.   WriteLn('step: ', step: 8, ', time: ', (Now-timer)*MSecsPerDay:0:0, ' ms'); // elapsed wall-clock time in milliseconds
  131. end;
  132.  
  133. var
  134.   bench_procs: array of TBenchProc = (@bench_read, @bench_write, @bench_readwrite); // benchmarks to run, in this order
  135.   steps: array of integer = (1,2,3,4,5,6,7,8,9,10,15,16,17,23,24,25,30,31,32,33,34,47,48,49,62,63,64,65,66,95,96,97,126,127,128,129, // strides in elements, clustered around powers of two
  136.                              254,255,256,257,510,511,512,513,1022,1023,1024,1025,1026,2046,2047,2048,2049,
  137.                              16382,16383,16384,16385,16386,
  138.                              32766,32767,32768,32769,32770,
  139.                              65534,65535,65536,65537,65538,
  140.                              131070,131071,131072,131073,131074 // largest stride is still below max_step (128*1024 + 10)
  141.                              );
  142.   arr: TMyArray; // the single buffer shared by all benchmark runs
  143.   st: NativeInt; // current stride taken from steps
  144.   bench: TBenchProc; // current benchmark taken from bench_procs
  145.  
  146. begin
  147.   SetLength(arr, size + max_step*16); // padding: the bench loops check "i < size" only once per 16 strided accesses
  148.  
  149.   WriteLn('steps count: ', steps_count);
  150.  
  151.   for bench in bench_procs do // every benchmark is run with every stride
  152.     for st in steps do
  153.       bench(arr, st);
  154.  
  155.   ReadLn; // wait for Enter before exiting
  156. end.
  157.  
I may seem rude - please don't take it personally

DragoRosso

  • Guest
Re: What is more (memory) efficient
« Reply #43 on: February 06, 2025, 08:05:22 am »
I tried to implement an algorithm to test the processor cache performance depending on the “record size”, or in other words the data step/size
What makes you believe that with the proposed code you are analyzing the performance of the processor cache?

ALLIGATOR

  • Sr. Member
  • ****
  • Posts: 329
  • I use FPC [main] 💪🐯💪
Re: What is more (memory) efficient
« Reply #44 on: February 06, 2025, 08:13:04 am »
I may seem rude - please don't take it personally

 

TinyPortal © 2005-2018