Recent

Author Topic: FFT performance  (Read 4917 times)

Thaddy

  • Hero Member
  • *****
  • Posts: 18676
  • Jungle wars. And failing health it seems.
Re: FFT performance
« Reply #45 on: October 21, 2025, 04:39:06 pm »
So my code is only valid for 64 bit systems. I will mark it as such.
I do not intend to adapt it for 32 bit since I don't use it. Hail windows 10 end of line too.
It is adaptable, but not with the macro trickery because the abi forces you to use eax and know the size.
BTW: it works on 32 bit linux intel?
« Last Edit: October 21, 2025, 04:44:38 pm by Thaddy »
Due to censorship, I changed this to "Nelly the Elephant". Keeps the message clear.

Josh

  • Hero Member
  • *****
  • Posts: 1447
Re: FFT performance
« Reply #46 on: October 21, 2025, 05:27:37 pm »
my results
plenty of fiddling, added ability toswapbetween single double.
iended up with a messwith testing, but cleaned it up.
how it performs,not a clue, sort of optimized for larger data. you can set the max unroll count

Code: Pascal  [Select][+][-]
  1. 16384-point FFT Forward (avg 100 runs,auto-unroll,type=Double): 116.9 ns/point
  2. 16384-point FFT Inverse (avg 100 runs,auto-unroll,type=Double): 117.2 ns/point

Code: Pascal  [Select][+][-]
  1. program myfft;
  2. {$mode objfpc}
  3. uses
  4.   windows,sysutils,math;
  5.  
  6. type
  7.   TFFTFloat=double; // change to Single if desired
  8.  
  9. const
  10.   FFT_FORWARD=1;
  11.   FFT_INVERSE=-1;
  12.   MAX_POINTS=16384;
  13.   MAX_UNROLL=128;
  14.  
  15. var
  16.   Re,Im: array of TFFTFloat;
  17.   BitRevTable: array of Cardinal;
  18.   TwRe,TwIm:array of array of TFFTFloat;
  19.   nPoints,NumRuns,run:Integer;
  20.   t0,t1,t2,freq:Int64;
  21.   nsForward,nsInverse:Double;
  22.  
  23. function BitReverse32(v:Cardinal;Bits:Integer):Cardinal;inline;
  24. begin
  25.   v:=((v and $55555555) shl 1) or ((v and $AAAAAAAA) shr 1);
  26.   v:=((v and $33333333) shl 2) or ((v and $CCCCCCCC) shr 2);
  27.   v:=((v and $0F0F0F0F) shl 4) or ((v and $F0F0F0F0) shr 4);
  28.   v:=((v and $00FF00FF) shl 8) or ((v and $FF00FF00) shr 8);
  29.   v:=((v and $0000FFFF) shl 16) or ((v and $FFFF0000) shr 16);
  30.   Result:=v shr (32-Bits);
  31. end;
  32.  
  33. procedure InitBitRevTable(nPoints:Integer);
  34. var i,Bits: Integer;
  35. begin
  36.   SetLength(BitRevTable,nPoints);
  37.   Bits:=Trunc(Log2(nPoints));
  38.   for i:=0 to nPoints-1 do BitRevTable[i]:=BitReverse32(i,Bits);
  39. end;
  40.  
  41. procedure InitTwiddles(nPoints:Integer;Direction:Integer);
  42. var stage,half,k,maxStage:Integer;
  43.     theta:TFFTFloat;
  44. begin
  45.   maxStage:=Trunc(Log2(nPoints));
  46.   SetLength(TwRe,maxStage);
  47.   SetLength(TwIm,maxStage);
  48.   for stage:=1 to maxStage do
  49.   begin
  50.     half:=1 shl (stage-1);
  51.     SetLength(TwRe[stage-1],half);
  52.     SetLength(TwIm[stage-1],half);
  53.     theta:=2*Pi/(2*half)*Direction;
  54.     for k:=0 to half-1 do SinCos(theta*k,TwIm[stage-1][k],TwRe[stage-1][k]);
  55.   end;
  56. end;
  57.  
  58. procedure FFT_InPlace(var Re,Im: array of TFFTFloat);
  59. var n,stage,maxStage,half,block,idx,idx1,idx2,j,k,tailStart,UnrollCount:Integer;
  60.     wRe,wIm,tmpRe,tmpIm,rVal,iVal,tR,tI:TFFTFloat;
  61. begin
  62.   n:=Length(Re);
  63.   if n<=1 then Exit;
  64.   for idx:=0 to n-1 do
  65.   begin
  66.     j:=BitRevTable[idx];
  67.     if idx<j then
  68.     begin
  69.       tmpRe:=Re[idx];
  70.       Re[idx]:=Re[j];
  71.       Re[j]:=tmpRe;
  72.       tmpRe:=Im[idx];
  73.       Im[idx]:=Im[j];
  74.       Im[j]:=tmpRe;
  75.     end;
  76.   end;
  77.   maxStage:=Trunc(Log2(n));
  78.   for stage:=1 to maxStage do
  79.   begin
  80.     half:=1 shl (stage-1);
  81.     // unroll only for large stags
  82.     UnrollCount:=1;
  83.     while (UnrollCount*2<=half) and (UnrollCount*2<=MAX_UNROLL) do UnrollCount:=UnrollCount*2;
  84.     for block:=0 to (n div (2*half))-1 do
  85.     begin
  86.       idx:=block*2*half;
  87.       if half>=UnrollCount then
  88.       begin
  89.         // unroll
  90.         for j:=0 to (half div UnrollCount)-1 do
  91.         begin
  92.           idx1:=idx+j*UnrollCount;
  93.           idx2:=idx1+half;
  94.           for k:=0 to UnrollCount-1 do
  95.           begin
  96.             rVal:=Re[idx1+k];
  97.             iVal:=Im[idx1+k];
  98.             tR:=Re[idx2+k];
  99.             tI:=Im[idx2+k];
  100.             wRe:=TwRe[stage-1][j*UnrollCount+k];
  101.             wIm:=TwIm[stage-1][j*UnrollCount+k];
  102.             tmpRe:=wRe*tR-wim*ti;
  103.             tmpIm:=wRe*tI+wIm*tr;
  104.             Re[idx1+k]:=rval+tmpRe;
  105.             Im[idx1+k]:=ival+tmpIm;
  106.             Re[idx2+k]:=rval-tmpRe;
  107.             Im[idx2+k]:=ival-tmpIm;
  108.           end;
  109.         end;
  110.         tailStart:=(half div UnrollCount)*UnrollCount;
  111.       end
  112.       else tailStart:=0;
  113.       for j:=tailStart to half-1 do
  114.       begin
  115.         idx1:=idx+j;
  116.         idx2:=idx1+half;
  117.         wRe:=TwRe[stage-1][j];
  118.         wIm:=TwIm[stage-1][j];
  119.         tmpRe:=wRe*Re[idx2]-wIm*Im[idx2];
  120.         tmpIm:=wRe*Im[idx2]+wIm*Re[idx2];
  121.         rVal:=Re[idx1];
  122.         iVal:=Im[idx1];
  123.         Re[idx1]:=rVal+tmpRe;
  124.         Im[idx1]:=iVal+tmpim;
  125.         Re[idx2]:=rVal-tmpRe;
  126.         Im[idx2]:=iVal-tmpIm;
  127.       end;
  128.     end;
  129.   end;
  130. end;
  131.  
  132. procedure ScaleInverse(var Re,Im:array of TFFTFloat);
  133. var k,n:Integer;
  134.     tmp:TFFTFloat;
  135. begin
  136.   n:=Length(Re);
  137.   tmp:=1.0/n;
  138.   for k:=0 to n-1 do
  139.   begin
  140.     Re[k]:=Re[k]*tmp;
  141.     Im[k]:=Im[k]*tmp;
  142.   end;
  143. end;
  144.  
  145. begin
  146.   nPoints:=16384;
  147.   NumRuns:=100;
  148.   SetLength(Re,nPoints);
  149.   SetLength(Im,nPoints);
  150.   Randomize;
  151.   for run:=0 to nPoints-1 do
  152.   begin
  153.     Re[run]:=Sin(2*Pi*run/nPoints)+((Random(100)/100)-0.5);
  154.     Im[run]:=0.0;
  155.   end;
  156.   InitBitRevTable(nPoints);
  157.   InitTwiddles(nPoints,FFT_FORWARD);
  158.   InitTwiddles(nPoints,FFT_INVERSE);
  159.   // warm-up
  160.   FFT_InPlace(Re,Im);
  161.   ScaleInverse(Re,Im);
  162.   QueryPerformanceFrequency(freq);
  163.   nsForward:=0;
  164.   nsInverse:=0;
  165.   for run:=1 to NumRuns do
  166.   begin
  167.     QueryPerformanceCounter(t0);
  168.     FFT_InPlace(Re,Im); // forward
  169.     QueryPerformanceCounter(t1);
  170.     FFT_InPlace(Re,Im); // backward
  171.     ScaleInverse(Re,Im);
  172.     QueryPerformanceCounter(t2);
  173.     nsForward:=nsForward+((t1-t0)/freq)/nPoints*1e9;
  174.     nsInverse:=nsInverse+((t2-t1)/freq)/nPoints*1e9;
  175.   end;
  176.   Writeln(Format('%d-point FFT Forward (avg %d runs,auto-unroll,type=%s): %.1f ns/point',[nPoints,NumRuns,'Double',nsForward/NumRuns]));
  177.   Writeln(Format('%d-point FFT Inverse (avg %d runs,auto-unroll,type=%s): %.1f ns/point',[nPoints,NumRuns,'Double',nsInverse/NumRuns]));
  178.   Readln;
  179. end.
  180.  


Ill at the moment, cant move, a bit of time to spare, so twiddling better than being bored...
The best way to get accurate information on the forum is to post something wrong and wait for corrections.

marcov

  • Administrator
  • Hero Member
  • *
  • Posts: 12593
  • FPC developer.
Re: FFT performance
« Reply #47 on: October 21, 2025, 05:41:52 pm »
I tried to run that on my 5700x and I got

Quote
16384-point FFT Forward (avg 100 runs,auto-unroll,type=Double): 22,1 ns/point
16384-point FFT Inverse (avg 100 runs,auto-unroll,type=Double): 22,5 ns/point

Making it single and adding {$excessprecision off} didn't really change timings.

22ns/point is about 22ms/1024, which is about the same as my adaptation of nils haeck.
« Last Edit: October 21, 2025, 10:20:17 pm by marcov »

Thaddy

  • Hero Member
  • *****
  • Posts: 18676
  • Jungle wars. And failing health it seems.
Re: FFT performance
« Reply #48 on: October 21, 2025, 07:02:30 pm »
Nowadays, single might actually be slower than double in my experience.
Due to censorship, I changed this to "Nelly the Elephant". Keeps the message clear.

Josh

  • Hero Member
  • *****
  • Posts: 1447
Re: FFT performance
« Reply #49 on: October 21, 2025, 08:52:21 pm »
targetting 32bit i had noticeable difference, about 30%
the unroll feature is more usefull on larger dataset,
The best way to get accurate information on the forum is to post something wrong and wait for corrections.

marcov

  • Administrator
  • Hero Member
  • *
  • Posts: 12593
  • FPC developer.
Re: FFT performance
« Reply #50 on: October 21, 2025, 10:21:01 pm »
Nowadays, single might actually be slower than double in my experience.

Without $excessprecision, sure.

Curt Carpenter

  • Hero Member
  • *****
  • Posts: 698
Re: FFT performance
« Reply #51 on: October 22, 2025, 06:01:03 am »
Has anyone pursued an AI for an optimized general-purpose fft (any language)?   Seems like a really good research case.

jamie

  • Hero Member
  • *****
  • Posts: 7491
Re: FFT performance
« Reply #52 on: October 22, 2025, 11:55:55 am »
Has anyone pursued an AI for an optimized general-purpose fft (any language)?   Seems like a really good research case.

If you want bad and uncompliable code, much of it is pure jibberish.
« Last Edit: October 22, 2025, 11:57:44 am by jamie »
The only true wisdom is knowing you know nothing

Josh

  • Hero Member
  • *****
  • Posts: 1447
Re: FFT performance
« Reply #53 on: October 22, 2025, 12:21:58 pm »
IMO
Agree AI may generate workable code, the effort you have to put in to create safe code, and correct its errors, you may as well write it yourself. Plus you need to understand what it generates to debug it later.

The real problem with AI is the data is scraped from various sources, and what it gives you could be copyrighted or have licensing restrictions that does not match your needs. Even if you ask AI for the original source, it does not tell you.
You could end up being in a legal nightmare down the line, ignorance is not an excuse, blaming AI would be laughed out of court, same way as blaming SATNAV when your caught speeding, or drive off a cliff because it told you to..

It should only be used  for ideas or technical information, not direct coding.
« Last Edit: October 22, 2025, 12:25:43 pm by Josh »
The best way to get accurate information on the forum is to post something wrong and wait for corrections.

marcov

  • Administrator
  • Hero Member
  • *
  • Posts: 12593
  • FPC developer.
Re: FFT performance
« Reply #54 on: October 22, 2025, 01:01:07 pm »
Has anyone pursued an AI for an optimized general-purpose fft (any language)?   Seems like a really good research case.

The problem is that it might cite GPL fftw code.

Also the power of two case is too simple. Make sure you ask for non power of two.

Josh

  • Hero Member
  • *****
  • Posts: 1447
Re: FFT performance
« Reply #55 on: October 22, 2025, 09:18:41 pm »
Hi

Done more fiddling and testing,even tried unrolling in blocks, and others, but finished with this version

My results on me little Lappy Intel(R) Pentium(R) CPU 5405U @ 2.30GHz (2.30 GHz)
Quote
16384-point FFT Forward (avg 100 runs,default-unroll,type=Double): 89.9 ns/point
16384-point FFT Inverse (avg 100 runs,default-unroll,type=Double): 90.6 ns/point

Code: Pascal  [Select][+][-]
  1. program myfft;
  2. {$mode objfpc}
  3. uses
  4.   windows,sysutils,math;
  5.  
  6. type
  7.   TFFTFloat=double; // change to Single if desired
  8.  
  9. const
  10.   FFT_FORWARD=1;
  11.   FFT_INVERSE=-1;
  12.   MAX_POINTS=16384;
  13.   MAX_UNROLL=128;
  14.   DEFAULT_UNROLL_COUNT=8;
  15.  
  16. var
  17.   Re,Im: array of TFFTFloat;
  18.   BitRevTable: array of Cardinal;
  19.   TwRe,TwIm:array of array of TFFTFloat;
  20.   nPoints,NumRuns,run:Integer;
  21.   t0,t1,t2,freq:Int64;
  22.   nsForward,nsInverse:Double;
  23.   UnrollCountArray:Array[0..255] of Byte;
  24.  
  25. function BitReverse32(v:Cardinal;Bits:Integer):Cardinal;inline;
  26. begin
  27.   v:=((v and $55555555) shl 1) or ((v and $AAAAAAAA) shr 1);
  28.   v:=((v and $33333333) shl 2) or ((v and $CCCCCCCC) shr 2);
  29.   v:=((v and $0F0F0F0F) shl 4) or ((v and $F0F0F0F0) shr 4);
  30.   v:=((v and $00FF00FF) shl 8) or ((v and $FF00FF00) shr 8);
  31.   v:=((v and $0000FFFF) shl 16) or ((v and $FFFF0000) shr 16);
  32.   Result:=v shr (32-Bits);
  33. end;
  34.  
  35. procedure InitBitRevTable(nPoints:Integer);
  36. var i,Bits: Integer;
  37. begin
  38.   SetLength(BitRevTable,nPoints);
  39.   Bits:=Trunc(Log2(nPoints));
  40.   for i:=0 to nPoints-1 do BitRevTable[i]:=BitReverse32(i,Bits);
  41. end;
  42.  
  43. procedure InitTwiddles(nPoints:Integer;Direction:Integer);
  44. var stage,half,k,maxStage:Integer;
  45.     theta:TFFTFloat;
  46. begin
  47.   maxStage:=Trunc(Log2(nPoints));
  48.   SetLength(TwRe,maxStage);
  49.   SetLength(TwIm,maxStage);
  50.   for stage:=1 to maxStage do
  51.   begin
  52.     half:=1 shl (stage-1);
  53.     SetLength(TwRe[stage-1],half);
  54.     SetLength(TwIm[stage-1],half);
  55.     theta:=2*Pi/(2*half)*Direction;
  56.     for k:=0 to half-1 do SinCos(theta*k,TwIm[stage-1][k],TwRe[stage-1][k]);
  57.   end;
  58. end;
  59.  
  60. procedure FFT_InPlace(var Re,Im: array of TFFTFloat;AutoUnroll:Byte=0);
  61. var n,stage,maxStage,half,block,idx,idx1,idx2,j,k,tailStart,UnrollCount:Integer;
  62.     wRe,wIm,tmpRe,tmpIm,rVal,iVal,tR,tI:TFFTFloat;
  63. begin
  64.   n:=Length(Re);
  65.   if n<=1 then Exit;
  66.   for idx:=0 to n-1 do
  67.   begin
  68.     j:=BitRevTable[idx];
  69.     if idx<j then
  70.     begin
  71.       tmpRe:=Re[idx];
  72.       Re[idx]:=Re[j];
  73.       Re[j]:=tmpRe;
  74.       tmpRe:=Im[idx];
  75.       Im[idx]:=Im[j];
  76.       Im[j]:=tmpRe;
  77.     end;
  78.   end;
  79.   maxStage:=Trunc(Log2(n));
  80.   for stage:=1 to maxStage do
  81.   begin
  82.     half:=1 shl (stage-1);
  83.     // unroll only for large stags
  84.     if AutoUnroll=1 then
  85.     begin
  86.       UnrollCount:=1;
  87.       while (UnrollCount*2<=half) and (UnrollCount*2<=MAX_UNROLL) do UnrollCount:=UnrollCount*2;
  88.     end
  89.     else UnrollCount:=UnrollCountArray[AutoUnroll];
  90.     for block:=0 to (n div (2*half))-1 do
  91.     begin
  92.       idx:=block*2*half;
  93.       if half>=UnrollCount then
  94.       begin
  95.         // unroll
  96.         for j:=0 to (half div UnrollCount)-1 do
  97.         begin
  98.           idx1:=idx+j*UnrollCount;
  99.           idx2:=idx1+half;
  100.           for k:=0 to UnrollCount-1 do
  101.           begin
  102.             rVal:=Re[idx1+k];
  103.             iVal:=Im[idx1+k];
  104.             tR:=Re[idx2+k];
  105.             tI:=Im[idx2+k];
  106.             wRe:=TwRe[stage-1][j*UnrollCount+k];
  107.             wIm:=TwIm[stage-1][j*UnrollCount+k];
  108.             tmpRe:=wRe*tR-wim*ti;
  109.             tmpIm:=wRe*tI+wIm*tr;
  110.             Re[idx1+k]:=rval+tmpRe;
  111.             Im[idx1+k]:=ival+tmpIm;
  112.             Re[idx2+k]:=rval-tmpRe;
  113.             Im[idx2+k]:=ival-tmpIm;
  114.           end;
  115.         end;
  116.         tailStart:=(half div UnrollCount)*UnrollCount;
  117.       end
  118.       else tailStart:=0;
  119.       for j:=tailStart to half-1 do
  120.       begin
  121.         idx1:=idx+j;
  122.         idx2:=idx1+half;
  123.         wRe:=TwRe[stage-1][j];
  124.         wIm:=TwIm[stage-1][j];
  125.         tmpRe:=wRe*Re[idx2]-wIm*Im[idx2];
  126.         tmpIm:=wRe*Im[idx2]+wIm*Re[idx2];
  127.         rVal:=Re[idx1];
  128.         iVal:=Im[idx1];
  129.         Re[idx1]:=rVal+tmpRe;
  130.         Im[idx1]:=iVal+tmpim;
  131.         Re[idx2]:=rVal-tmpRe;
  132.         Im[idx2]:=iVal-tmpIm;
  133.       end;
  134.     end;
  135.   end;
  136. end;
  137.  
  138. procedure ScaleInverse(var Re,Im:array of TFFTFloat);
  139. var k,n:Integer;
  140.     tmp:TFFTFloat;
  141. begin
  142.   n:=Length(Re);
  143.   tmp:=1.0/n;
  144.   for k:=0 to n-1 do
  145.   begin
  146.     Re[k]:=Re[k]*tmp;
  147.     Im[k]:=Im[k]*tmp;
  148.   end;
  149. end;
  150.  
  151. begin
  152.   UnrollCountArray[0]:=DEFAULT_UNROLL_COUNT;
  153.   for run:=1 to 255 do
  154.   begin
  155.     if run >MAX_UNROLL then UnrollCountArray[run]:=byte(MAX_UNROLL);
  156.     UnrollCountArray[run]:=byte(((run+1) div 2)*2);
  157.   end;
  158.   nPoints:=16384;
  159.   NumRuns:=100;
  160.   SetLength(Re,nPoints);
  161.   SetLength(Im,nPoints);
  162.   Randomize;
  163.   for run:=0 to nPoints-1 do
  164.   begin
  165.     Re[run]:=Sin(2*Pi*run/nPoints)+((Random(100)/100)-0.5);
  166.     Im[run]:=0.0;
  167.   end;
  168.   InitBitRevTable(nPoints);
  169.   InitTwiddles(nPoints,FFT_FORWARD);
  170.   InitTwiddles(nPoints,FFT_INVERSE);
  171.   // warm-up
  172.   FFT_InPlace(Re,Im,0);
  173.   ScaleInverse(Re,Im);
  174.   QueryPerformanceFrequency(freq);
  175.   nsForward:=0;
  176.   nsInverse:=0;
  177.   for run:=1 to NumRuns do
  178.   begin
  179.     QueryPerformanceCounter(t0);
  180.     // 0=unroll count to DEFAULT_UNROLL_COUNT, 1=auto calc, 2-128 fix unroll count to a value
  181.     FFT_InPlace(Re,Im,0); // forward
  182.     QueryPerformanceCounter(t1);
  183.     FFT_InPlace(Re,Im,0); // backward
  184.     ScaleInverse(Re,Im);
  185.     QueryPerformanceCounter(t2);
  186.     nsForward:=nsForward+((t1-t0)/freq)/nPoints*1e9;
  187.     nsInverse:=nsInverse+((t2-t1)/freq)/nPoints*1e9;
  188.   end;
  189.   Writeln(Format('%d-point FFT Forward (avg %d runs,default-unroll,type=%s): %.1f ns/point',[nPoints,NumRuns,'Double',nsForward/NumRuns]));
  190.   Writeln(Format('%d-point FFT Inverse (avg %d runs,default-unroll,type=%s): %.1f ns/point',[nPoints,NumRuns,'Double',nsInverse/NumRuns]));
  191.   Readln;
  192. end.
  193.  
The best way to get accurate information on the forum is to post something wrong and wait for corrections.

Curt Carpenter

  • Hero Member
  • *****
  • Posts: 698
Re: FFT performance
« Reply #56 on: October 22, 2025, 10:53:56 pm »
Has anyone pursued an AI for an optimized general-purpose fft (any language)?   Seems like a really good research case.

If you want bad and uncompliable code, much of it is pure jibberish.

I should have been more precise.  Has anyone published any serious research on applying AI to fft optimization insofar as it is a well-formed problem?

MathMan

  • Sr. Member
  • ****
  • Posts: 466
Re: FFT performance
« Reply #57 on: October 23, 2025, 12:10:07 am »
Has anyone pursued an AI for an optimized general-purpose fft (any language)?   Seems like a really good research case.

If you want bad and uncompliable code, much of it is pure jibberish.

I should have been more precise.  Has anyone published any serious research on applying AI to fft optimization insofar as it is a well-formed problem?

Not as far as i know. My view on the matters is a bit geared towards Number Theoretic Transforms (NTT - the pure integer variant of FFT, which is used in arbitrary precision multiplications and crypto), but I also read a fair amount on real/complex FFT. Haven't come across a publication that was based on AI assisted research.

jamie

  • Hero Member
  • *****
  • Posts: 7491
Re: FFT performance
« Reply #58 on: October 23, 2025, 12:39:11 am »
no optimizer turned on.
Code: Pascal  [Select][+][-]
  1. unit Unit1;
  2.  
  3. {$mode objfpc}{$H+}
  4.  
  5. interface
  6.  
  7. uses
  8.   Classes, SysUtils, Forms, Controls, Graphics, Dialogs, StdCtrls;
  9.  
  10. type
  11.  
  12.   { TForm1 }
  13.  
  14.   TForm1 = class(TForm)
  15.     Button1: TButton;
  16.     procedure Button1Click(Sender: TObject);
  17.   private
  18.  
  19.   public
  20.  
  21.   end;
  22.  
  23. var
  24.   Form1: TForm1;
  25.  
  26. implementation
  27.  
  28. {$R *.lfm}
  29. const BitReverseTable256:Array[0..255] of Byte =
  30. (
  31.   $00, $80, $40, $C0, $20, $A0, $60, $E0, $10, $90, $50, $D0, $30, $B0, $70, $F0,
  32.   $08, $88, $48, $C8, $28, $A8, $68, $E8, $18, $98, $58, $D8, $38, $B8, $78, $F8,
  33.   $04, $84, $44, $C4, $24, $A4, $64, $E4, $14, $94, $54, $D4, $34, $B4, $74, $F4,
  34.   $0C, $8C, $4C, $CC, $2C, $AC, $6C, $EC, $1C, $9C, $5C, $DC, $3C, $BC, $7C, $FC,
  35.   $02, $82, $42, $C2, $22, $A2, $62, $E2, $12, $92, $52, $D2, $32, $B2, $72, $F2,
  36.   $0A, $8A, $4A, $CA, $2A, $AA, $6A, $EA, $1A, $9A, $5A, $DA, $3A, $BA, $7A, $FA,
  37.   $06, $86, $46, $C6, $26, $A6, $66, $E6, $16, $96, $56, $D6, $36, $B6, $76, $F6,
  38.   $0E, $8E, $4E, $CE, $2E, $AE, $6E, $EE, $1E, $9E, $5E, $DE, $3E, $BE, $7E, $FE,
  39.   $01, $81, $41, $C1, $21, $A1, $61, $E1, $11, $91, $51, $D1, $31, $B1, $71, $F1,
  40.   $09, $89, $49, $C9, $29, $A9, $69, $E9, $19, $99, $59, $D9, $39, $B9, $79, $F9,
  41.   $05, $85, $45, $C5, $25, $A5, $65, $E5, $15, $95, $55, $D5, $35, $B5, $75, $F5,
  42.   $0D, $8D, $4D, $CD, $2D, $AD, $6D, $ED, $1D, $9D, $5D, $DD, $3D, $BD, $7D, $FD,
  43.   $03, $83, $43, $C3, $23, $A3, $63, $E3, $13, $93, $53, $D3, $33, $B3, $73, $F3,
  44.   $0B, $8B, $4B, $CB, $2B, $AB, $6B, $EB, $1B, $9B, $5B, $DB, $3B, $BB, $7B, $FB,
  45.   $07, $87, $47, $C7, $27, $A7, $67, $E7, $17, $97, $57, $D7, $37, $B7, $77, $F7,
  46.   $0F, $8F, $4F, $CF, $2F, $AF, $6F, $EF, $1F, $9F, $5F, $DF, $3F, $BF, $7F, $FF
  47. );
  48. Procedure Reverse32Bits(Var aValue:Cardinal); Inline;
  49. Var
  50.   Q:Array[0..3] of Byte absolute aValue;
  51.   T:Cardinal;
  52.   P:Array[0..3] of Byte absolute T;
  53. Begin
  54.    T :=AValue;
  55.    Q[3] := BitReverseTable256[P[0]];
  56.    Q[2] := BitReverseTable256[P[1]];
  57.    Q[1] := BitReverseTable256[P[2]];
  58.    Q[0] := BitReverseTable256[P[3]];
  59. end;
  60.  
  61. procedure TForm1.Button1Click(Sender: TObject);
  62. var
  63.   A:DWord;
  64. begin
  65.  A:= 1;
  66.  Reverse32Bits(A);
  67.  Caption := A.ToHexString;
  68. end;
  69.  
  70. end.
  71.  

reverse 32 bits.
The only true wisdom is knowing you know nothing

Curt Carpenter

  • Hero Member
  • *****
  • Posts: 698
Re: FFT performance
« Reply #59 on: October 23, 2025, 05:22:35 am »
Not as far as i know. My view on the matters is a bit geared towards Number Theoretic Transforms (NTT - the pure integer variant of FFT, which is used in arbitrary precision multiplications and crypto), but I also read a fair amount on real/complex FFT. Haven't come across a publication that was based on AI assisted research.

Thanks.  Never encountered NTT, but I've been very out of touch for many years now.  Given the importance of FFT though, it sure seems like a good subject for the AI-in-mathematics people. 

 

TinyPortal © 2005-2018