* * *

Author Topic: another fpc vs gcc benchmark  (Read 467 times)

fcu

  • New member
  • *
  • Posts: 7
another fpc vs gcc benchmark
« on: September 13, 2017, 06:22:53 pm »
Hi
This is just for fun. All I want to know is where the slowness is coming from.
When I translated the C to Pascal I thought the speed would be the same, because there is not much calculation, but
OK, here is the result on my machine (Win7 32-bit, Intel G2020):
fpc : 140 fps
gcc : 335 fps

pascal : compiled with -O3
Code: Pascal  [Select]
  1. program demo;
  2. {$ifdef FPC}{$mode objfpc}{$h+}{$endif}
  3. {$ifdef mswindows}{$apptype gui}{$endif}
  4. uses  sdl,math;
  5.        
  6. const
  7.   s_w = 1024;  // screen width in pixels, passed to SDL_SetVideoMode
  8.   s_h = 640;  // screen height in pixels, passed to SDL_SetVideoMode
  9.  
  10. var
  11.   event : TSDL_Event;  // filled by SDL_PollEvent in the main loop
  12.   done : boolean = false;  // set to true to leave the main loop
  13.   screen : PSDL_Surface;  // surface returned by SDL_SetVideoMode
  14.   myLut  : array[0..s_w*s_h-1] of longword;  // one packed entry per screen pixel, filled by initLut
  15.   myTex  : array[0..256*256-1  ] of longword;  // 256x256 texture, filled by initTexture
  16.   count,tickf,tickl,fps : longword;  // frame counter, start/latest SDL tick stamps, last published frame count
  17.   dif : single;  // (tickl-tickf)/1000, compared against 1.0 to publish the fps
  18.  
  19. function inttostr(v : integer): ansistring; inline;  // Returns v formatted as text by the Str() intrinsic.
  20. begin
  21.   str(v,result);
  22. end;
  23.  
  24. procedure initLut();  // Fills myLut with one packed word per screen pixel: iw in bits 16-23, iv in bits 8-15, iu in bits 0-7.
  25. var i,j : longint;
  26.     x,y,w,r,a,u,v : single;
  27.     iu,iv,iw : int32;
  28. begin
  29.         for j:=0 to s_h -1 do
  30.     for i:=0 to s_w -1 do
  31.     begin
  32.         x := -1.0 + i*(2.0/s_w);  // column i mapped from -1.0 up to just under 1.0
  33.         y :=  1.0 - j*(2.0/s_h);  // row j mapped from 1.0 down to just above -1.0
  34.         r := sqrt( x*x+y*y );  // distance of (x,y) from the origin
  35.         a := arctan2( y, x );  // angle of (x,y) around the origin
  36.  
  37.         u := 1.0/r;  // NOTE(review): r is 0 at i = s_w/2, j = s_h/2, so this divides by zero there - confirm the resulting behaviour
  38.         v := a*(3.0/3.14159);  // angle rescaled by 3/pi (pi approximated as 3.14159)
  39.         w := r*r;
  40.         if( w>1.0  ) then w := 1.0;  // clamp so w*255 stays within one byte
  41.  
  42.         iu := round(u*255.0);  // round() here, while the C listing further down truncates with (int) - the two tables can differ
  43.         iv := round(v*255.0);
  44.         iw := round(w*255.0);
  45.  
  46.         myLut[s_w*j+i] := ((iw and 255)<<16) or ((iv and 255)<<8) or (iu and 255);  // only the low 8 bits of each value are kept
  47.     end;
  48. end;
  49.  
  50. procedure initTexture();  // Fills the 256x256 myTex table with a grey pattern mixing an XOR texture and a checkerboard.
  51. var i,j,r,g : integer;
  52. begin
  53.         for j:=0 to 256-1 do
  54.     for i:=0 to 256-1 do
  55.     begin
  56.         r := (i xor j);  // XOR pattern, 0..255
  57.         g := (((i>>6) and 1) xor ((j>>6) and 1))*255;  // 0 or 255, alternating every 64 texels in each direction
  58.         g := (g*5 + 3*r)>>3;  // weighted mix (5 parts checker, 3 parts XOR) divided by 8, so g stays in 0..255
  59.         myTex[256*j+i] := $ff000000 or (g<<16) or (g<<8) or g;  // same g in the three low bytes, top byte set to $ff
  60.     end;
  61. end;
  62.  
  63. procedure intro_init();  // Builds both lookup tables (myTex, then myLut) before the render loop starts.
  64. begin
  65.         initTexture();
  66.     initLut();
  67. end;
  68.  
  69. function colScale( col : longword;sca : integer ):longword;  // Multiplies the three low bytes of col by sca/256; the top byte of the result is always 0. NOTE(review): mixes longword and integer operands and is not marked inline - the replies below the listings tie the slowdown to both.
  70. begin
  71.     result := ((((col and $00ff00ff)*sca)>>8) and $00ff00ff) +  // bytes 0 and 2 scaled together in one multiply
  72.               ((((col and $0000ff00)*sca)>>8) and $0000ff00);  // byte 1 scaled separately so its product cannot spill into byte 2
  73. end;
  74.  
  75. procedure do_intro( buffer : plongword; itime : longint );  // Writes s_w*s_h pixels to buffer: each one is a myTex texel chosen via myLut and itime, scaled by colScale.
  76. var i , val, col : integer;  // NOTE(review): val and col are signed but are loaded from longword tables and col is passed to a longword parameter
  77. begin
  78.     for i:=0 to s_w*s_h-1 do
  79.     begin
  80.         val := myLut[i];
  81.             col := myTex[ ((val and $0000ffff)+(itime>>2)) and $0000ffff ];  // low 16 bits of the LUT entry plus itime/4, wrapped to the 65536-entry texture
  82.         buffer[i] := colScale( col, (val>>16) );  // bits 16 and up of the LUT entry are the scale factor
  83.     end;
  84. end;
  85.  
  86. begin  // Main program: opens the window, builds the tables, then renders frames until Escape or a quit event.
  87.        
  88.         SDL_Init(SDL_INIT_VIDEO);  // NOTE(review): return value is not checked
  89.         screen := SDL_SetVideoMode( s_w, s_h, 32 , SDL_SWSURFACE );  // NOTE(review): screen is not checked for nil before screen^.pixels is used below
  90.         intro_init();
  91.         tickf := SDL_Getticks();
  92.        
  93.     while not done  do
  94.         begin
  95.       dif := (tickl-tickf) / 1000;  // NOTE(review): on the first pass tickl has not been assigned yet - confirm the intended start value
  96.       if (dif >= 1.0 ) then
  97.       begin
  98.         tickf := tickl;  // start a new measuring window
  99.         fps := count;
  100.         count := 0;
  101.             SDL_WM_SetCaption(pchar(inttostr(fps)+' fps'),nil);  // show the frame count in the window caption
  102.       end
  103.       else
  104.       begin
  105.         inc(count);  // frames are only counted on passes that do not publish
  106.       end;     
  107.           while  SDL_PollEvent( @event ) > 0  do  // drain all pending events
  108.             case event.type_  of
  109.             SDL_KEYDOWN:
  110.                 if ( event.key.keysym.sym = SDLK_ESCAPE ) then
  111.                 begin
  112.                   done := true;
  113.                 end;
  114.                 SDL_QUITEV:  // second label of the same case statement, despite the deeper indentation
  115.                 begin
  116.                   done := true;
  117.                 end;   
  118.           end;  // closes the case statement
  119.           do_intro(screen^.pixels,SDL_Getticks());  // render one frame straight into the surface pixels
  120.           tickl := SDL_GetTicks();
  121.           SDL_Flip(screen);
  122.         end;           
  123.  
  124.         SDL_FreeSurface(screen);  // NOTE(review): SDL 1.2 documents the SDL_SetVideoMode surface as freed by SDL_Quit, not by the caller, and SDL_Quit is never called here - confirm
  125. end.
  126.  
  127.  

gcc : also compiled with -O3

Code: C  [Select]
  1. #include <sdl.h>
  2. #include <math.h>
  3.  
  4. const int s_w = 1024;  // screen width in pixels, passed to SDL_SetVideoMode
  5. const int s_h = 640;  // screen height in pixels, passed to SDL_SetVideoMode
  6.  
  7.  
  8. static unsigned int myLut[s_w*s_h];  // one packed entry per screen pixel, filled by initLut. NOTE(review): a const int array size at file scope is accepted by C++ but not by ISO C - presumably built as C++
  9. static unsigned int myTex[256*256];  // 256x256 texture, filled by initTexture
  10.  
  11. void initLut( void )  // Fills myLut with one packed word per screen pixel: iw in bits 16-23, iv in bits 8-15, iu in bits 0-7.
  12. {
  13.         for( int j=0; j<s_h; j++ )
  14.     for( int i=0; i<s_w; i++ )
  15.     {
  16.         const float x = -1.0f + (float)i*(2.0f/(float)s_w);  // column i mapped from -1.0 up to just under 1.0
  17.         const float y =  1.0f - (float)j*(2.0f/(float)s_h);  // row j mapped from 1.0 down to just above -1.0
  18.         const float r = sqrt( x*x+y*y );  // distance of (x,y) from the origin
  19.         const float a = atan2( y, x );  // angle of (x,y) around the origin
  20.  
  21.         const float u = 1.0f/r;  // NOTE(review): r is 0 at i = s_w/2, j = s_h/2, so this divides by zero there and the (int) conversion below is then out of range - confirm
  22.         const float v = a*(3.0f/3.14159f);  // angle rescaled by 3/pi (pi approximated as 3.14159)
  23.               float w = r*r;
  24.         if( w>1.0f ) w=1.0f;  // clamp so w*255 stays within one byte
  25.  
  26.         const int iu = (int)(u*255.0f);  // (int) truncates toward zero, while the Pascal listing above uses round() - the two tables can differ
  27.         const int iv = (int)(v*255.0f);
  28.         const int iw = (int)(w*255.0f);
  29.  
  30.         myLut[s_w*j+i] = ((iw&255)<<16) | ((iv&255)<<8) | (iu&255);  // only the low 8 bits of each value are kept
  31.     }
  32. }
  33.  
  34.  
  35. void initTexture( void )  // Fills the 256x256 myTex table with a grey pattern mixing an XOR texture and a checkerboard.
  36. {
  37.         for( int j=0; j<256; j++ )
  38.     for( int i=0; i<256; i++ )
  39.     {
  40.         int r = (i ^ j);  // XOR pattern, 0..255
  41.         int g = (((i>>6)&1)^((j>>6)&1))*255;  // 0 or 255, alternating every 64 texels in each direction
  42.         g = (g*5 + 3*r)>>3;  // weighted mix (5 parts checker, 3 parts XOR) divided by 8, so g stays in 0..255
  43.         myTex[256*j+i] = 0xff000000 | (g<<16) | (g<<8) | g;  // same g in the three low bytes, top byte set to 0xff
  44.     }
  45. }
  46. void intro_init( void )  // Builds both lookup tables (myTex, then myLut) before the render loop starts.
  47. {
  48.         initTexture();
  49.     initLut();
  50. }
  51.  
  52.  
  53. unsigned int colScale( unsigned int col, int sca )  // Multiplies the three low bytes of col by sca/256; the top byte of the result is always 0.
  54. {
  55.     return ((((col&0x00ff00ff)*sca)>>8)&0x00ff00ff) + ((((col&0x0000ff00)*sca)>>8)&0x0000ff00);  // bytes 0 and 2 share one multiply; byte 1 is scaled separately so its product cannot spill into byte 2
  56.  
  57. }
  58. void do_intro( unsigned int *buffer, long itime )  // Writes s_w*s_h pixels to buffer: each one is a myTex texel chosen via myLut and itime, scaled by colScale.
  59. {
  60.     for( int i=0; i<s_w*s_h; i++ )
  61.     {
  62.         const unsigned int val = myLut[i];
  63.             const unsigned int col = myTex[ ((val&0x0000ffff)+(itime>>2))&0x0000ffff ];  // low 16 bits of the LUT entry plus itime/4, wrapped to the 65536-entry texture
  64.         buffer[i] = colScale( col, (val>>16) );  // bits 16 and up of the LUT entry are the scale factor
  65.     }
  66. }
  67.  
  68. int main ( int argc, char** argv )  // Opens the window, builds the tables, then renders frames until Escape or a quit event; always returns 0.
  69. {
  70.     unsigned int tickf,tickl = 0,fps,count = 0;  // tickl starts at 0, so the first tickl-tickf below wraps around as unsigned
  71.     float dif;
  72.     char buf[32];
  73.     SDL_Event event;
  74.  
  75.  
  76.     SDL_Init( SDL_INIT_VIDEO );  // NOTE(review): return value is not checked
  77.     atexit(SDL_Quit);  // NOTE(review): atexit needs <stdlib.h>, which this listing does not include
  78.  
  79.     SDL_Surface* screen = SDL_SetVideoMode(s_w, s_h, 32, SDL_SWSURFACE );  // NOTE(review): screen is not checked for NULL before screen->pixels is used below
  80.     intro_init();
  81.  
  82.     bool done = false;  // NOTE(review): bool without <stdbool.h> - presumably built as C++
  83.     tickf = SDL_GetTicks();
  84.     while (!done)
  85.     {
  86.      dif = (tickl-tickf) / 1000;  // unsigned integer division, so dif only takes whole values (the Pascal listing above uses real division)
  87.      if (dif >= 1.0f ){
  88.         tickf = tickl;  // start a new measuring window
  89.         fps = count;
  90.         count = 0;
  91.         sprintf(buf,"%d fps",fps);  // NOTE(review): fps is unsigned, so %u would match; sprintf also needs <stdio.h>, not included in this listing
  92.         SDL_WM_SetCaption(buf,NULL);  // show the frame count in the window caption
  93.      } else {
  94.        count++;  // frames are only counted on passes that do not publish
  95.      };
  96.       while (SDL_PollEvent(&event))  // drain all pending events
  97.       {
  98.         switch (event.type)
  99.         {
  100.           case SDL_QUIT:
  101.             done = true;
  102.             break;
  103.           case SDL_KEYDOWN:
  104.             if ( event.key.keysym.sym == SDLK_ESCAPE ){
  105.               done = true;
  106.             }
  107.           break;
  108.         }
  109.       }
  110.  
  111.      do_intro((unsigned int *)screen->pixels, SDL_GetTicks());  // render one frame straight into the surface pixels
  112.      SDL_Flip(screen);
  113.      tickl = SDL_GetTicks();
  114.     }
  115.  
  116.     SDL_FreeSurface(screen);  // NOTE(review): SDL 1.2 documents the SDL_SetVideoMode surface as freed by SDL_Quit, not by the caller - confirm
  117.     return 0;
  118. }
  119.  
  120.  


Nitorami

  • Full Member
  • ***
  • Posts: 227
Re: another fpc vs gcc benchmark
« Reply #1 on: September 13, 2017, 07:03:04 pm »
The bottleneck seems to be in your function colScale. I get a factor-of-2 speed increase when replacing all occurrences of longword in the code with longint.

This may be a penalty of mixing signed and unsigned types; AFAIK Pascal is stricter than C here. 

fcu

  • New member
  • *
  • Posts: 7
Re: another fpc vs gcc benchmark
« Reply #2 on: September 13, 2017, 07:12:32 pm »
Yes, that's true, thanks.
Also, by inlining the colScale function I get the same speed now.
Thanks again.

 

Recent

Get Lazarus at SourceForge.net. Fast, secure and Free Open Source software downloads Open Hub project report for Lazarus