Recent

Author Topic: [SOLVED] Same logic but big diff (Arduino Uno R3)  (Read 2217 times)

julkas

  • Guest
[SOLVED] Same logic but big diff (Arduino Uno R3)
« on: September 24, 2019, 09:59:52 am »
I have created the following simple sketch in Arduino IDE 1.8.10 -
Code: C  [Select][+][-]
  1. void setup() {
  2.   pinMode(LED_BUILTIN, OUTPUT);
  3. }
  4.  
  5. int dv = 0;  
  6. void zdelay() {
  7.   long int i;
  8.   for(i = 0; i < 4000000L; i++) {
  9.     dv = dv * dv;
  10.   }  
  11. }
  12.  
  13. void loop() {
  14.   digitalWrite(LED_BUILTIN, HIGH);
  15.   zdelay();  
  16.   digitalWrite(LED_BUILTIN, LOW);
  17.   zdelay();
  18. }
  19.  
Avrdude result -
Code: Text  [Select][+][-]
  1. avrdude: verifying ...
  2. avrdude: 792 bytes of flash verified
  3.  
  4. avrdude done.  Thank you.
The C zdelay function delays execution by ~3 s.

Next I created the Pascal program Blinky.lpr -
Code: Pascal  [Select][+][-]
  1. program Blinky;
  2.  
  3. var
  4.   dv: Integer = 0;
  5.  
  6. procedure zdelay;
  7. var
  8.   i: LongInt;
  9. begin
  10.   for i := 1 to 4000000 do
  11.     dv := dv * dv;
  12. end;
  13.  
  14. const
  15.   PB5 = 1 shl 5; // On Port B, Bit 5, internal LED is connected on Arduino Uno.
  16.  
  17. begin
  18.   // Set internal LED to output.
  19.   DDRB := DDRB or PB5;
  20.   while True do
  21.   begin
  22.     // Turn ON internal LED.
  23.     PORTB := PORTB or PB5;
  24.     zdelay;
  25.     // Turn OFF internal LED.
  26.     PORTB := PORTB and (not PB5);
  27.     zdelay;
  28.   end;
  29. end.

Output from cross-compiler and avrdude -
Code: Text  [Select][+][-]
  1. $ ./ppcrossavr.exe -Tembedded -Cpavr5 -Wpatmega328p -Xs -O2 -l -vewhi -a Blinky.lpr
  2. Free Pascal Compiler version 3.3.1 [2019/09/22] for avr
  3. Copyright (c) 1993-2019 by Florian Klaempfl and others
  4. Target OS: Embedded
  5. Compiling Blinky.lpr
  6. Assembling blinky
  7. Linking Blinky
  8. 30 lines compiled, 0.1 sec, 274 bytes code, 2 bytes data
  9.  
  10. $ /E/Arduino/hardware/tools/avr/bin/avrdude.exe -C/E/Arduino/hardware/tools/avr/etc/avrdude.conf -v -patmega328p -carduino -PCOM5 -b115200 -D -Uflash:w:Blinky.hex:i
  11.  
  12. avrdude.exe: Version 6.3-20190619
  13.              Copyright (c) 2000-2005 Brian Dean, http://www.bdmicro.com/
  14.              Copyright (c) 2007-2014 Joerg Wunsch
  15.  
  16.              System wide configuration file is "E:/Arduino/hardware/tools/avr/etc/avrdude.conf"
  17.  
  18.              Using Port                    : COM5
  19.              Using Programmer              : arduino
  20.              Overriding Baud Rate          : 115200
  21.              AVR Part                      : ATmega328P
  22.              Chip Erase delay              : 9000 us
  23.              PAGEL                         : PD7
  24.              BS2                           : PC2
  25.              RESET disposition             : dedicated
  26.              RETRY pulse                   : SCK
  27.              serial program mode           : yes
  28.              parallel program mode         : yes
  29.              Timeout                       : 200
  30.              StabDelay                     : 100
  31.              CmdexeDelay                   : 25
  32.              SyncLoops                     : 32
  33.              ByteDelay                     : 0
  34.              PollIndex                     : 3
  35.              PollValue                     : 0x53
  36.              Memory Detail                 :
  37.  
  38.                                       Block Poll               Page                       Polled
  39.                Memory Type Mode Delay Size  Indx Paged  Size   Size #Pages MinW  MaxW   ReadBack
  40.                ----------- ---- ----- ----- ---- ------ ------ ---- ------ ----- ----- ---------
  41.                eeprom        65    20     4    0 no       1024    4      0  3600  3600 0xff 0xff
  42.                flash         65     6   128    0 yes     32768  128    256  4500  4500 0xff 0xff
  43.                lfuse          0     0     0    0 no          1    0      0  4500  4500 0x00 0x00
  44.                hfuse          0     0     0    0 no          1    0      0  4500  4500 0x00 0x00
  45.                efuse          0     0     0    0 no          1    0      0  4500  4500 0x00 0x00
  46.                lock           0     0     0    0 no          1    0      0  4500  4500 0x00 0x00
  47.                calibration    0     0     0    0 no          1    0      0     0     0 0x00 0x00
  48.                signature      0     0     0    0 no          3    0      0     0     0 0x00 0x00
  49.  
  50.              Programmer Type : Arduino
  51.              Description     : Arduino
  52.              Hardware Version: 3
  53.              Firmware Version: 4.4
  54.              Vtarget         : 0.3 V
  55.              Varef           : 0.3 V
  56.              Oscillator      : 28.800 kHz
  57.              SCK period      : 3.3 us
  58.  
  59. avrdude.exe: AVR device initialized and ready to accept instructions
  60.  
  61. Reading | ################################################## | 100% 0.00s
  62.  
  63. avrdude.exe: Device signature = 0x1e950f (probably m328p)
  64. avrdude.exe: reading input file "Blinky.hex"
  65. avrdude.exe: writing flash (276 bytes):
  66.  
  67. Writing | ################################################## | 100% 0.06s
  68.  
  69. avrdude.exe: 276 bytes of flash written
  70. avrdude.exe: verifying flash memory against Blinky.hex:
  71. avrdude.exe: load data flash data from input file Blinky.hex:
  72. avrdude.exe: input file Blinky.hex contains 276 bytes
  73. avrdude.exe: reading on-chip flash data:
  74.  
  75. Reading | ################################################## | 100% 0.05s
  76.  
  77. avrdude.exe: verifying ...
  78. avrdude.exe: 276 bytes of flash verified
  79.  
  80. avrdude.exe done.  Thank you.
  81.  

The Pascal zdelay procedure delays execution by ~9 s.

Can anyone explain this?
« Last Edit: September 24, 2019, 11:37:03 am by julkas »

marcov

  • Administrator
  • Hero Member
  • *
  • Posts: 12107
  • FPC developer.
Re: Same logic but big diff (Arduino Uno R3)
« Reply #1 on: September 24, 2019, 10:08:17 am »
It would require checking the assembly to figure it out, but FPC probably either does a more expensive (more bits) multiply, or accesses the global dv on every iteration.

GCC, on the other hand, probably writes dv back only once at the end, since it is not declared volatile.

In other words, your comparison probably relies on assumptions about compiler optimizations.

julkas

  • Guest
Re: Same logic but big diff (Arduino Uno R3)
« Reply #2 on: September 24, 2019, 10:17:22 am »
Pascal assembly -
Code: ASM  [Select][+][-]
  1.         .file "Blinky.lpr"
  2. # Begin asmlist al_procedures
  3.  
  4. .section .text.n_psblinky_ss_zdelay
  5. .globl  PsBLINKY_ss_ZDELAY
  6. PsBLINKY_ss_ZDELAY:
  7. .Lc2:
  8.         push    r3
  9.         push    r2
  10.         mov     r18,r1
  11.         mov     r19,r1
  12.         mov     r20,r1
  13.         mov     r21,r1
  14. .Lj5:
  15.         movw    r22,r18
  16.         movw    r24,r20
  17.         ldi     r26,1
  18.         add     r22,r26
  19.         adc     r23,r1
  20.         adc     r24,r1
  21.         adc     r25,r1
  22.         movw    r18,r22
  23.         movw    r20,r24
  24.         lds     r2,(TC_sPsBLINKY_ss_DV)
  25.         lds     r3,(TC_sPsBLINKY_ss_DV+1)
  26.         lds     r22,(TC_sPsBLINKY_ss_DV)
  27.         lds     r23,(TC_sPsBLINKY_ss_DV+1)
  28.         movw    r24,r22
  29.         mul     r2,r24
  30.         movw    r22,r0
  31.         mul     r25,r2
  32.         add     r23,r0
  33.         mul     r24,r3
  34.         add     r23,r0
  35.         clr     r1
  36.         sts     (TC_sPsBLINKY_ss_DV),r22
  37.         sts     (TC_sPsBLINKY_ss_DV+1),r23
  38.         cp      r18,r1
  39.         ldi     r22,9
  40.         cpc     r19,r22
  41.         ldi     r22,61
  42.         cpc     r20,r22
  43.         cpc     r21,r1
  44.         brge    .Lj8
  45.         rjmp    .Lj5
  46. .Lj8:
  47.         pop     r2
  48.         pop     r3
  49.         ret
  50. .Lc1:
  51. .Le0:
  52.         .size   PsBLINKY_ss_ZDELAY, .Le0 - PsBLINKY_ss_ZDELAY
  53.  
  54. .section .text.n_main
  55. .globl  main
  56. main:
  57. .globl  PASCALMAIN
  58. PASCALMAIN:
  59. .Lc4:
  60. .Lc5:
  61.         call    FPC_INIT_FUNC_TABLE
  62.         sbi     4,5
  63. .Lj9:
  64.         sbi     5,5
  65.         call    PsBLINKY_ss_ZDELAY
  66.         cbi     5,5
  67.         call    PsBLINKY_ss_ZDELAY
  68.         rjmp    .Lj9
  69. .Lc3:
  70. .Le1:
  71.         .size   main, .Le1 - main
  72.  
  73. .section .text.n_FPC_INIT_FUNC_TABLE
  74. .globl  FPC_INIT_FUNC_TABLE
  75. FPC_INIT_FUNC_TABLE:
  76.         ret
  77.  
  78. .section .text.n_FPC_FINALIZE_FUNC_TABLE
  79. .globl  FPC_FINALIZE_FUNC_TABLE
  80. FPC_FINALIZE_FUNC_TABLE:
  81.         ret
  82. # End asmlist al_procedures
  83. # Begin asmlist al_globals
  84.  
  85. .section .data.n_INITFINAL
  86.         .balign 2
  87. .globl  INITFINAL
  88. INITFINAL:
  89.         .byte   0,0
  90. .Le2:
  91.         .size   INITFINAL, .Le2 - INITFINAL
  92.  
  93. .section .data.n_FPC_THREADVARTABLES
  94.         .balign 2
  95. .globl  FPC_THREADVARTABLES
  96. FPC_THREADVARTABLES:
  97.         .long   0
  98. .Le3:
  99.         .size   FPC_THREADVARTABLES, .Le3 - FPC_THREADVARTABLES
  100.  
  101. .section .data.n_FPC_RESOURCESTRINGTABLES
  102.         .balign 2
  103. .globl  FPC_RESOURCESTRINGTABLES
  104. FPC_RESOURCESTRINGTABLES:
  105.         .short  0
  106. .Le4:
  107.         .size   FPC_RESOURCESTRINGTABLES, .Le4 - FPC_RESOURCESTRINGTABLES
  108.  
  109. .section .data.n_FPC_WIDEINITTABLES
  110.         .balign 2
  111. .globl  FPC_WIDEINITTABLES
  112. FPC_WIDEINITTABLES:
  113.         .short  0
  114. .Le5:
  115.         .size   FPC_WIDEINITTABLES, .Le5 - FPC_WIDEINITTABLES
  116.  
  117. .section .data.n_FPC_RESSTRINITTABLES
  118.         .balign 2
  119. .globl  FPC_RESSTRINITTABLES
  120. FPC_RESSTRINITTABLES:
  121.         .short  0
  122. .Le6:
  123.         .size   FPC_RESSTRINITTABLES, .Le6 - FPC_RESSTRINITTABLES
  124.  
  125. .section .fpc.n_version
  126. __fpc_ident:
  127.         .ascii  "FPC 3.3.1 [2019/09/22] for avr - embedded"
  128. .Le7:
  129.         .size   __fpc_ident, .Le7 - __fpc_ident
  130.  
  131. .section .data.n___stklen
  132.         .balign 2
  133. .globl  __stklen
  134. __stklen:
  135.         .short  1024
  136. .Le8:
  137.         .size   __stklen, .Le8 - __stklen
  138.  
  139. .section .data.n___heapsize
  140.         .balign 2
  141. .globl  __heapsize
  142. __heapsize:
  143.         .short  128
  144. .Le9:
  145.         .size   __heapsize, .Le9 - __heapsize
  146.  
  147. .section .bss.n___fpc_initialheap
  148.         .globl __fpc_initialheap
  149.         .size __fpc_initialheap,128
  150. __fpc_initialheap:
  151.         .zero 128
  152.  
  153. .section .data.n___fpc_valgrind
  154.         .balign 2
  155. .globl  __fpc_valgrind
  156. __fpc_valgrind:
  157.         .byte   0
  158. .Le10:
  159.         .size   __fpc_valgrind, .Le10 - __fpc_valgrind
  160. # End asmlist al_globals
  161. # Begin asmlist al_typedconsts
  162.  
  163. .section .data.n_TC_sPsBLINKY_ss_DV
  164. TC_sPsBLINKY_ss_DV:
  165.         .short  0
  166. .Le11:
  167.         .size   TC_sPsBLINKY_ss_DV, .Le11 - TC_sPsBLINKY_ss_DV
  168. # End asmlist al_typedconsts
  169. # Begin asmlist al_dwarf_frame
  170.  
  171. .section .debug_frame
  172. .Lc6:
  173.         .long   .Lc8-.Lc7
  174. .Lc7:
  175.         .long   -1
  176.         .byte   1
  177.         .byte   0
  178.         .uleb128        1
  179.         .sleb128        -4
  180.         .byte   24
  181.         .byte   12
  182.         .uleb128        13
  183.         .uleb128        1
  184.         .byte   5
  185.         .uleb128        24
  186.         .uleb128        0
  187.         .balign 4,0
  188. .Lc8:
  189.         .long   .Lc10-.Lc9
  190. .Lc9:
  191.         .short  .Lc6
  192.         .short  .Lc2
  193.         .short  .Lc1-.Lc2
  194.         .balign 4,0
  195. .Lc10:
  196.         .long   .Lc13-.Lc12
  197. .Lc12:
  198.         .short  .Lc6
  199.         .short  .Lc4
  200.         .short  .Lc3-.Lc4
  201.         .byte   4
  202.         .long   .Lc5-.Lc4
  203.         .byte   7
  204.         .uleb128        24
  205.         .balign 4,0
  206. .Lc13:
  207. # End asmlist al_dwarf_frame

Laksen

  • Hero Member
  • *****
  • Posts: 794
    • J-Software
Re: Same logic but big diff (Arduino Uno R3)
« Reply #3 on: September 24, 2019, 10:30:05 am »
Try to declare dv volatile in the c code as Marco suggests

For timing you should always write your code as assembly or use a timer to be entirely sure you get the behavior you want

ccrause

  • Hero Member
  • *****
  • Posts: 1007
Re: Same logic but big diff (Arduino Uno R3)
« Reply #4 on: September 24, 2019, 10:39:08 am »
In my opinion writing high level language busy loops does not result in deterministic behaviour.  Depending on the optimization setting the number of low level instructions generated could vary, leading to varying delays. As you have noticed, this is particularly evident when comparing FPC with GCC.

One of the factors affecting performance in this case is that FPC always loads and saves the value of dv from memory in each iteration, while GCC can keep the value in registers.

To make deterministic delays using assembler see e.g. the wiki example Wait or this unit.

Edit: I see I am a bit slow typing out my reply...
« Last Edit: September 24, 2019, 10:40:47 am by ccrause »

julkas

  • Guest
Re: Same logic but big diff (Arduino Uno R3)
« Reply #5 on: September 24, 2019, 10:39:19 am »
> Try to declare dv volatile in the c code as Marco suggests
>
> For timing you should always write your code as assembly or use a timer to be entirely sure you get the behavior you want
Thanks. But I don't want here precise timing. I compare two same simple for loops. C code ~ 3s, Pascal ~ 9s. Why?

ccrause

  • Hero Member
  • *****
  • Posts: 1007
Re: Same logic but big diff (Arduino Uno R3)
« Reply #6 on: September 24, 2019, 10:43:25 am »
> Thanks. But I don't want here precise timing. I compare two same simple for loops. C code ~ 3s, Pascal ~ 9s. Why?
Well, you got imprecise results so no problem then.

marcov

  • Administrator
  • Hero Member
  • *
  • Posts: 12107
  • FPC developer.
Re: Same logic but big diff (Arduino Uno R3)
« Reply #7 on: September 24, 2019, 10:50:24 am »
> Thanks. But I don't want here precise timing. I compare two same simple for loops. C code ~ 3s, Pascal ~ 9s. Why?

Since the loop body is supposed to be only a few instructions (or even just one, the multiply), any added instructions are very noticeable, as you found out.

Note that this is not guaranteed for C either, e.g. if you switch compilers or tweak the optimization settings (which you don't even provide for the C case).


julkas

  • Guest
Re: Same logic but big diff (Arduino Uno R3)
« Reply #8 on: September 24, 2019, 11:06:30 am »
C compiler switches -
"E:\\Arduino\\hardware\\tools\\avr/bin/avr-g++" -c -g -Os -w -std=gnu++11 -fpermissive -fno-exceptions -ffunction-sections -fdata-sections -fno-threadsafe-statics -Wno-error=narrowing -flto -w -x c++ -E -CC -mmcu=atmega328p -DF_CPU=16000000L -DARDUINO=10810 -DARDUINO_AVR_UNO -DARDUINO_ARCH_AVR


C zdelay code assembly -
Code: ASM  [Select][+][-]
  1.  
  2. BlinkyC.ino.elf:     file format elf32-avr
  3. ...
  4.  
  5. int dv = 0;  
  6. void zdelay() {
  7.  170:   80 91 00 01     lds     r24, 0x0100     ; 0x800100 <_edata>
  8.  174:   90 91 01 01     lds     r25, 0x0101     ; 0x800101 <_edata+0x1>
  9.  178:   40 e0           ldi     r20, 0x00       ; 0
  10.  17a:   59 e0           ldi     r21, 0x09       ; 9
  11.  17c:   6d e3           ldi     r22, 0x3D       ; 61
  12.  17e:   70 e0           ldi     r23, 0x00       ; 0
  13.   long int i;
  14.   for(i = 0; i < 4000000L; i++) {
  15.     dv = dv * dv;
  16.  180:   9c 01           movw    r18, r24
  17.  182:   22 9f           mul     r18, r18
  18.  184:   c0 01           movw    r24, r0
  19.  186:   23 9f           mul     r18, r19
  20.  188:   90 0d           add     r25, r0
  21.  18a:   90 0d           add     r25, r0
  22.  18c:   11 24           eor     r1, r1
  23.  18e:   41 50           subi    r20, 0x01       ; 1
  24.  190:   51 09           sbc     r21, r1
  25.  192:   61 09           sbc     r22, r1
  26.  194:   71 09           sbc     r23, r1
  27. }
  28.  
  29. int dv = 0;  
  30. void zdelay() {
  31.   long int i;
  32.   for(i = 0; i < 4000000L; i++) {
  33.  196:   a1 f7           brne    .-24            ; 0x180 <_Z6zdelayv+0x10>
  34.  198:   90 93 01 01     sts     0x0101, r25     ; 0x800101 <_edata+0x1>
  35.  19c:   80 93 00 01     sts     0x0100, r24     ; 0x800100 <_edata>
  36.     dv = dv * dv;
  37.   }  
  38. }
  39.  1a0:   08 95           ret
  40. ...
  41.  

julkas

  • Guest
Re: Same logic but big diff (Arduino Uno R3)
« Reply #9 on: September 24, 2019, 11:21:22 am »
I redeclared the dv variable in the C code as volatile. Result - a draw!
Thanks all.
« Last Edit: September 24, 2019, 11:31:00 am by julkas »

 

TinyPortal © 2005-2018