Recent

Author Topic: ARM AArch64 Pos implementation  (Read 406 times)

LemonParty

  • Sr. Member
  • ****
  • Posts: 418
ARM AArch64 Pos implementation
« on: May 04, 2025, 09:10:00 pm »
Hello.
I have this function that works with AnsiString:
Code: Pascal  [Select][+][-]
  1. function Pos(constref Buf: AnsiChar; C: AnsiChar; Count: SizeUInt): SizeUInt;assembler;nostackframe;
  2. // Returns the 1-based index of the first byte equal to C among the first
  3. // Count bytes starting at Buf, or 0 when there is no such byte (or Count = 0).
  4. // Adapted from glibc's AArch64 memchr; little-endian only.
  5. // Register roles:
  6. //   x0 = @Buf on entry (srcin), also Result     w1 = C
  7. //   x2 = Count (cntin)                          x3 = 16-byte aligned read pointer (src)
  8. //   x4 = remaining count (cntrem)               x5 = match syndrome, 4 bits per byte (synd)
  9. //   x6 = shift for the first chunk              x7 = tmp
  10. //   x8 = @Buf - 1, so "match address - x8" is the 1-based index
  11. // Every "not found" exit leaves x8 in x0 so that the final subtraction yields 0.
  12. asm
  13.   sub x8,x0,#1 //x8 = @Buf - 1
  14.   cbz x2,.Lnomatch //Count = 0: nothing to scan, do not read memory
  15.   bic x3,x0,#15 //src = srcin rounded down to a multiple of 16
  16.   ld1 {v1.16b},[x3]
  17.   dup v0.16b,w1
  18.   cmeq v2.16b,v1.16b,v0.16b
  19.   lsl x6,x0,#2 //shift = 4 * srcin (lsr only uses it modulo 64)
  20.   .byte 67,132,12,15 //shrn v3.8b,v2.8h,#4
  21.   fmov x5,d3 //synd,dend
  22.   lsr x5,x5,x6 //synd, synd, shift: drop nibbles of bytes located before Buf
  23.   cbz x5,.LStartLoop
  24.   rbit x5,x5
  25.   clz x5,x5 //4 * offset of the first match from Buf
  26.   cmp x2,x5,lsr #2
  27.   add x0,x0,x5,lsr #2
  28.   csel x0,x0,x8,hi //match at or beyond Count -> x8, i.e. "not found"
  29.   b .LMatch
  30.  
  31. .LStartLoop:
  32.   sub x7,x3,x0 //tmp, src, srcin
  33.   add x7,x7,#17 //tmp, tmp, 17
  34.   subs x4,x2,x7 //cntrem, cntin, tmp
  35.   b.lo .Lnomatch
  36.   tbz x4,#4,.Lloop32_2 //cntrem, 4, L(loop32_2): avoid over-reading by a 16-byte chunk
  37.   sub x3,x3,#16 //src, src, 16
  38.  
  39. .Lloop32:
  40.   ldr q1,[x3,#32]! //qdata, [src, 32]!
  41.   cmeq v2.16b,v1.16b,v0.16b //vhas_chr.16b, vdata.16b, vrepchr.16b
  42.   umaxp v3.16b,v2.16b,v2.16b //vend.16b, vhas_chr.16b, vhas_chr.16b
  43.   fmov x5,d3 //synd, dend
  44.   cbnz x5,.Lend
  45. .Lloop32_2:
  46.   ldr q1,[x3,#16] //qdata, [src, 16]
  47.   cmeq v2.16b,v1.16b,v0.16b //vhas_chr.16b, vdata.16b, vrepchr.16b
  48.   subs x4,x4,#32
  49.   b.lo .Lend2
  50.   umaxp v3.16b,v2.16b,v2.16b //vend.16b, vhas_chr.16b, vhas_chr.16b
  51.   fmov x5,d3 //synd, dend
  52.   cbz x5,.Lloop32
  53.  
  54. .Lend2:
  55.   add x3,x3,#16 //src, src, 16
  56. .Lend:
  57.   .byte 67,132,12,15 //shrn v3.8b,v2.8h,#4
  58.   sub x4,x3,x0 //cntrem, src, srcin
  59.   fmov x5,d3 //synd, dend
  60.   sub x4,x2,x4 //cntrem, cntin, cntrem
  61.   rbit x5,x5 //synd, synd: lowest nibble is the first byte, so reverse before clz
  62.   clz x5,x5 //synd, synd (64 when the chunk holds no match)
  63.   cmp x4,x5,lsr #2 //cntrem, synd, lsr 2
  64.   add x0,x3,x5,lsr #2 //result, src, synd, lsr 2
  65.   csel x0,x0,x8,hi //result, result, x8, hi: out of range -> "not found"
  66.   b .LMatch
  67. .Lnomatch:
  68.   mov Result,x8
  69. .LMatch:
  70.   sub Result,Result,x8 //address -> 1-based index; x8 - x8 = 0 when not found
  71. end;
  72.  
(Original code taken from https://codebrowser.dev/glibc/glibc/sysdeps/aarch64/memchr.S.html)
I am looking for a way to modify this function to work with WideString.
Lazarus v. 4.99. FPC v. 3.3.1. Windows 11

 

TinyPortal © 2005-2018