{ Counts the UTF-8 continuation bytes (bit pattern 10xxxxxx) found in
  BlockCount consecutive 16-byte blocks starting at p16.

  Parameters (register calling convention, as the code relies on):
    p16        - eax, address of the first block. It is read with MOVDQA,
                 so it must be 16-byte aligned or the load will fault.
    BlockCount - edx, number of 16-byte blocks to scan.

  Result (eax): the number of bytes whose top two bits are 10. When
  BlockCount is zero or negative nothing is read and the result is 0.
  NOTE(review): the name suggests the caller derives the codepoint count
  as byte length minus this value - confirm against the call site.

  Requires SSE2. Clobbers eax, ecx, edx, xmm0, xmm1, xmm2, xmm4, xmm5 and
  the flags; no callee-saved register and no stack space is used. All
  constants are built in registers, so no absolute data address is needed. }
function UTF8Len_aligned16(p16: pchar; BlockCount: PtrInt):PtrInt; assembler; { PIC }
Label
  CountLoop, Done;
asm
  { xmm0 = all zero: second operand of PSADBW, which then yields plain byte sums }
  PXOR    xmm0, xmm0
  { xmm1 = running totals, dword 0 for the low 8 bytes, dword 2 for the high 8 bytes }
  PXOR    xmm1, xmm1
  { Nothing to scan: skip the loop, otherwise DEC/JNZ would wrap around }
  TEST    edx, edx
  JLE     Done
  { xmm2 = $80 in every byte, used to isolate bit 7 }
  MOV     ecx, $80808080
  MOVD    xmm2, ecx
  PSHUFD  xmm2, xmm2, 0
CountLoop:
  { Fetch 16 bytes and keep a second copy }
  MOVDQA  xmm4, [p16]
  MOVDQA  xmm5, xmm4
  { Bit 0 of every byte in xmm5 = bit 6 of the source byte.
    The higher bits receive neighbouring data and are masked out below. }
  PSRLQ   xmm5, 6
  { Bit 0 of every byte in xmm4 = bit 7 of the source byte, all other bits 0 }
  PAND    xmm4, xmm2
  PSRLQ   xmm4, 7
  { xmm5 = (not xmm5) and xmm4: each byte becomes 1 when the source byte
    is 10xxxxxx, i.e. NOT the first byte of a codepoint, else 0 }
  PANDN   xmm5, xmm4
  { Sum the 0/1 flags of each 8-byte half and add them to the totals }
  PSADBW  xmm5, xmm0
  PADDD   xmm1, xmm5
  { Next 16 bytes }
  ADD     p16, 16
  DEC     edx
  JNZ     CountLoop
Done:
  { Result = total of low half + total of high half }
  MOVDQA  xmm4, xmm1
  PSRLDQ  xmm4, 8
  PADDD   xmm1, xmm4
  MOVD    eax, xmm1
end;