main.cpp
#include <stdio.h>
#include <malloc.h>
#include <string.h>
#include <stdlib.h>
// String-length routine under test. It is not defined in this translation
// unit; it is declared with C linkage so the call binds to the unmangled
// symbol name SseTextStringCalcLength. Takes a pointer to a text string and
// returns an int that the test below compares against strlen().
extern "C" int SseTextStringCalcLength(const char* s);
// Input strings for the length test. The lengths sit just below, exactly at,
// and just above 16 and 32, plus a short string, a longer one, and the
// empty string.
const char * TestStrings[] =
{
    "0123456",                                  //  7 characters
    "0123456789abcde",                          // 15 characters
    "0123456789abcdef",                         // 16 characters
    "0123456789abcdefg",                        // 17 characters
    "0123456789abcdefghijklmnopqrstu",          // 31 characters
    "0123456789abcdefghijklmnopqrstuv",         // 32 characters
    "0123456789abcdefghijklmnopqrstuvw",        // 33 characters
    "0123456789abcdefghijklmnopqrstuvwxyz",     // 36 characters
    "",                                         //  0 characters (empty)
};

// Number of entries in TestStrings.
const int NumTestStrings = sizeof(TestStrings) / sizeof(TestStrings[0]);

// Inclusive range of buffer offsets tried for every test string:
// 40 bytes on either side of offset 4096.
const int OffsetMax = 4096 + 40;
const int OffsetMin = 4096 - 40;
// Test driver for SseTextStringCalcLength().
//
// Allocates a two-page (8192-byte) buffer aligned on a 4096-byte boundary.
// For every test string it places a copy at each offset in
// [OffsetMin, OffsetMax], so the string starts before, on, and after the
// boundary between the two pages. At each position the result of
// SseTextStringCalcLength() is compared with strlen(). The first mismatch
// per test string is reported; otherwise a "no errors" line is printed.
void SseTextStringCalcLengthCpp(void)
{
    const int buff_size = 8192;
    const int page_size = 4096;

    // aligned_alloc() takes (alignment, size) in that order. Passing them the
    // other way round yields a 4096-byte block that the 8192-byte memset
    // below would overrun.
    char* buff = (char*)aligned_alloc(page_size, buff_size);
    if (buff == NULL)
    {
        printf("\nSseTextStringCalcLengthCpp: buffer allocation failed\n");
        return;
    }

    int len1 = 0, len2 = 0;

    printf("\nResults for SseTextStringCalcLength()\n");

    for (int i = 0; i < NumTestStrings; i++)
    {
        bool error = false;
        const char* ts = TestStrings[i];
        const size_t ts_size = strlen(ts) + 1;      // includes the '\0'

        printf("Test string: \"%s\"\n", ts);

        for (int offset = OffsetMin; offset <= OffsetMax; offset++)
        {
            // Never write past the end of the buffer.
            if (offset < 0 || offset >= buff_size ||
                ts_size > (size_t)(buff_size - offset))
            {
                error = true;
                printf(" Test string does not fit at offset %d\n", offset);
                break;
            }

            char* s2 = buff + offset;

            // Refill with a non-zero pattern, then copy exactly the string
            // and its terminator. Unlike strncpy(), this does not zero-pad
            // the rest of the buffer, so the bytes after the terminator stay
            // 0x55 and the routine must stop at the first '\0'.
            memset(buff, 0x55, buff_size);
            memcpy(s2, ts, ts_size);

            len1 = (int)strlen(s2);
            len2 = SseTextStringCalcLength(s2);

            if ((len1 != len2) && !error)
            {
                error = true;
                printf(" String length compare failed!\n");
                // %p requires a void*; the format already carries its own
                // implementation-defined prefix.
                printf(" buff: %p offset: %5d s2: %p", (void*)buff, offset, (void*)s2);
                printf(" len1: %5d len2: %5d\n", len1, len2);
            }
        }

        if (!error)
            printf(" No errors detected - len1: %5d len2: %5d\n", len1, len2);
    }

    free(buff);
}
int main(int argc, char* argv[])
{
SseTextStringCalcLengthCpp();
return 0;
}
ssetextstringcalclength.asm
; Name: ssetextstringcalclength.asm
;
; Build: g++ -c -m32 main.cpp -o main.o
; nasm -f elf32 -o ssetextstringcalclength.o ssetextstringcalclength.asm
; g++ -m32 -o ssetextstringcalclength ssetextstringcalclength.o main.o
;
; Source: Modern x86 Assembly Language Programming p. 312
global SseTextStringCalcLength
section .text
; extern "C" int SseTextStringCalcLength(const char* s);
;
; Description: Calculates the length of a '\0'-terminated text string
; using the x86-SSE instruction pcmpistri. The string is examined in
; 16-byte blocks. When fewer than 16 bytes remain before the next
; 4096-byte page boundary, the bytes up to that boundary are checked
; one at a time, so a 16-byte load never straddles two pages.
;
; Arguments: s = [ebp+8], pointer to the text string
;
; Returns: EAX = length of text string (the '\0' is not counted)
;
; Modifies: EAX, ECX, EDX, XMM1 and the flags
;
; Requires SSE4.2
%define s [ebp+8]
SseTextStringCalcLength:
push ebp
mov ebp,esp
; Initialize registers for string length calculation
mov eax,s ;eax = 's'
sub eax,16 ;pre-bias eax; the loop adds 16 before each test
mov edx,0ff01h ;range pair: low byte = 01h, next byte = 0ffh
movd xmm1,edx ;xmm1[15:0] = char range 01h..0ffh, upper bits zeroed
; Calculate next address and test for near end-of-page condition
.@1:
add eax,16 ;eax = next text block
mov edx,eax
and edx,0fffh ;edx = low 12 bits of address (offset within 4096-byte page)
cmp edx,0ff0h
ja .nearEndOfPage ;jump if fewer than 16 bytes remain in this page
; Test current text block for '\0' byte
; imm8 = 14h: unsigned bytes, range compare, negated result,
; ECX = index of the least significant set bit (see Intel SDM, PCMPISTRI)
pcmpistri xmm1,[eax],14h ;compare char range and text
jnz .@1 ;ZF = 0: no '\0' byte in this block, keep scanning
; Found '\0' byte in current block (index in ECX)
; Calculate string length and return
add eax,ecx ;eax = ptr to '\0' byte
sub eax,s ;eax = final string length
pop ebp
ret
; Search for the '\0' terminator by examining each character
; up to the end of the current page (edx = offset within the page)
.nearEndOfPage:
mov ecx,4096 ;ecx = size of page in bytes
sub ecx,edx ;ecx = number of bytes to check (1..15)
.@2:
mov dl,[eax] ;dl = next text string character
or dl,dl ;set ZF if dl == 0
jz .foundNull ;jump if '\0' found (eax already points at it)
inc eax ;eax = ptr to next char
dec ecx
jnz .@2 ;jump if more chars to test
; Remainder of text string can be searched using 16 byte blocks
; EAX is now on a page boundary and therefore aligned on a 16-byte
; boundary, so the loads below stay within a single page
sub eax,16 ;pre-bias eax; the loop adds 16 before each test
.@3:
add eax,16 ;eax = ptr to next text block
pcmpistri xmm1,[eax],14h ;compare char range and text
jnz .@3 ;ZF = 0: no '\0' byte in this block, keep scanning
; Found '\0' byte in current block (index in ECX)
add eax,ecx ;eax = ptr to '\0' byte
; Calculate final string length and return
.foundNull:
sub eax,s ;eax = ptr to '\0' minus start of string = length
pop ebp
ret
build
g++ -c -m32 main.cpp -o main.o
nasm -f elf32 -o ssetextstringcalclength.o ssetextstringcalclength.asm
g++ -m32 -o ssetextstringcalclength ssetextstringcalclength.o main.o