main.cpp
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <stdlib.h>
// Assembly-language implementation under test; it is compared below against
// the C++ version SseTextStringReplaceCharCpp().
extern "C" int SseTextStringReplaceChar(char* s, char old_char, char new_char);
// Input strings for the test. Each one is copied into a work buffer before the
// replace functions run, so the literals themselves are only read.
char* TestStrings[] =
{
(char*)("*Red*Green*Blue*"),
(char*)("Cyan*Magenta Yellow*Black Tan"),
(char*)("White*Pink Brown Purple*Gray Orange*"),
(char*)("Beige Silver Indigo Fuchsia Maroon"),
(char*)("***************"),
(char*)("*****+*****+*****+*****+*****"),
(char*)("")
};
// Character that gets replaced. It is also stored just past each string's
// terminator as a sentinel that must survive the replace call.
const char OldChar = '*';
// Character that takes the place of OldChar.
const char NewChar = '#';
// Range of byte offsets into the work buffers at which each string is placed:
// 40 bytes either side of offset 4096.
const int OffsetMin = 4096 - 40;
const int OffsetMax = 4096 + 40;
// Number of entries in TestStrings.
const int NumTestStrings = sizeof(TestStrings) / sizeof (char*);
// Guard value stored after the sentinel character and re-checked after the
// replace call to detect writes past the end of the string.
const unsigned int CheckNum = 0x12345678;
// Reference C++ implementation: replaces every occurrence of old_char with
// new_char in the null-terminated string s (in place).
//
// s        - string to modify; must be non-null and null-terminated
// old_char - character to search for
// new_char - character written in place of each match
//
// Returns the number of characters replaced.
int SseTextStringReplaceCharCpp(char* s, char old_char, char new_char)
{
    int n = 0;
    for (char* p = s; *p != '\0'; ++p)
    {
        // Compare against the arguments, not the file-level OldChar/NewChar
        // test constants, so the function honours what the caller asked for.
        if (*p == old_char)
        {
            *p = new_char;
            n++;
        }
    }
    return n;
}
// Test driver: runs the C++ and assembly replace functions over every test
// string at every offset in [OffsetMin, OffsetMax] and reports mismatches.
//
// The work buffers are page-aligned and two pages long, so the tested offsets
// place the strings just before, across and just after the page boundary in
// the middle of each buffer. After each string's terminator a sentinel OldChar
// and the CheckNum guard value are stored; both must be unchanged afterwards.
void SseTextStringReplace(void)
{
    const int buff_size = 8192;
    const int page_size = 4096;

    // aligned_alloc takes (alignment, size): page-aligned, buff_size bytes.
    char* buff1 = (char*)aligned_alloc(page_size, buff_size);
    char* buff2 = (char*)aligned_alloc(page_size, buff_size);

    if (buff1 == NULL || buff2 == NULL)
    {
        printf("Error - buffer allocation failed\n");
        free(buff1);
        free(buff2);
        return;
    }

    printf("\nResults for SseTextStringReplaceChars()\n");
    printf("OldChar = '%c' NewChar = '%c'\n", OldChar, NewChar);

    for (int i = 0; i < NumTestStrings; i++)
    {
        const char* s = TestStrings[i];
        const int s_len = (int)strlen(s);

        for (int offset = OffsetMin; offset <= OffsetMax; offset++)
        {
            // Only the first offset of each string is echoed to keep output short.
            const bool print = (offset == OffsetMin);
            char* s1 = buff1 + offset;
            char* s2 = buff2 + offset;
            const int size = buff_size - offset;
            int n1 = -1, n2 = -1;

            // Copy the string (strncpy zero-fills the rest of the buffer), then
            // plant the sentinel and guard value behind the terminator. memcpy
            // is used for the guard because its address is not int-aligned.
            strncpy(s1, s, size);
            *(s1 + s_len + 1) = OldChar;
            memcpy(s1 + s_len + 2, &CheckNum, sizeof(CheckNum));

            strncpy(s2, s, size);
            *(s2 + s_len + 1) = OldChar;
            memcpy(s2 + s_len + 2, &CheckNum, sizeof(CheckNum));

            if (print)
                printf("\ns1 before replace: \"%s\"\n", s1);
            n1 = SseTextStringReplaceCharCpp(s1, OldChar, NewChar);
            if (print)
                printf("s1 after replace: \"%s\"\n", s1);

            if (print)
                printf("\ns2 before replace: \"%s\"\n", s2);
            n2 = SseTextStringReplaceChar(s2, OldChar, NewChar);
            if (print)
                printf("s2 after replace: \"%s\"\n", s2);

            // Compare the C++ result against the assembly result.
            if (strcmp(s1, s2) != 0)
                printf("Error - string compare failed\n");
            if (n1 != n2)
                printf("Error - character count compare failed\n");

            // Nothing beyond the terminator may have been modified.
            if (*(s1 + s_len + 1) != OldChar)
                printf("Error - buff1 OldChar overwrite\n");
            if (*(s2 + s_len + 1) != OldChar)
                printf("Error - buff2 OldChar overwrite\n");
            if (memcmp(s1 + s_len + 2, &CheckNum, sizeof(CheckNum)) != 0)
                printf("Error - buff1 CheckNum overwrite\n");
            if (memcmp(s2 + s_len + 2, &CheckNum, sizeof(CheckNum)) != 0)
                printf("Error - buff2 CheckNum overwrite\n");
        }
    }

    // Memory from aligned_alloc is released by passing the same pointer to free.
    free(buff1);
    free(buff2);
}
// Program entry point: runs the string-replace test once and exits with 0.
int main(int argc, char* argv[])
{
    // The command line is not used by this test program.
    (void)argc;
    (void)argv;

    SseTextStringReplace();

    return 0;
}
ssetextstringreplacechar.asm
; Name: ssetextstringreplacechar.asm
;
; Build: g++ -c -m32 main.cpp -o main.o
; nasm -f elf32 -o ssetextstringreplacechar.o ssetextstringreplacechar.asm
; g++ -m32 -o ssetextstringreplacechar ssetextstringreplacechar.o main.o
;
; Source: Modern x86 Assembly Language Programming p. 316
global SseTextStringReplaceChar

; Declare that this object does not need an executable stack. Without this
; note GNU ld assumes it does and marks the whole program's stack executable.
section .note.GNU-stack noalloc noexec nowrite progbits

; Read-only constants (never written, so kept out of the writable .data).
section .rodata align=16
align 16
PxorNotMask: times 16 db 0xff ;pxor logical not mask (all bits set)

section .text
;-----------------------------------------------------------------------
; extern "C" int SseTextStringReplaceChar(char* s, char old_char, char new_char);
;
; Replaces every occurrence of old_char with new_char in the
; null-terminated string s (in place) and returns the number replaced.
;
; ABI:    32-bit; arguments are read from the stack relative to EBP
; In:     [ebp+8] = s, [ebp+12] = old_char, [ebp+16] = new_char
; Out:    eax = number of characters replaced
; Saves:  ebp, edi
; Clobb:  ecx, edx, xmm0-xmm3, xmm5, xmm6, flags
; Needs:  SSSE3 (pshufb), SSE4.2 (pcmpistrm), POPCNT
;
; Memory: The string is processed in 16-byte blocks. A block is only
;         loaded when it lies entirely inside one 4096-byte page; the
;         bytes at the tail of a page are handled one at a time, after
;         which all further blocks are 16-byte aligned. The block that
;         holds the terminator is read in full, and if it contains a
;         match it is stored back in full; bytes after the terminator
;         are stored back unchanged.
;-----------------------------------------------------------------------
%define s [ebp+8]
%define old_char [ebp+12]
%define new_char [ebp+16]

%define PAGE_SIZE          4096
%define PAGE_OFFSET_MASK   (PAGE_SIZE - 1)
%define LAST_BLOCK_OFFSET  (PAGE_SIZE - 16)     ;highest in-page offset for a 16-byte load
%define BLOCK_SIZE         16
%define EQUAL_ANY_BYTEMASK 40h                  ;pcmpistrm: unsigned bytes, equal any, byte mask

; COUNT_AND_REPLACE
;   In:   xmm0 = byte mask of matches (0xff where old_char was found)
;         xmm2 = current text block, xmm6 = packed new_char
;   Out:  xmm0 = text block with matches replaced, edi += number of matches
;   Clobbers edx, xmm3 and flags.
%macro COUNT_AND_REPLACE 0
        pmovmskb edx, xmm0                      ;edx = one bit per matching byte
        popcnt  edx, edx                        ;edx = number of matches in block
        add     edi, edx                        ;edi = running replacement count
        movdqa  xmm3, xmm0                      ;xmm3 = match mask
        pandn   xmm0, xmm2                      ;xmm0 = block with matched bytes cleared
        pand    xmm3, xmm6                      ;xmm3 = new_char at matched positions
        por     xmm0, xmm3                      ;xmm0 = updated block
%endmacro

SseTextStringReplaceChar:
        push    ebp
        mov     ebp, esp
        push    edi                             ;only callee-saved register used

; Initialize
        mov     eax, s                          ;eax = current position in string
        xor     edi, edi                        ;edi = num replaced chars

; Build the one-character search set and the packed new_char
        movzx   ecx, byte old_char
        movd    xmm1, ecx                       ;xmm1 = { old_char, 0, ... } (search set)
        movzx   ecx, byte new_char
        movd    xmm6, ecx
        pxor    xmm5, xmm5                      ;all-zero shuffle control selects byte 0
        pshufb  xmm6, xmm5                      ;xmm6 = new_char in all 16 bytes

; Unaligned block loop. Invariant: eax points at the next unprocessed byte.
.scanUnaligned:
        mov     edx, eax
        and     edx, PAGE_OFFSET_MASK           ;edx = offset of eax within its page
        cmp     edx, LAST_BLOCK_OFFSET
        ja      .pageTail                       ;a 16-byte load would cross the page end

        movdqu  xmm2, [eax]                     ;load next text block
        pcmpistrm xmm1, xmm2, EQUAL_ANY_BYTEMASK ;xmm0 = match mask, CF = match, ZF = '\0' seen
        setz    cl                              ;remember '\0' (setz leaves flags intact)
        jnc     .advanceUnaligned               ;no matches in this block

        COUNT_AND_REPLACE
        movdqu  [eax], xmm0                     ;save updated block

.advanceUnaligned:
        add     eax, BLOCK_SIZE
        test    cl, cl                          ;did the block contain '\0'?
        jz      .scanUnaligned                  ;no - keep going
        jmp     .done

; Fewer than 16 bytes remain on this page: process them one byte at a time.
.pageTail:
        mov     ecx, PAGE_SIZE
        sub     ecx, edx                        ;ecx = bytes left on this page (1..15)
        mov     dl, old_char
        mov     dh, new_char

.tailByte:
        cmp     byte [eax], 0
        je      .done                           ;terminator reached
        cmp     [eax], dl
        jne     .tailNext                       ;not old_char
        mov     [eax], dh                       ;replace old_char with new_char
        inc     edi                             ;update num replaced characters
.tailNext:
        inc     eax
        dec     ecx
        jnz     .tailByte                       ;repeat until end of page

; eax is now page-aligned, so every following block is 16-byte aligned and
; can never cross a page boundary; movdqa can be used without further checks.
.scanAligned:
        movdqa  xmm2, [eax]                     ;load next text block
        pcmpistrm xmm1, xmm2, EQUAL_ANY_BYTEMASK ;xmm0 = match mask, CF = match, ZF = '\0' seen
        setz    cl                              ;remember '\0'
        jnc     .advanceAligned                 ;no matches in this block

        COUNT_AND_REPLACE
        movdqa  [eax], xmm0                     ;save updated block

.advanceAligned:
        add     eax, BLOCK_SIZE
        test    cl, cl                          ;did the block contain '\0'?
        jz      .scanAligned                    ;no - keep going

.done:
        mov     eax, edi                        ;eax = num replaced characters
        pop     edi
        pop     ebp
        ret
build
g++ -c -m32 main.cpp -o main.o
nasm -f elf32 -o ssetextstringreplacechar.o ssetextstringreplacechar.asm
g++ -m32 -o ssetextstringreplacechar ssetextstringreplacechar.o main.o