main.cpp
#include <stdio.h>
#include <malloc.h>
#include <string.h>
#include <stdlib.h>

extern "C" int SseTextStringCalcLength(const char* s);

// Inputs for the length test. The lengths sit just below, at, and just above
// multiples of 16, and the list finishes with the empty string.
const char* TestStrings[] = {
    "0123456",                               //  7 characters
    "0123456789abcde",                       // 15 characters
    "0123456789abcdef",                      // 16 characters
    "0123456789abcdefg",                     // 17 characters
    "0123456789abcdefghijklmnopqrstu",       // 31 characters
    "0123456789abcdefghijklmnopqrstuv",      // 32 characters
    "0123456789abcdefghijklmnopqrstuvw",     // 33 characters
    "0123456789abcdefghijklmnopqrstuvwxyz",  // 36 characters
    "",                                      //  0 characters
};

// Inclusive range of byte offsets into the test buffer at which each string
// is placed: 40 bytes on either side of offset 4096.
const int OffsetMax = 4096 + 40;
const int OffsetMin = 4096 - 40;

// Number of entries in TestStrings.
const int NumTestStrings = sizeof(TestStrings) / sizeof(TestStrings[0]);

// Checks SseTextStringCalcLength() against strlen().
//
// Every entry of TestStrings is copied to each offset in
// [OffsetMin, OffsetMax] of a page-aligned, two-page buffer, so the start of
// the string sweeps across the page boundary in the middle of the buffer.
// Only the first mismatch per test string is reported. Results go to stdout;
// an allocation failure is reported on stderr and the test is skipped.
void SseTextStringCalcLengthCpp(void)
{
    const int buff_size = 8192;
    const int page_size = 4096;

    // aligned_alloc() takes (alignment, size). The buffer must be
    // buff_size bytes long because every iteration fills all of it.
    char* buff = (char*)aligned_alloc(page_size, buff_size);

    if (buff == NULL)
    {
        fprintf(stderr, "SseTextStringCalcLengthCpp: buffer allocation failed\n");
        return;
    }

    int len1 = 0;
    int len2 = 0;

    printf("\nResults for SseTextStringCalcLength()\n");

    for (int i = 0; i < NumTestStrings; i++)
    {
        bool error = false;
        const char* ts = TestStrings[i];

        printf("Test string: \"%s\"\n", ts);

        for (int offset = OffsetMin; offset <= OffsetMax; offset++)
        {
            char* s2 = buff + offset;

            // Refill with a non-zero pattern so the bytes in front of s2
            // never contain a stray terminator.
            memset(buff, 0x55, buff_size);

            // Every test string is far shorter than the space left after
            // s2, so strncpy() terminates it and zero-pads the remainder.
            strncpy(s2, ts, buff_size - offset);

            len1 = (int)strlen(s2);
            len2 = SseTextStringCalcLength(s2);

            if ((len1 != len2) && !error)
            {
                error = true;
                printf("  String length compare failed!\n");
                // %p requires a void* argument.
                printf("  buff: 0x%p  offset: %5d  s2: 0x%p", (void*)buff, offset, (void*)s2);
                printf("  len1: %5d  len2: %5d\n",len1, len2);
            }
        }

        // Shows the lengths measured at the last offset tried.
        if (!error)
            printf("  No errors detected - len1: %5d  len2: %5d\n",len1, len2);
    }

    free(buff);
}

// Program entry point: runs the string-length test once.
// The command-line arguments are not used.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    SseTextStringCalcLengthCpp();

    return 0;
}
ssetextstringcalclength.asm
; Name:     ssetextstringcalclength.asm
;
; Build:    g++ -c -m32 main.cpp -o main.o
;           nasm -f elf32 -o ssetextstringcalclength.o ssetextstringcalclength.asm
;           g++ -m32 -o ssetextstringcalclength ssetextstringcalclength.o main.o
;
; Source:   Modern x86 Assembly Language Programming p. 312

global SseTextStringCalcLength

section .text

; extern "C" int SseTextStringCalcLength(const char* s);
;
; Description:  The following function calculates the length of a
;               text string using the x86-SSE instruction pcmpistri.
;
; Returns:      Length of text string
;
; Requires:     SSE4.2

%define s   [ebp+8]

SseTextStringCalcLength:
; Overview:
;   The string is scanned in 16-byte blocks with pcmpistri, starting at the
;   (possibly unaligned) address 's'. Before each block the low 12 bits of
;   the address are checked; if a 16-byte block starting there would extend
;   past the end of the current 4096-byte page, the bytes up to the page
;   boundary are tested one at a time instead, and block scanning then
;   resumes from the page boundary.
;   NOTE(review): presumably this keeps a block read from touching a
;   following page the string does not reach - confirm against the book.
;
; Registers written: eax (return value), ecx, edx, xmm1. ebp is saved and
; restored.
    push    ebp
    mov     ebp,esp
; Initialize registers for string length calculation
    mov     eax,s                   ;eax ='s'
    sub     eax,16                  ;adjust eax for use in loop
    mov     edx,0ff01h              ;range pair: low byte 01h, high byte ffh
    movd    xmm1,edx                ;xmm1[15:0] = char range
; Calculate next address and test for near end-of-page condition
.@1:
    add     eax,16                  ;eax = next text block
    mov     edx,eax
    and     edx,0fffh               ;edx = low 12 bits of address
    cmp     edx,0ff0h               ;0ff0h = last page offset where 16 bytes still fit
    ja      .nearEndOfPage          ;jump if within 16 bytes of page boundary
    ; Test current text block for '\0' byte
    pcmpistri xmm1,[eax],14h        ;compare char range and text
    jnz       .@1                              ;jump if '\0' byte not found
    ; Found '\0' byte in current block (index in ECX)
    ; Calculate string length and return
    add     eax,ecx                 ;eax = ptr to '\0' byte
    sub     eax,s                   ;eax = final string length
    pop     ebp
    ret
; Search for the '\0' terminator by examining each character
; On entry edx still holds the page offset (0ff1h..0fffh) of eax.
.nearEndOfPage:
    mov     ecx,4096                ;ecx = size of page in bytes
    sub     ecx,edx                 ;ecx = number of bytes to check
.@2:
    mov     dl,[eax]                ;dl = next text string character
    or      dl,dl                   ;set ZF if the character is zero
    jz      .foundNull              ;jump if '\0' found
    inc     eax                     ;eax = ptr to next char
    dec     ecx                     ;one fewer byte left before the page boundary
    jnz     .@2                     ;jump if more chars to test
    ; Remainder of text string can be searched using 16 byte blocks
    ; EAX is now aligned on a 16-byte boundary
    ; (it is at the start of the next page, so no block read from here on
    ; can straddle a page boundary and no further page check is made)
    sub     eax,16                  ;adjust eax for use in loop
.@3:
    add       eax,16                ;eax = ptr to next text block
    pcmpistri xmm1,[eax],14h        ;compare char range and text
    jnz       .@3                   ;jump if '\0' byte not found
    ; Found '\0' byte in current block (index in ECX)
    add     eax,ecx                 ;eax = ptr to '\0' byte
; Calculate final string length and return
; Reached with eax pointing at the '\0' byte, either by falling through from
; the block scan above or by jumping from the byte-by-byte scan.
.foundNull:
    sub     eax,s                   ;eax = final string length
    pop     ebp
    ret
build
g++ -c -m32 main.cpp -o main.o
nasm -f elf32 -o ssetextstringcalclength.o ssetextstringcalclength.asm
g++ -m32 -o ssetextstringcalclength ssetextstringcalclength.o main.o