main.cpp
#include "stdio.h"
#include <wchar.h>

extern "C" int ConcatStrings(wchar_t* des, int des_size, const wchar_t* const* src, int src_n);

// Runs one ConcatStrings test case and prints one result line containing:
//   - the length returned by ConcatStrings,
//   - the length measured independently with wcslen,
//   - the resulting text, or "<empty>" when nothing was copied.
//
// des/des_size: destination buffer and its capacity in wchar_t elements.
// src/src_n:    array of source strings and its element count.
static void RunConcatTest(wchar_t* des, int des_size, const wchar_t* const* src, int src_n)
{
    // Pre-terminate the buffer so it is safe to inspect even when
    // ConcatStrings rejects the arguments and leaves des untouched.
    des[0] = L'\0';

    int des_len = ConcatStrings(des, des_size, src, src_n);
    const wchar_t* des_temp = (*des != L'\0') ? des : L"<empty>";

    // Use printf with %ls rather than wprintf: stdout was already used by
    // printf, and byte and wide output must not be mixed on one stream.
    // %ls is the conversion for wchar_t strings and %zu the one for the
    // size_t returned by wcslen.
    printf("  des_len: %d (%zu) des: %ls \n", des_len, wcslen(des_temp), des_temp);
}

// Test driver: exercises ConcatStrings with a destination that is large
// enough, one that is too small, and a source list containing an empty string.
int main(int argc, char* argv[])
{
    printf("\nResults for ConcatStrings\n");

    // Destination buffer large enough
    const wchar_t* src1[] = { L"One ", L"Two ", L"Three ", L"Four" };
    const int src1_n = sizeof(src1) / sizeof(src1[0]);
    const int des1_size = 64;
    wchar_t des1[des1_size];
    RunConcatTest(des1, des1_size, src1, src1_n);

    // Destination buffer too small
    const wchar_t* src2[] = { L"Red ", L"Green ", L"Blue ", L"Yellow " };
    const int src2_n = sizeof(src2) / sizeof(src2[0]);
    const int des2_size = 16;
    wchar_t des2[des2_size];
    RunConcatTest(des2, des2_size, src2, src2_n);

    // Empty string test
    const wchar_t* src3[] = { L"Airplane ", L"Car ", L"", L"Truck ", L"Boat " };
    const int src3_n = sizeof(src3) / sizeof(src3[0]);
    const int des3_size = 128;
    wchar_t des3[des3_size];
    RunConcatTest(des3, des3_size, src3, src3_n);

    return 0;
}
concatstrings.asm
; Name:     concatstrings.asm
;
; Build:    g++ -c main.cpp -o main.o
;           nasm -f elf64 -o concatstrings.o concatstrings.asm
;           g++ -o concatstrings concatstrings.o main.o
;
; Source:   Modern x86 Assembly Language Programming p.553


; extern "C" int ConcatStrings(wchar_t* des, int des_size, const wchar_t* const* src, int src_n)
;
; Description:  Concatenates the src_n zero-terminated strings in src[]
;               into des, in order. Copying stops before the first string
;               that would not fit in des together with its terminator;
;               des is always left zero-terminated on success.
;
; ABI:          System V AMD64 (arguments arrive in rdi, rsi, rdx, rcx).
;               Characters are processed as 32-bit elements (scasd/movsd).
;               The string instructions rely on DF = 0 at entry.
;
; In:           rdi = des       destination buffer
;               esi = des_size  capacity of des in characters
;               rdx = src       array of string pointers (each non-NULL)
;               ecx = src_n     number of entries in src
;
; Returns:      -1          des or src is NULL, or des_size/src_n <= 0
;               n >= 0      Length of concatenated string
;
; Clobbers:     rcx, rdx, rsi, rdi, r8, r9, r10, r11, flags

global ConcatStrings

section .text

ConcatStrings:
        push    rbx                         ;only callee-saved register used

        mov     eax,-1                      ;assume error

; Reject NULL pointers and non-positive sizes before touching memory
        test    rdi,rdi
        jz      .exit                       ;jump if des == NULL
        test    rdx,rdx
        jz      .exit                       ;jump if src == NULL
        movsxd  rsi,esi                     ;rsi = des_size
        test    rsi,rsi
        jle     .exit                       ;jump if des_size <= 0
        movsxd  r9,ecx                      ;r9 = src_n
        test    r9,r9
        jle     .exit                       ;jump if src_n <= 0

; Move arguments out of the registers the string instructions need
        mov     rbx,rdi                     ;rbx = des
        mov     r8,rdx                      ;r8 = src
        mov     rdx,rsi                     ;rdx = des_size
        xor     r10d,r10d                   ;des_index = 0
        xor     r11d,r11d                   ;i = 0
        mov     dword [rbx],r10d            ;*des = '\0'

; Repeat loop until concatenation is finished
.next_string:
        mov     rdi,[r8+r11*8]              ;rdi = src[i] (scan pointer)
        mov     rsi,rdi                     ;rsi = src[i] (copy source)

; Compute length of src[i]: scan for the terminator with an
; effectively unlimited count, then recover the length from rcx
        xor     eax,eax                     ;search value = '\0'
        mov     rcx,-1
        repne   scasd                       ;find '\0'
        not     rcx
        dec     rcx                         ;rcx = len(src[i])

; Stop if src[i] plus its terminator does not fit in des
        lea     rax,[r10+rcx]               ;rax = des_index + len(src[i])
        cmp     rax,rdx
        jae     .finished                   ;jump if rax >= des_size

; Copy src[i] including its terminator to &des[des_index]
        inc     rcx                         ;rcx = len(src[i]) + 1
        lea     rdi,[rbx+r10*4]             ;rdi = &des[des_index]
        rep     movsd                       ;perform string move

; Advance to the next source string
        mov     r10,rax                     ;des_index += len(src[i])
        inc     r11                         ;i += 1
        cmp     r11,r9
        jb      .next_string                ;jump if i < src_n

; Return length of concatenated string
.finished:
        mov     eax,r10d                    ;eax = trunc(des_index)

; Return error code or length of string
.exit:
        pop     rbx                         ;restore callee-saved register
        ret
build
g++ -c main.cpp -o main.o
nasm -f elf64 -o concatstrings.o concatstrings.asm
g++ -o concatstrings concatstrings.o main.o