;-----------------------------------------------------------------------
; name:   qwordbin2hexascii.asm
;
; build:  nasm -felf64 qwordbin2hexascii.asm -o qwordbin2hexascii.o
;
; description:
;   Branch-free conversion of a 64-bit value into its 16 upper-case
;   hexadecimal ASCII digits.
;
;   C view (System V AMD64 returns a 128-bit integer in rdx:rax):
;       unsigned __int128 qwordbin2hexascii(uint64_t value);
;
; In:     rdi = value to convert
; Out:    rdx = ASCII digits of bits 63..32 of the value
;         rax = ASCII digits of bits 31..0  of the value
;         Within each register the most significant hex digit sits in
;         the most significant byte, so rdx:rax read as one 128-bit
;         number spells the digits left to right.  Storing the
;         registers to memory as-is (little-endian) therefore yields
;         each 8-character group reversed; bswap first if text order
;         in memory is needed.
; Saved:  rcx, r8, r9, xmm0, xmm1, xmm2 are used as scratch and restored
;         before returning; no other register is written.
; Clobb:  rflags
; Stack:  72 bytes below the return address; leaf routine, makes no calls.
; ISA:    SSE2 only (always available in 64-bit mode).  Earlier
;         revisions used pinsrq, which needs SSE4.1.
;-----------------------------------------------------------------------

bits 64

global qwordbin2hexascii

; Masks used while spreading the value out to one nibble per byte.
WORD_LANES      equ 0x0000FFFF0000FFFF
BYTE_LANES      equ 0x00FF00FF00FF00FF
NIBBLE_LANES    equ 0x0F0F0F0F0F0F0F0F
; Constants for the packed digit -> ASCII step.
CARRY_BITS      equ 0xF0F0F0F0F0F0F0F0  ; selects the overflow out of the low nibble
BIAS_SIX        equ 0x0606060606060606  ; digit + 6 overflows the nibble iff digit >= 10

XMM_SAVE_BYTES  equ 48                  ; room for xmm0, xmm1, xmm2

section .text

qwordbin2hexascii:
        ; --- save every scratch register so the caller sees none change ---
        push    rcx
        push    r8
        push    r9
        sub     rsp, XMM_SAVE_BYTES
        movdqu  [rsp],      xmm0        ; movdqu: rsp alignment is not assumed
        movdqu  [rsp + 16], xmm1
        movdqu  [rsp + 32], xmm2

        ; --- split the qword into two dwords ---
        mov     rax, rdi
        mov     edx, eax                ; rdx = low dword (32-bit write zero-extends)
        shr     rax, 32                 ; rax = high dword

        ; --- dwords -> words: one 16-bit word per 32-bit lane ---
        mov     r8, rax
        mov     r9, rdx
        shl     r8, 16
        shl     r9, 16
        or      rax, r8
        or      rdx, r9
        mov     rcx, WORD_LANES
        and     rax, rcx
        and     rdx, rcx

        ; --- words -> bytes: one byte per 16-bit lane ---
        mov     r8, rax
        mov     r9, rdx
        shl     r8, 8
        shl     r9, 8
        or      rax, r8
        or      rdx, r9
        mov     rcx, BYTE_LANES
        and     rax, rcx
        and     rdx, rcx

        ; --- bytes -> nibbles: one hex digit (0..15) per byte ---
        mov     r8, rax
        mov     r9, rdx
        shl     r8, 4
        shl     r9, 4
        or      rax, r8
        or      rdx, r9
        mov     rcx, NIBBLE_LANES
        and     rax, rcx
        and     rdx, rcx

        ; --- gather all 16 digits in xmm0: low qword = rdx, high qword = rax ---
        movq    xmm0, rdx
        movq    xmm1, rax
        punpcklqdq xmm0, xmm1           ; keeps xmm0 low qword, high qword = xmm1 low qword

        ; --- broadcast the two byte constants across all 16 lanes ---
        mov     rcx, CARRY_BITS
        movq    xmm1, rcx
        punpcklqdq xmm1, xmm1
        mov     rcx, BIAS_SIX
        movq    xmm2, rcx
        punpcklqdq xmm2, xmm2

        ; --- digit -> ASCII, all lanes at once ---
        ; The word-sized shifts below never move a set bit across a byte
        ; boundary because the bits being shifted stay inside their byte.
        paddb   xmm0, xmm2              ; d + 6: bit 4 becomes set iff d >= 10
        pand    xmm1, xmm0              ; xmm1 = 0x10 for A..F lanes, 0x00 for 0..9 lanes
        psubb   xmm0, xmm2              ; back to d
        psrlw   xmm1, 1                 ; xmm1 = 0x08 / 0x00
        psubb   xmm0, xmm1              ; letters: d - 8
        psrlw   xmm1, 3                 ; xmm1 = 0x01 / 0x00
        psubb   xmm0, xmm1              ; letters: d - 9 (1..6); digits: d
        psrlw   xmm2, 1                 ; xmm2 = 0x03 in every lane
        paddb   xmm1, xmm2              ; xmm1 = 0x04 for letters, 0x03 for digits
        psllw   xmm1, 4                 ; xmm1 = 0x40 for letters, 0x30 for digits
        paddb   xmm0, xmm1              ; '0'+d for 0..9, 'A'-1+(d-9) for 10..15

        ; --- return the 16 characters in rdx:rax ---
        movq    rax, xmm0               ; characters for bits 31..0
        movhlps xmm0, xmm0              ; bring the high qword down
        movq    rdx, xmm0               ; characters for bits 63..32

        ; --- restore scratch registers ---
        movdqu  xmm0, [rsp]
        movdqu  xmm1, [rsp + 16]
        movdqu  xmm2, [rsp + 32]
        add     rsp, XMM_SAVE_BYTES
        pop     r9
        pop     r8
        pop     rcx
        ret