main.cpp
// Driver for the SsePfpConvert assembly routine: exercises each SSE2 packed
// conversion once and prints the source and destination registers.

// Must be defined before <math.h> is first included (directly or through
// another header) for toolchains that gate M_PI, M_E, M_SQRT2 and M_SQRT1_2
// behind this macro.
#define _USE_MATH_DEFINES

#include "../../commonfiles/xmmval.h"

#include <math.h>
#include <stdio.h>

// Selects the conversion performed by SsePfpConvert.
//
// The enumerators are used as zero-based indices into the jump table
// (CvtOpTable) in ssepackedfloatingpointconversions.asm, so their order
// must match the order of the entries in that table.
enum CvtOp : unsigned int
{
    Cvtdq2ps,       // Packed signed doubleword to SPFP
    Cvtdq2pd,       // Packed signed doubleword to DPFP
    Cvtps2dq,       // Packed SPFP to signed doubleword
    Cvtpd2dq,       // Packed DPFP to signed doubleword
    Cvtps2pd,       // Packed SPFP to DPFP
    Cvtpd2ps        // Packed DPFP to SPFP
};

// Implemented in ssepackedfloatingpointconversions.asm.
//
// a       Source operand; read with aligned 128-bit loads, so it must be
//         16-byte aligned.
// b       Destination operand; written with aligned 128-bit stores, so it
//         must be 16-byte aligned. Left untouched when cvt_op is out of range.
// cvt_op  Conversion to perform.
extern "C" void SsePfpConvert(const XmmVal* a, XmmVal* b, CvtOp cvt_op);

// Demonstrates the conversions whose source operand holds 32-bit elements
// that are set up as four lanes here: cvtdq2ps, cvtps2dq and cvtps2pd.
void SsePfpConversions32(void)
{
    // 16-byte alignment is required by the aligned loads/stores in
    // SsePfpConvert.
    alignas(16) XmmVal a;
    alignas(16) XmmVal b;
    char buff[256];

    // Four signed doublewords -> four SPFP values.
    a.i32[0] = 10;
    a.i32[1] = -500;
    a.i32[2] = 600;
    a.i32[3] = -1024;
    SsePfpConvert(&a, &b, CvtOp::Cvtdq2ps);
    printf("\nResults for CvtOp::Cvtdq2ps\n");
    printf(" a: %s\n", a.ToString_i32(buff, sizeof(buff)));
    printf(" b: %s\n", b.ToString_r32(buff, sizeof(buff)));

    // Four SPFP values (non-integral, both signs) -> four signed doublewords.
    a.r32[0] = 1.0f / 3.0f;
    a.r32[1] = 2.0f / 3.0f;
    a.r32[2] = -a.r32[0] * 2.0f;
    a.r32[3] = -a.r32[1] * 2.0f;
    SsePfpConvert(&a, &b, CvtOp::Cvtps2dq);
    printf("\nResults for CvtOp::Cvtps2dq\n");
    printf(" a: %s\n", a.ToString_r32(buff, sizeof(buff)));
    printf(" b: %s\n", b.ToString_i32(buff, sizeof(buff)));

    // cvtps2pd converts the two low-order SPFP values of 'a'
    a.r32[0] = 1.0f / 7.0f;
    a.r32[1] = 2.0f / 9.0f;
    a.r32[2] = 0;
    a.r32[3] = 0;
    SsePfpConvert(&a, &b, CvtOp::Cvtps2pd);
    printf("\nResults for CvtOp::Cvtps2pd\n");
    printf(" a: %s\n", a.ToString_r32(buff, sizeof(buff)));
    printf(" b: %s\n", b.ToString_r64(buff, sizeof(buff)));
}

// Demonstrates the conversions that involve 64-bit elements on either the
// source or the destination side: cvtdq2pd, cvtpd2dq and cvtpd2ps.
void SsePfpConversions64(void)
{
    // 16-byte alignment is required by the aligned loads/stores in
    // SsePfpConvert.
    alignas(16) XmmVal a;
    alignas(16) XmmVal b;
    char buff[256];

    // cvtdq2pd converts the two low-order doubleword integers of 'a'
    a.i32[0] = 10;
    a.i32[1] = -20;
    a.i32[2] = 0;
    a.i32[3] = 0;
    SsePfpConvert(&a, &b, CvtOp::Cvtdq2pd);
    printf("\nResults for CvtOp::Cvtdq2pd\n");
    printf(" a: %s\n", a.ToString_i32(buff, sizeof(buff)));
    printf(" b: %s\n", b.ToString_r64(buff, sizeof(buff)));

    // cvtpd2dq sets the two high-order doublewords of 'b' to zero
    a.r64[0] = M_PI;
    a.r64[1] = M_E;
    SsePfpConvert(&a, &b, CvtOp::Cvtpd2dq);
    printf("\nResults for CvtOp::Cvtpd2dq\n");
    printf(" a: %s\n", a.ToString_r64(buff, sizeof(buff)));
    printf(" b: %s\n", b.ToString_i32(buff, sizeof(buff)));

    // cvtpd2ps sets the two high-order SPFP values of 'b' to zero
    a.r64[0] = M_SQRT2;
    a.r64[1] = M_SQRT1_2;
    SsePfpConvert(&a, &b, CvtOp::Cvtpd2ps);
    printf("\nResults for CvtOp::Cvtpd2ps\n");
    printf(" a: %s\n", a.ToString_r64(buff, sizeof(buff)));
    printf(" b: %s\n", b.ToString_r32(buff, sizeof(buff)));
}

// Runs both demonstration groups; command-line arguments are not used.
int main()
{
    SsePfpConversions32();
    SsePfpConversions64();
    return 0;
}
ssepackedfloatingpointconversions.asm
; Name:     ssepackedfloatingpointconversions.asm
;
; Build:    g++ -c -m32 main.cpp -o main.o -std=c++11
;           nasm -f elf32 -o ssepackedfloatingpointconversions.o ssepackedfloatingpointconversions.asm
;           g++ -m32 -o ssepackedfloatingpointconversions ssepackedfloatingpointconversions.o main.o ../../commonfiles/xmmval.o
;
; Source:   Modern x86 Assembly Language Programming p. 249

global SsePfpConvert

section .data

; Jump table indexed by 'cvt_op'.
; The order of values in the following table must match the enum CvtOp
; that's defined in main.cpp.
                align 4
CvtOpTable:     dd SsePfpConvert.sseCvtdq2ps
                dd SsePfpConvert.sseCvtdq2pd
                dd SsePfpConvert.sseCvtps2dq
                dd SsePfpConvert.sseCvtpd2dq
                dd SsePfpConvert.sseCvtps2pd
                dd SsePfpConvert.sseCvtpd2ps
CvtOpTableCount: equ ($ - CvtOpTable) / 4      ; number of entries (size dword = 4)

section .text

; extern "C" void SsePfpConvert(const XmmVal* a, XmmVal* b, CvtOp cvt_op);
;
; Description:  The following function demonstrates use of the packed
;               floating-point conversion instructions.
;
; Arguments:    [ebp+8]   a       source operand, read with an aligned
;                                 128-bit load (must be 16-byte aligned)
;               [ebp+12]  b       destination, written with an aligned
;                                 128-bit store (must be 16-byte aligned)
;               [ebp+16]  cvt_op  index into CvtOpTable
;
; Returns:      Nothing. If 'cvt_op' is not a valid table index the
;               function returns without touching 'b'.
;
; Clobbers:     eax, ecx, edx, xmm0, xmm1
;
; Requires:     SSE2

SsePfpConvert:
        push ebp
        mov ebp,esp

; Load arguments and make sure 'cvt_op' is valid
        mov eax,[ebp+8]                     ;eax = 'a'
        mov ecx,[ebp+12]                    ;ecx = 'b'
        mov edx,[ebp+16]                    ;edx = cvt_op
        cmp edx,CvtOpTableCount
        jae .badCvtOp                       ;unsigned compare: rejects any index >= count
        jmp [CvtOpTable+edx*4]              ;jump to specified conversion

; Convert packed doubleword signed integers to packed SPFP values
.sseCvtdq2ps:
        movdqa xmm0,[eax]
        cvtdq2ps xmm1,xmm0
        movaps [ecx],xmm1
        pop ebp
        ret

; Convert packed doubleword signed integers to packed DPFP values
.sseCvtdq2pd:
        movdqa xmm0,[eax]
        cvtdq2pd xmm1,xmm0
        movapd [ecx],xmm1
        pop ebp
        ret

; Convert packed SPFP values to packed doubleword signed integers
.sseCvtps2dq:
        movaps xmm0,[eax]
        cvtps2dq xmm1,xmm0
        movdqa [ecx],xmm1
        pop ebp
        ret

; Convert packed DPFP values to packed doubleword signed integers
.sseCvtpd2dq:
        movapd xmm0,[eax]
        cvtpd2dq xmm1,xmm0
        movdqa [ecx],xmm1
        pop ebp
        ret

; Convert packed SPFP to packed DPFP
.sseCvtps2pd:
        movaps xmm0,[eax]
        cvtps2pd xmm1,xmm0
        movapd [ecx],xmm1
        pop ebp
        ret

; Convert packed DPFP to packed SPFP
.sseCvtpd2ps:
        movapd xmm0,[eax]
        cvtpd2ps xmm1,xmm0
        movaps [ecx],xmm1
        pop ebp
        ret

; Invalid 'cvt_op': return without writing to 'b'
.badCvtOp:
        pop ebp
        ret

; Mark this object as not needing an executable stack. Without this note
; GNU ld assumes the object requires one and makes the stack of the whole
; program executable.
section .note.GNU-stack noalloc noexec nowrite progbits
<
build
g++ -c -m32 main.cpp -o main.o -std=c++11
nasm -f elf32 -o ssepackedfloatingpointconversions.o ssepackedfloatingpointconversions.asm
g++ -m32 -o ssepackedfloatingpointconversions ssepackedfloatingpointconversions.o main.o ../../commonfiles/xmmval.o