EasyManua.ls Logo

Intel ARCHITECTURE IA-32 - Example 5-12 Division of Two Pair of Single-Precision Complex Number

Intel ARCHITECTURE IA-32
568 pages
To Next Page IconTo Next Page
To Next Page IconTo Next Page
To Previous Page IconTo Previous Page
To Previous Page IconTo Previous Page
Loading...
Optimizing for SIMD Floating-point Applications 5
5-25
Example 5-12 Division of Two Pair of Single-precision Complex Number
// Division of (ak + i bk ) / (ck + i dk )
movshdup xmm0, Src1; load imaginary parts into the
; destination, b1, b1, b0, b0
movaps xmm1, src2; load the 2nd pair of complex values,
; i.e. d1, c1, d0, c0
mulps xmm0, xmm1; temporary results, b1d1, b1c1, b0d0,
; b0c0
shufps xmm1, xmm1, b1; reorder the real and imaginary
; parts, c1, d1, c0, d0
movsldup xmm2, Src1; load the real parts into the
; destination, a1, a1, a0, a0
mulps xmm2, xmm1; temp results, a1c1, a1d1, a0c0, a0d0
addsubps xmm0, xmm2; a1c1+b1d1, b1c1-a1d1, a0c0+b0d0,
; b0c0-a0d0
mulps xmm1, xmm1; c1c1, d1d1, c0c0, d0d0
movps xmm2, xmm1; c1c1, d1d1, c0c0, d0d0
shufps xmm2, xmm2, b1; d1d1, c1c1, d0d0, c0c0
addps xmm2, xmm1; c1c1+d1d1, c1c1+d1d1, c0c0+d0d0,
; c0c0+d0d0
divps xmm0, xmm2
shufps xmm0, xmm0, b1 ; (b1c1-a1d1)/(c1c1+d1d1),
; (a1c1+b1d1)/(c1c1+d1d1),
; (b0c0-a0d0)/( c0c0+d0d0),
; (a0c0+b0d0)/( c0c0+d0d0)

Table of Contents

Related product manuals