From: Pierre Ossman Date: Mon, 29 Jun 2009 12:58:48 +0000 (+0000) Subject: Make x86_64 SIMD code PIC friendly X-Git-Tag: v1.0.90~410 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=ed6a55d2e3ddde3da1ef5d8269aa7fd25440657c;p=tigervnc.git Make x86_64 SIMD code PIC friendly Use RIP relative addressing as that works in both PIC and non-PIC mode. git-svn-id: svn://svn.code.sf.net/p/tigervnc/code/trunk@3864 3789f03b-4d11-0410-bbf8-ca57d06f2519 --- diff --git a/common/jpeg/simd/jcclrss2-64.asm b/common/jpeg/simd/jcclrss2-64.asm index 9900edd8..ca7e24af 100644 --- a/common/jpeg/simd/jcclrss2-64.asm +++ b/common/jpeg/simd/jcclrss2-64.asm @@ -302,10 +302,10 @@ EXTN(jsimd_rgb_ycc_convert_sse2): punpckhwd xmm6,xmm3 movdqa xmm7,xmm1 movdqa xmm4,xmm6 - pmaddwd xmm1,[PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) - pmaddwd xmm6,[PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) - pmaddwd xmm7,[PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) - pmaddwd xmm4,[PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + pmaddwd xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd xmm7,[rel PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) @@ -317,7 +317,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2): psrld xmm1,1 ; xmm1=BOL*FIX(0.500) psrld xmm6,1 ; xmm6=BOH*FIX(0.500) - movdqa xmm5,[PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ] + movdqa xmm5,[rel PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ] paddd xmm7,xmm1 paddd xmm4,xmm6 @@ -334,10 +334,10 @@ EXTN(jsimd_rgb_ycc_convert_sse2): punpckhwd xmm6,xmm2 movdqa xmm5,xmm0 movdqa xmm4,xmm6 - pmaddwd xmm0,[PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) - pmaddwd xmm6,[PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) - pmaddwd xmm5,[PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) - pmaddwd xmm4,[PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + pmaddwd xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd xmm5,[rel PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) @@ -349,7 +349,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2): psrld xmm0,1 ; xmm0=BEL*FIX(0.500) psrld xmm6,1 ; xmm6=BEH*FIX(0.500) - movdqa xmm1,[PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] + movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] paddd xmm5,xmm0 paddd xmm4,xmm6 @@ -372,12 +372,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2): punpckhwd xmm4,xmm3 movdqa xmm7,xmm0 movdqa xmm5,xmm4 - pmaddwd xmm0,[PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) - pmaddwd xmm4,[PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) - pmaddwd xmm7,[PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) - pmaddwd xmm5,[PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + pmaddwd xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd xmm7,[rel PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) - movdqa xmm3,[PD_ONEHALF] ; xmm3=[PD_ONEHALF] + movdqa xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF] paddd xmm0, XMMWORD [wk(4)] paddd xmm4, XMMWORD [wk(5)] @@ -394,7 +394,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2): psrld xmm3,1 ; xmm3=ROL*FIX(0.500) psrld xmm4,1 ; xmm4=ROH*FIX(0.500) - movdqa xmm1,[PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] + movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] paddd xmm7,xmm3 paddd xmm5,xmm4 @@ -411,12 +411,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2): punpckhwd xmm4,xmm2 movdqa xmm1,xmm6 movdqa xmm5,xmm4 - pmaddwd xmm6,[PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) - pmaddwd xmm4,[PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) - pmaddwd xmm1,[PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) - pmaddwd xmm5,[PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + pmaddwd xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd xmm1,[rel PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) - movdqa xmm2,[PD_ONEHALF] ; xmm2=[PD_ONEHALF] + movdqa xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF] paddd xmm6, XMMWORD [wk(6)] paddd xmm4, XMMWORD [wk(7)] @@ -437,7 +437,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2): psrld xmm2,1 ; xmm2=REL*FIX(0.500) psrld xmm4,1 ; xmm4=REH*FIX(0.500) - movdqa xmm0,[PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ] + movdqa xmm0,[rel PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ] paddd xmm1,xmm2 paddd xmm5,xmm4 diff --git a/common/jpeg/simd/jdclrss2-64.asm b/common/jpeg/simd/jdclrss2-64.asm index 0808ecaa..c3c1b07c 100644 --- a/common/jpeg/simd/jdclrss2-64.asm +++ b/common/jpeg/simd/jdclrss2-64.asm @@ -127,17 +127,17 @@ EXTN(jsimd_ycc_rgb_convert_sse2): paddw xmm0,xmm0 ; xmm0=2*CrE paddw xmm1,xmm1 ; xmm1=2*CrO - pmulhw xmm4,[PW_MF0228] ; xmm4=(2*CbE * -FIX(0.22800)) - pmulhw xmm5,[PW_MF0228] ; xmm5=(2*CbO * -FIX(0.22800)) - pmulhw xmm0,[PW_F0402] ; xmm0=(2*CrE * FIX(0.40200)) - pmulhw xmm1,[PW_F0402] ; xmm1=(2*CrO * FIX(0.40200)) + pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbE * -FIX(0.22800)) + pmulhw xmm5,[rel PW_MF0228] ; xmm5=(2*CbO * -FIX(0.22800)) + pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrE * FIX(0.40200)) + pmulhw xmm1,[rel PW_F0402] ; xmm1=(2*CrO * FIX(0.40200)) - paddw xmm4,[PW_ONE] - paddw xmm5,[PW_ONE] + paddw xmm4,[rel PW_ONE] + paddw xmm5,[rel PW_ONE] psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800)) psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800)) - paddw xmm0,[PW_ONE] - paddw xmm1,[PW_ONE] + paddw xmm0,[rel PW_ONE] + paddw xmm1,[rel PW_ONE] psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200)) psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200)) @@ -155,19 +155,19 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmm5,xmm3 punpcklwd xmm2,xmm6 punpckhwd xmm4,xmm6 - pmaddwd xmm2,[PW_MF0344_F0285] - pmaddwd xmm4,[PW_MF0344_F0285] + pmaddwd xmm2,[rel PW_MF0344_F0285] + pmaddwd xmm4,[rel PW_MF0344_F0285] punpcklwd xmm3,xmm7 punpckhwd xmm5,xmm7 - pmaddwd xmm3,[PW_MF0344_F0285] - pmaddwd xmm5,[PW_MF0344_F0285] + pmaddwd xmm3,[rel PW_MF0344_F0285] + pmaddwd xmm5,[rel PW_MF0344_F0285] - paddd xmm2,[PD_ONEHALF] - paddd xmm4,[PD_ONEHALF] + paddd xmm2,[rel PD_ONEHALF] + paddd xmm4,[rel PD_ONEHALF] psrad xmm2,SCALEBITS psrad xmm4,SCALEBITS - paddd xmm3,[PD_ONEHALF] - paddd xmm5,[PD_ONEHALF] + paddd xmm3,[rel PD_ONEHALF] + paddd xmm5,[rel PD_ONEHALF] psrad xmm3,SCALEBITS psrad xmm5,SCALEBITS diff --git a/common/jpeg/simd/jdmrgss2-64.asm b/common/jpeg/simd/jdmrgss2-64.asm index 6474f43b..6329f950 100644 --- a/common/jpeg/simd/jdmrgss2-64.asm +++ b/common/jpeg/simd/jdmrgss2-64.asm @@ -113,17 +113,17 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): paddw xmm7,xmm7 ; xmm7=2*CrH paddw xmm0,xmm0 ; xmm0=2*CrL - pmulhw xmm6,[PW_MF0228] ; xmm6=(2*CbH * -FIX(0.22800)) - pmulhw xmm4,[PW_MF0228] ; xmm4=(2*CbL * -FIX(0.22800)) - pmulhw xmm7,[PW_F0402] ; xmm7=(2*CrH * FIX(0.40200)) - pmulhw xmm0,[PW_F0402] ; xmm0=(2*CrL * FIX(0.40200)) + pmulhw xmm6,[rel PW_MF0228] ; xmm6=(2*CbH * -FIX(0.22800)) + pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbL * -FIX(0.22800)) + pmulhw xmm7,[rel PW_F0402] ; xmm7=(2*CrH * FIX(0.40200)) + pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrL * FIX(0.40200)) - paddw xmm6,[PW_ONE] - paddw xmm4,[PW_ONE] + paddw xmm6,[rel PW_ONE] + paddw xmm4,[rel PW_ONE] psraw xmm6,1 ; xmm6=(CbH * -FIX(0.22800)) psraw xmm4,1 ; xmm4=(CbL * -FIX(0.22800)) - paddw xmm7,[PW_ONE] - paddw xmm0,[PW_ONE] + paddw xmm7,[rel PW_ONE] + paddw xmm0,[rel PW_ONE] psraw xmm7,1 ; xmm7=(CrH * FIX(0.40200)) psraw xmm0,1 ; xmm0=(CrL * FIX(0.40200)) @@ -141,19 +141,19 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmm7,xmm2 punpcklwd xmm5,xmm1 punpckhwd xmm6,xmm1 - pmaddwd xmm5,[PW_MF0344_F0285] - pmaddwd xmm6,[PW_MF0344_F0285] + pmaddwd xmm5,[rel PW_MF0344_F0285] + pmaddwd xmm6,[rel PW_MF0344_F0285] punpcklwd xmm2,xmm3 punpckhwd xmm7,xmm3 - pmaddwd xmm2,[PW_MF0344_F0285] - pmaddwd xmm7,[PW_MF0344_F0285] + pmaddwd xmm2,[rel PW_MF0344_F0285] + pmaddwd xmm7,[rel PW_MF0344_F0285] - paddd xmm5,[PD_ONEHALF] - paddd xmm6,[PD_ONEHALF] + paddd xmm5,[rel PD_ONEHALF] + paddd xmm6,[rel PD_ONEHALF] psrad xmm5,SCALEBITS psrad xmm6,SCALEBITS - paddd xmm2,[PD_ONEHALF] - paddd xmm7,[PD_ONEHALF] + paddd xmm2,[rel PD_ONEHALF] + paddd xmm7,[rel PD_ONEHALF] psrad xmm2,SCALEBITS psrad xmm7,SCALEBITS diff --git a/common/jpeg/simd/jdsamss2-64.asm b/common/jpeg/simd/jdsamss2-64.asm index 1f7b1f59..7df283e8 100644 --- a/common/jpeg/simd/jdsamss2-64.asm +++ b/common/jpeg/simd/jdsamss2-64.asm @@ -133,12 +133,12 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2): punpcklbw xmm3,xmm0 ; xmm3=( 1 2 3 4 5 6 7 8) punpckhbw xmm6,xmm0 ; xmm6=( 9 10 11 12 13 14 15 16) - pmullw xmm1,[PW_THREE] - pmullw xmm4,[PW_THREE] - paddw xmm2,[PW_ONE] - paddw xmm5,[PW_ONE] - paddw xmm3,[PW_TWO] - paddw xmm6,[PW_TWO] + pmullw xmm1,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] + paddw xmm2,[rel PW_ONE] + paddw xmm5,[rel PW_ONE] + paddw xmm3,[rel PW_TWO] + paddw xmm6,[rel PW_TWO] paddw xmm2,xmm1 paddw xmm5,xmm4 @@ -264,8 +264,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2): punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) - pmullw xmm0,[PW_THREE] - pmullw xmm4,[PW_THREE] + pmullw xmm0,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] pcmpeqb xmm7,xmm7 psrldq xmm7,(SIZEOF_XMMWORD-2) @@ -324,8 +324,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2): punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) - pmullw xmm0,[PW_THREE] - pmullw xmm4,[PW_THREE] + pmullw xmm0,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) @@ -373,12 +373,12 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2): movdqa XMMWORD [wk(0)], xmm4 - pmullw xmm7,[PW_THREE] - pmullw xmm3,[PW_THREE] - paddw xmm1,[PW_EIGHT] - paddw xmm5,[PW_EIGHT] - paddw xmm0,[PW_SEVEN] - paddw xmm2,[PW_SEVEN] + pmullw xmm7,[rel PW_THREE] + pmullw xmm3,[rel PW_THREE] + paddw xmm1,[rel PW_EIGHT] + paddw xmm5,[rel PW_EIGHT] + paddw xmm0,[rel PW_SEVEN] + paddw xmm2,[rel PW_SEVEN] paddw xmm1,xmm7 paddw xmm5,xmm3 @@ -426,12 +426,12 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2): movdqa XMMWORD [wk(1)], xmm3 - pmullw xmm6,[PW_THREE] - pmullw xmm4,[PW_THREE] - paddw xmm1,[PW_EIGHT] - paddw xmm0,[PW_EIGHT] - paddw xmm7,[PW_SEVEN] - paddw xmm5,[PW_SEVEN] + pmullw xmm6,[rel PW_THREE] + pmullw xmm4,[rel PW_THREE] + paddw xmm1,[rel PW_EIGHT] + paddw xmm0,[rel PW_EIGHT] + paddw xmm7,[rel PW_SEVEN] + paddw xmm5,[rel PW_SEVEN] paddw xmm1,xmm6 paddw xmm0,xmm4 diff --git a/common/jpeg/simd/jfss2fst-64.asm b/common/jpeg/simd/jfss2fst-64.asm index 93031565..b6a06bc1 100644 --- a/common/jpeg/simd/jfss2fst-64.asm +++ b/common/jpeg/simd/jfss2fst-64.asm @@ -192,7 +192,7 @@ EXTN(jsimd_fdct_ifast_sse2): paddw xmm6,xmm3 psllw xmm6,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm6,[PW_F0707] ; xmm6=z1 + pmulhw xmm6,[rel PW_F0707] ; xmm6=z1 movdqa xmm1,xmm4 movdqa xmm7,xmm3 @@ -216,13 +216,13 @@ EXTN(jsimd_fdct_ifast_sse2): psllw xmm0,PRE_MULTIPLY_SCALE_BITS psllw xmm5,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm5,[PW_F0707] ; xmm5=z3 + pmulhw xmm5,[rel PW_F0707] ; xmm5=z3 movdqa xmm4,xmm2 ; xmm4=tmp10 psubw xmm2,xmm0 - pmulhw xmm2,[PW_F0382] ; xmm2=z5 - pmulhw xmm4,[PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) - pmulhw xmm0,[PW_F1306] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) + pmulhw xmm2,[rel PW_F0382] ; xmm2=z5 + pmulhw xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm0,[rel PW_F1306] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) paddw xmm4,xmm2 ; xmm4=z2 paddw xmm0,xmm2 ; xmm0=z4 @@ -328,7 +328,7 @@ EXTN(jsimd_fdct_ifast_sse2): paddw xmm5,xmm3 psllw xmm5,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm5,[PW_F0707] ; xmm5=z1 + pmulhw xmm5,[rel PW_F0707] ; xmm5=z1 movdqa xmm6,xmm4 movdqa xmm2,xmm3 @@ -355,13 +355,13 @@ EXTN(jsimd_fdct_ifast_sse2): psllw xmm1,PRE_MULTIPLY_SCALE_BITS psllw xmm0,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm0,[PW_F0707] ; xmm0=z3 + pmulhw xmm0,[rel PW_F0707] ; xmm0=z3 movdqa xmm4,xmm7 ; xmm4=tmp10 psubw xmm7,xmm1 - pmulhw xmm7,[PW_F0382] ; xmm7=z5 - pmulhw xmm4,[PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) - pmulhw xmm1,[PW_F1306] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) + pmulhw xmm7,[rel PW_F0382] ; xmm7=z5 + pmulhw xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm1,[rel PW_F1306] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) paddw xmm4,xmm7 ; xmm4=z2 paddw xmm1,xmm7 ; xmm1=z4 diff --git a/common/jpeg/simd/jfss2int-64.asm b/common/jpeg/simd/jfss2int-64.asm index f7879214..5fc4ac85 100644 --- a/common/jpeg/simd/jfss2int-64.asm +++ b/common/jpeg/simd/jfss2int-64.asm @@ -236,17 +236,17 @@ EXTN(jsimd_fdct_islow_sse2): punpckhwd xmm6,xmm0 movdqa xmm4,xmm7 movdqa xmm0,xmm6 - pmaddwd xmm7,[PW_F130_F054] ; xmm7=data2L - pmaddwd xmm6,[PW_F130_F054] ; xmm6=data2H - pmaddwd xmm4,[PW_F054_MF130] ; xmm4=data6L - pmaddwd xmm0,[PW_F054_MF130] ; xmm0=data6H + pmaddwd xmm7,[rel PW_F130_F054] ; xmm7=data2L + pmaddwd xmm6,[rel PW_F130_F054] ; xmm6=data2H + pmaddwd xmm4,[rel PW_F054_MF130] ; xmm4=data6L + pmaddwd xmm0,[rel PW_F054_MF130] ; xmm0=data6H - paddd xmm7,[PD_DESCALE_P1] - paddd xmm6,[PD_DESCALE_P1] + paddd xmm7,[rel PD_DESCALE_P1] + paddd xmm6,[rel PD_DESCALE_P1] psrad xmm7,DESCALE_P1 psrad xmm6,DESCALE_P1 - paddd xmm4,[PD_DESCALE_P1] - paddd xmm0,[PD_DESCALE_P1] + paddd xmm4,[rel PD_DESCALE_P1] + paddd xmm0,[rel PD_DESCALE_P1] psrad xmm4,DESCALE_P1 psrad xmm0,DESCALE_P1 @@ -281,10 +281,10 @@ EXTN(jsimd_fdct_islow_sse2): punpckhwd xmm4,xmm0 movdqa xmm6,xmm7 movdqa xmm0,xmm4 - pmaddwd xmm7,[PW_MF078_F117] ; xmm7=z3L - pmaddwd xmm4,[PW_MF078_F117] ; xmm4=z3H - pmaddwd xmm6,[PW_F117_F078] ; xmm6=z4L - pmaddwd xmm0,[PW_F117_F078] ; xmm0=z4H + pmaddwd xmm7,[rel PW_MF078_F117] ; xmm7=z3L + pmaddwd xmm4,[rel PW_MF078_F117] ; xmm4=z3H + pmaddwd xmm6,[rel PW_F117_F078] ; xmm6=z4L + pmaddwd xmm0,[rel PW_F117_F078] ; xmm0=z4H movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H @@ -311,22 +311,22 @@ EXTN(jsimd_fdct_islow_sse2): punpckhwd xmm4,xmm1 movdqa xmm2,xmm7 movdqa xmm1,xmm4 - pmaddwd xmm7,[PW_MF060_MF089] ; xmm7=tmp4L - pmaddwd xmm4,[PW_MF060_MF089] ; xmm4=tmp4H - pmaddwd xmm2,[PW_MF089_F060] ; xmm2=tmp7L - pmaddwd xmm1,[PW_MF089_F060] ; xmm1=tmp7H + pmaddwd xmm7,[rel PW_MF060_MF089] ; xmm7=tmp4L + pmaddwd xmm4,[rel PW_MF060_MF089] ; xmm4=tmp4H + pmaddwd xmm2,[rel PW_MF089_F060] ; xmm2=tmp7L + pmaddwd xmm1,[rel PW_MF089_F060] ; xmm1=tmp7H paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H paddd xmm2,xmm6 ; xmm2=data1L paddd xmm1,xmm0 ; xmm1=data1H - paddd xmm7,[PD_DESCALE_P1] - paddd xmm4,[PD_DESCALE_P1] + paddd xmm7,[rel PD_DESCALE_P1] + paddd xmm4,[rel PD_DESCALE_P1] psrad xmm7,DESCALE_P1 psrad xmm4,DESCALE_P1 - paddd xmm2,[PD_DESCALE_P1] - paddd xmm1,[PD_DESCALE_P1] + paddd xmm2,[rel PD_DESCALE_P1] + paddd xmm1,[rel PD_DESCALE_P1] psrad xmm2,DESCALE_P1 psrad xmm1,DESCALE_P1 @@ -339,22 +339,22 @@ EXTN(jsimd_fdct_islow_sse2): punpckhwd xmm1,xmm3 movdqa xmm5,xmm4 movdqa xmm3,xmm1 - pmaddwd xmm4,[PW_MF050_MF256] ; xmm4=tmp5L - pmaddwd xmm1,[PW_MF050_MF256] ; xmm1=tmp5H - pmaddwd xmm5,[PW_MF256_F050] ; xmm5=tmp6L - pmaddwd xmm3,[PW_MF256_F050] ; xmm3=tmp6H + pmaddwd xmm4,[rel PW_MF050_MF256] ; xmm4=tmp5L + pmaddwd xmm1,[rel PW_MF050_MF256] ; xmm1=tmp5H + pmaddwd xmm5,[rel PW_MF256_F050] ; xmm5=tmp6L + pmaddwd xmm3,[rel PW_MF256_F050] ; xmm3=tmp6H paddd xmm4,xmm6 ; xmm4=data5L paddd xmm1,xmm0 ; xmm1=data5H paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H - paddd xmm4,[PD_DESCALE_P1] - paddd xmm1,[PD_DESCALE_P1] + paddd xmm4,[rel PD_DESCALE_P1] + paddd xmm1,[rel PD_DESCALE_P1] psrad xmm4,DESCALE_P1 psrad xmm1,DESCALE_P1 - paddd xmm5,[PD_DESCALE_P1] - paddd xmm3,[PD_DESCALE_P1] + paddd xmm5,[rel PD_DESCALE_P1] + paddd xmm3,[rel PD_DESCALE_P1] psrad xmm5,DESCALE_P1 psrad xmm3,DESCALE_P1 @@ -457,8 +457,8 @@ EXTN(jsimd_fdct_islow_sse2): paddw xmm7,xmm2 ; xmm7=tmp10+tmp11 psubw xmm5,xmm2 ; xmm5=tmp10-tmp11 - paddw xmm7,[PW_DESCALE_P2X] - paddw xmm5,[PW_DESCALE_P2X] + paddw xmm7,[rel PW_DESCALE_P2X] + paddw xmm5,[rel PW_DESCALE_P2X] psraw xmm7,PASS1_BITS ; xmm7=data0 psraw xmm5,PASS1_BITS ; xmm5=data4 @@ -480,17 +480,17 @@ EXTN(jsimd_fdct_islow_sse2): punpckhwd xmm2,xmm6 movdqa xmm1,xmm4 movdqa xmm6,xmm2 - pmaddwd xmm4,[PW_F130_F054] ; xmm4=data2L - pmaddwd xmm2,[PW_F130_F054] ; xmm2=data2H - pmaddwd xmm1,[PW_F054_MF130] ; xmm1=data6L - pmaddwd xmm6,[PW_F054_MF130] ; xmm6=data6H + pmaddwd xmm4,[rel PW_F130_F054] ; xmm4=data2L + pmaddwd xmm2,[rel PW_F130_F054] ; xmm2=data2H + pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=data6L + pmaddwd xmm6,[rel PW_F054_MF130] ; xmm6=data6H - paddd xmm4,[PD_DESCALE_P2] - paddd xmm2,[PD_DESCALE_P2] + paddd xmm4,[rel PD_DESCALE_P2] + paddd xmm2,[rel PD_DESCALE_P2] psrad xmm4,DESCALE_P2 psrad xmm2,DESCALE_P2 - paddd xmm1,[PD_DESCALE_P2] - paddd xmm6,[PD_DESCALE_P2] + paddd xmm1,[rel PD_DESCALE_P2] + paddd xmm6,[rel PD_DESCALE_P2] psrad xmm1,DESCALE_P2 psrad xmm6,DESCALE_P2 @@ -525,10 +525,10 @@ EXTN(jsimd_fdct_islow_sse2): punpckhwd xmm1,xmm6 movdqa xmm2,xmm4 movdqa xmm6,xmm1 - pmaddwd xmm4,[PW_MF078_F117] ; xmm4=z3L - pmaddwd xmm1,[PW_MF078_F117] ; xmm1=z3H - pmaddwd xmm2,[PW_F117_F078] ; xmm2=z4L - pmaddwd xmm6,[PW_F117_F078] ; xmm6=z4H + pmaddwd xmm4,[rel PW_MF078_F117] ; xmm4=z3L + pmaddwd xmm1,[rel PW_MF078_F117] ; xmm1=z3H + pmaddwd xmm2,[rel PW_F117_F078] ; xmm2=z4L + pmaddwd xmm6,[rel PW_F117_F078] ; xmm6=z4H movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=z3H @@ -555,22 +555,22 @@ EXTN(jsimd_fdct_islow_sse2): punpckhwd xmm1,xmm5 movdqa xmm0,xmm4 movdqa xmm5,xmm1 - pmaddwd xmm4,[PW_MF060_MF089] ; xmm4=tmp4L - pmaddwd xmm1,[PW_MF060_MF089] ; xmm1=tmp4H - pmaddwd xmm0,[PW_MF089_F060] ; xmm0=tmp7L - pmaddwd xmm5,[PW_MF089_F060] ; xmm5=tmp7H + pmaddwd xmm4,[rel PW_MF060_MF089] ; xmm4=tmp4L + pmaddwd xmm1,[rel PW_MF060_MF089] ; xmm1=tmp4H + pmaddwd xmm0,[rel PW_MF089_F060] ; xmm0=tmp7L + pmaddwd xmm5,[rel PW_MF089_F060] ; xmm5=tmp7H paddd xmm4, XMMWORD [wk(0)] ; xmm4=data7L paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H paddd xmm0,xmm2 ; xmm0=data1L paddd xmm5,xmm6 ; xmm5=data1H - paddd xmm4,[PD_DESCALE_P2] - paddd xmm1,[PD_DESCALE_P2] + paddd xmm4,[rel PD_DESCALE_P2] + paddd xmm1,[rel PD_DESCALE_P2] psrad xmm4,DESCALE_P2 psrad xmm1,DESCALE_P2 - paddd xmm0,[PD_DESCALE_P2] - paddd xmm5,[PD_DESCALE_P2] + paddd xmm0,[rel PD_DESCALE_P2] + paddd xmm5,[rel PD_DESCALE_P2] psrad xmm0,DESCALE_P2 psrad xmm5,DESCALE_P2 @@ -586,22 +586,22 @@ EXTN(jsimd_fdct_islow_sse2): punpckhwd xmm5,xmm7 movdqa xmm3,xmm1 movdqa xmm7,xmm5 - pmaddwd xmm1,[PW_MF050_MF256] ; xmm1=tmp5L - pmaddwd xmm5,[PW_MF050_MF256] ; xmm5=tmp5H - pmaddwd xmm3,[PW_MF256_F050] ; xmm3=tmp6L - pmaddwd xmm7,[PW_MF256_F050] ; xmm7=tmp6H + pmaddwd xmm1,[rel PW_MF050_MF256] ; xmm1=tmp5L + pmaddwd xmm5,[rel PW_MF050_MF256] ; xmm5=tmp5H + pmaddwd xmm3,[rel PW_MF256_F050] ; xmm3=tmp6L + pmaddwd xmm7,[rel PW_MF256_F050] ; xmm7=tmp6H paddd xmm1,xmm2 ; xmm1=data5L paddd xmm5,xmm6 ; xmm5=data5H paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H - paddd xmm1,[PD_DESCALE_P2] - paddd xmm5,[PD_DESCALE_P2] + paddd xmm1,[rel PD_DESCALE_P2] + paddd xmm5,[rel PD_DESCALE_P2] psrad xmm1,DESCALE_P2 psrad xmm5,DESCALE_P2 - paddd xmm3,[PD_DESCALE_P2] - paddd xmm7,[PD_DESCALE_P2] + paddd xmm3,[rel PD_DESCALE_P2] + paddd xmm7,[rel PD_DESCALE_P2] psrad xmm3,DESCALE_P2 psrad xmm7,DESCALE_P2 diff --git a/common/jpeg/simd/jfsseflt-64.asm b/common/jpeg/simd/jfsseflt-64.asm index d8f72468..859ff54f 100644 --- a/common/jpeg/simd/jfsseflt-64.asm +++ b/common/jpeg/simd/jfsseflt-64.asm @@ -159,7 +159,7 @@ EXTN(jsimd_fdct_float_sse): addps xmm6,xmm4 ; xmm6=tmp11 addps xmm0,xmm5 - mulps xmm0,[PD_0_707] ; xmm0=z1 + mulps xmm0,[rel PD_0_707] ; xmm0=z1 movaps xmm7,xmm1 movaps xmm4,xmm5 @@ -182,13 +182,13 @@ EXTN(jsimd_fdct_float_sse): addps xmm3,xmm6 ; xmm3=tmp11 addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 - mulps xmm3,[PD_0_707] ; xmm3=z3 + mulps xmm3,[rel PD_0_707] ; xmm3=z3 movaps xmm1,xmm2 ; xmm1=tmp10 subps xmm2,xmm6 - mulps xmm2,[PD_0_382] ; xmm2=z5 - mulps xmm1,[PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) - mulps xmm6,[PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + mulps xmm2,[rel PD_0_382] ; xmm2=z5 + mulps xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) addps xmm1,xmm2 ; xmm1=z2 addps xmm6,xmm2 ; xmm6=z4 @@ -294,7 +294,7 @@ EXTN(jsimd_fdct_float_sse): addps xmm6,xmm4 ; xmm6=tmp11 addps xmm0,xmm5 - mulps xmm0,[PD_0_707] ; xmm0=z1 + mulps xmm0,[rel PD_0_707] ; xmm0=z1 movaps xmm7,xmm1 movaps xmm4,xmm5 @@ -317,13 +317,13 @@ EXTN(jsimd_fdct_float_sse): addps xmm3,xmm6 ; xmm3=tmp11 addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 - mulps xmm3,[PD_0_707] ; xmm3=z3 + mulps xmm3,[rel PD_0_707] ; xmm3=z3 movaps xmm1,xmm2 ; xmm1=tmp10 subps xmm2,xmm6 - mulps xmm2,[PD_0_382] ; xmm2=z5 - mulps xmm1,[PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) - mulps xmm6,[PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + mulps xmm2,[rel PD_0_382] ; xmm2=z5 + mulps xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) addps xmm1,xmm2 ; xmm1=z2 addps xmm6,xmm2 ; xmm6=z4 diff --git a/common/jpeg/simd/jiss2flt-64.asm b/common/jpeg/simd/jiss2flt-64.asm index 572909d2..a3d4a549 100644 --- a/common/jpeg/simd/jiss2flt-64.asm +++ b/common/jpeg/simd/jiss2flt-64.asm @@ -181,7 +181,7 @@ EXTN(jsimd_idct_float_sse2): addps xmm4,xmm2 ; xmm4=tmp10 addps xmm5,xmm3 ; xmm5=tmp13 - mulps xmm1,[PD_1_414] + mulps xmm1,[rel PD_1_414] subps xmm1,xmm5 ; xmm1=tmp12 movaps xmm6,xmm4 @@ -231,13 +231,13 @@ EXTN(jsimd_idct_float_sse2): subps xmm2,xmm5 addps xmm1,xmm5 ; xmm1=tmp7 - mulps xmm2,[PD_1_414] ; xmm2=tmp11 + mulps xmm2,[rel PD_1_414] ; xmm2=tmp11 movaps xmm3,xmm0 addps xmm0,xmm4 - mulps xmm0,[PD_1_847] ; xmm0=z5 - mulps xmm3,[PD_M2_613] ; xmm3=(z10 * -2.613125930) - mulps xmm4,[PD_1_082] ; xmm4=(z12 * 1.082392200) + mulps xmm0,[rel PD_1_847] ; xmm0=z5 + mulps xmm3,[rel PD_M2_613] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[rel PD_1_082] ; xmm4=(z12 * 1.082392200) addps xmm3,xmm0 ; xmm3=tmp12 subps xmm4,xmm0 ; xmm4=tmp10 @@ -344,7 +344,7 @@ EXTN(jsimd_idct_float_sse2): addps xmm4,xmm2 ; xmm4=tmp10 addps xmm5,xmm3 ; xmm5=tmp13 - mulps xmm1,[PD_1_414] + mulps xmm1,[rel PD_1_414] subps xmm1,xmm5 ; xmm1=tmp12 movaps xmm6,xmm4 @@ -375,13 +375,13 @@ EXTN(jsimd_idct_float_sse2): subps xmm2,xmm5 addps xmm1,xmm5 ; xmm1=tmp7 - mulps xmm2,[PD_1_414] ; xmm2=tmp11 + mulps xmm2,[rel PD_1_414] ; xmm2=tmp11 movaps xmm3,xmm0 addps xmm0,xmm4 - mulps xmm0,[PD_1_847] ; xmm0=z5 - mulps xmm3,[PD_M2_613] ; xmm3=(z10 * -2.613125930) - mulps xmm4,[PD_1_082] ; xmm4=(z12 * 1.082392200) + mulps xmm0,[rel PD_1_847] ; xmm0=z5 + mulps xmm3,[rel PD_M2_613] ; xmm3=(z10 * -2.613125930) + mulps xmm4,[rel PD_1_082] ; xmm4=(z12 * 1.082392200) addps xmm3,xmm0 ; xmm3=tmp12 subps xmm4,xmm0 ; xmm4=tmp10 @@ -396,7 +396,7 @@ EXTN(jsimd_idct_float_sse2): subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36) subps xmm2,xmm3 ; xmm2=tmp5 - movaps xmm1,[PD_RNDINT_MAGIC] ; xmm1=[PD_RNDINT_MAGIC] + movaps xmm1,[rel PD_RNDINT_MAGIC] ; xmm1=[rel PD_RNDINT_MAGIC] pcmpeqd xmm3,xmm3 psrld xmm3,WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..} @@ -423,7 +423,7 @@ EXTN(jsimd_idct_float_sse2): subps xmm7,xmm2 ; xmm7=data5=(05 15 25 35) subps xmm5,xmm4 ; xmm5=data3=(03 13 23 33) - movaps xmm2,[PD_RNDINT_MAGIC] ; xmm2=[PD_RNDINT_MAGIC] + movaps xmm2,[rel PD_RNDINT_MAGIC] ; xmm2=[rel PD_RNDINT_MAGIC] pcmpeqd xmm4,xmm4 psrld xmm4,WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..} @@ -439,7 +439,7 @@ EXTN(jsimd_idct_float_sse2): por xmm3,xmm7 ; xmm3=(04 05 14 15 24 25 34 35) por xmm1,xmm5 ; xmm1=(02 03 12 13 22 23 32 33) - movdqa xmm2,[PB_CENTERJSAMP] ; xmm2=[PB_CENTERJSAMP] + movdqa xmm2,[rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP] packsswb xmm6,xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35) packsswb xmm1,xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37) diff --git a/common/jpeg/simd/jiss2fst-64.asm b/common/jpeg/simd/jiss2fst-64.asm index 97dfa761..ab6af50c 100644 --- a/common/jpeg/simd/jiss2fst-64.asm +++ b/common/jpeg/simd/jiss2fst-64.asm @@ -172,7 +172,7 @@ EXTN(jsimd_idct_ifast_sse2): paddw xmm5,xmm3 ; xmm5=tmp13 psllw xmm1,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm1,[PW_F1414] + pmulhw xmm1,[rel PW_F1414] psubw xmm1,xmm5 ; xmm1=tmp12 movdqa xmm6,xmm4 @@ -212,7 +212,7 @@ EXTN(jsimd_idct_ifast_sse2): paddw xmm3,xmm0 ; xmm3=tmp7 psllw xmm4,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm4,[PW_F1414] ; xmm4=tmp11 + pmulhw xmm4,[rel PW_F1414] ; xmm4=tmp11 ; To avoid overflow... ; @@ -225,9 +225,9 @@ EXTN(jsimd_idct_ifast_sse2): movdqa xmm0,xmm5 paddw xmm5,xmm2 - pmulhw xmm5,[PW_F1847] ; xmm5=z5 - pmulhw xmm0,[PW_MF1613] - pmulhw xmm2,[PW_F1082] + pmulhw xmm5,[rel PW_F1847] ; xmm5=z5 + pmulhw xmm0,[rel PW_MF1613] + pmulhw xmm2,[rel PW_F1082] psubw xmm0,xmm1 psubw xmm2,xmm5 ; xmm2=tmp10 paddw xmm0,xmm5 ; xmm0=tmp12 @@ -337,7 +337,7 @@ EXTN(jsimd_idct_ifast_sse2): paddw xmm0,xmm3 ; xmm0=tmp13 psllw xmm5,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm5,[PW_F1414] + pmulhw xmm5,[rel PW_F1414] psubw xmm5,xmm0 ; xmm5=tmp12 movdqa xmm1,xmm2 @@ -373,7 +373,7 @@ EXTN(jsimd_idct_ifast_sse2): paddw xmm5,xmm6 ; xmm5=tmp7 psllw xmm2,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm2,[PW_F1414] ; xmm2=tmp11 + pmulhw xmm2,[rel PW_F1414] ; xmm2=tmp11 ; To avoid overflow... ; @@ -386,9 +386,9 @@ EXTN(jsimd_idct_ifast_sse2): movdqa xmm6,xmm4 paddw xmm4,xmm0 - pmulhw xmm4,[PW_F1847] ; xmm4=z5 - pmulhw xmm6,[PW_MF1613] - pmulhw xmm0,[PW_F1082] + pmulhw xmm4,[rel PW_F1847] ; xmm4=z5 + pmulhw xmm6,[rel PW_MF1613] + pmulhw xmm0,[rel PW_F1082] psubw xmm6,xmm7 psubw xmm0,xmm4 ; xmm0=tmp10 paddw xmm6,xmm4 ; xmm6=tmp12 @@ -426,7 +426,7 @@ EXTN(jsimd_idct_ifast_sse2): psraw xmm4,(PASS1_BITS+3) ; descale psraw xmm7,(PASS1_BITS+3) ; descale - movdqa xmm2,[PB_CENTERJSAMP] ; xmm2=[PB_CENTERJSAMP] + movdqa xmm2,[rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP] packsswb xmm5,xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) packsswb xmm7,xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) diff --git a/common/jpeg/simd/jiss2int-64.asm b/common/jpeg/simd/jiss2int-64.asm index cfeb42d8..97317af7 100644 --- a/common/jpeg/simd/jiss2int-64.asm +++ b/common/jpeg/simd/jiss2int-64.asm @@ -196,10 +196,10 @@ EXTN(jsimd_idct_islow_sse2): punpckhwd xmm5,xmm3 movdqa xmm1,xmm4 movdqa xmm3,xmm5 - pmaddwd xmm4,[PW_F130_F054] ; xmm4=tmp3L - pmaddwd xmm5,[PW_F130_F054] ; xmm5=tmp3H - pmaddwd xmm1,[PW_F054_MF130] ; xmm1=tmp2L - pmaddwd xmm3,[PW_F054_MF130] ; xmm3=tmp2H + pmaddwd xmm4,[rel PW_F130_F054] ; xmm4=tmp3L + pmaddwd xmm5,[rel PW_F130_F054] ; xmm5=tmp3H + pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=tmp2L + pmaddwd xmm3,[rel PW_F054_MF130] ; xmm3=tmp2H movdqa xmm6,xmm0 paddw xmm0,xmm2 ; xmm0=in0+in4 @@ -274,10 +274,10 @@ EXTN(jsimd_idct_islow_sse2): punpckhwd xmm0,xmm7 movdqa xmm5,xmm2 movdqa xmm7,xmm0 - pmaddwd xmm2,[PW_MF078_F117] ; xmm2=z3L - pmaddwd xmm0,[PW_MF078_F117] ; xmm0=z3H - pmaddwd xmm5,[PW_F117_F078] ; xmm5=z4L - pmaddwd xmm7,[PW_F117_F078] ; xmm7=z4H + pmaddwd xmm2,[rel PW_MF078_F117] ; xmm2=z3L + pmaddwd xmm0,[rel PW_MF078_F117] ; xmm0=z3H + pmaddwd xmm5,[rel PW_F117_F078] ; xmm5=z4L + pmaddwd xmm7,[rel PW_F117_F078] ; xmm7=z4H movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H @@ -304,10 +304,10 @@ EXTN(jsimd_idct_islow_sse2): punpckhwd xmm0,xmm4 movdqa xmm3,xmm2 movdqa xmm4,xmm0 - pmaddwd xmm2,[PW_MF060_MF089] ; xmm2=tmp0L - pmaddwd xmm0,[PW_MF060_MF089] ; xmm0=tmp0H - pmaddwd xmm3,[PW_MF089_F060] ; xmm3=tmp3L - pmaddwd xmm4,[PW_MF089_F060] ; xmm4=tmp3H + pmaddwd xmm2,[rel PW_MF060_MF089] ; xmm2=tmp0L + pmaddwd xmm0,[rel PW_MF060_MF089] ; xmm0=tmp0H + pmaddwd xmm3,[rel PW_MF089_F060] ; xmm3=tmp3L + pmaddwd xmm4,[rel PW_MF089_F060] ; xmm4=tmp3H paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H @@ -323,10 +323,10 @@ EXTN(jsimd_idct_islow_sse2): punpckhwd xmm0,xmm6 movdqa xmm1,xmm2 movdqa xmm6,xmm0 - pmaddwd xmm2,[PW_MF050_MF256] ; xmm2=tmp1L - pmaddwd xmm0,[PW_MF050_MF256] ; xmm0=tmp1H - pmaddwd xmm1,[PW_MF256_F050] ; xmm1=tmp2L - pmaddwd xmm6,[PW_MF256_F050] ; xmm6=tmp2H + pmaddwd xmm2,[rel PW_MF050_MF256] ; xmm2=tmp1L + pmaddwd xmm0,[rel PW_MF050_MF256] ; xmm0=tmp1H + pmaddwd xmm1,[rel PW_MF256_F050] ; xmm1=tmp2L + pmaddwd xmm6,[rel PW_MF256_F050] ; xmm6=tmp2H paddd xmm2,xmm5 ; xmm2=tmp1L paddd xmm0,xmm7 ; xmm0=tmp1H @@ -348,7 +348,7 @@ EXTN(jsimd_idct_islow_sse2): psubd xmm2,xmm3 ; xmm2=data7L psubd xmm0,xmm4 ; xmm0=data7H - movdqa xmm3,[PD_DESCALE_P1] ; xmm3=[PD_DESCALE_P1] + movdqa xmm3,[rel PD_DESCALE_P1] ; xmm3=[rel PD_DESCALE_P1] paddd xmm5,xmm3 paddd xmm7,xmm3 @@ -372,7 +372,7 @@ EXTN(jsimd_idct_islow_sse2): psubd xmm7,xmm1 ; xmm7=data6L psubd xmm0,xmm6 ; xmm0=data6H - movdqa xmm1,[PD_DESCALE_P1] ; xmm1=[PD_DESCALE_P1] + movdqa xmm1,[rel PD_DESCALE_P1] ; xmm1=[rel PD_DESCALE_P1] paddd xmm4,xmm1 paddd xmm3,xmm1 @@ -410,7 +410,7 @@ EXTN(jsimd_idct_islow_sse2): psubd xmm5,xmm4 ; xmm5=data5L psubd xmm6,xmm2 ; xmm6=data5H - movdqa xmm7,[PD_DESCALE_P1] ; xmm7=[PD_DESCALE_P1] + movdqa xmm7,[rel PD_DESCALE_P1] ; xmm7=[rel PD_DESCALE_P1] paddd xmm3,xmm7 paddd xmm0,xmm7 @@ -436,7 +436,7 @@ EXTN(jsimd_idct_islow_sse2): psubd xmm0,xmm2 ; xmm0=data4L psubd xmm6,xmm7 ; xmm6=data4H - movdqa xmm2,[PD_DESCALE_P1] ; xmm2=[PD_DESCALE_P1] + movdqa xmm2,[rel PD_DESCALE_P1] ; xmm2=[rel PD_DESCALE_P1] paddd xmm1,xmm2 paddd xmm4,xmm2 @@ -536,10 +536,10 @@ EXTN(jsimd_idct_islow_sse2): punpckhwd xmm5,xmm2 movdqa xmm1,xmm6 movdqa xmm2,xmm5 - pmaddwd xmm6,[PW_F130_F054] ; xmm6=tmp3L - pmaddwd xmm5,[PW_F130_F054] ; xmm5=tmp3H - pmaddwd xmm1,[PW_F054_MF130] ; xmm1=tmp2L - pmaddwd xmm2,[PW_F054_MF130] ; xmm2=tmp2H + pmaddwd xmm6,[rel PW_F130_F054] ; xmm6=tmp3L + pmaddwd xmm5,[rel PW_F130_F054] ; xmm5=tmp3H + pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=tmp2L + pmaddwd xmm2,[rel PW_F054_MF130] ; xmm2=tmp2H movdqa xmm3,xmm7 paddw xmm7,xmm0 ; xmm7=in0+in4 @@ -610,10 +610,10 @@ EXTN(jsimd_idct_islow_sse2): punpckhwd xmm7,xmm4 movdqa xmm5,xmm0 movdqa xmm4,xmm7 - pmaddwd xmm0,[PW_MF078_F117] ; xmm0=z3L - pmaddwd xmm7,[PW_MF078_F117] ; xmm7=z3H - pmaddwd xmm5,[PW_F117_F078] ; xmm5=z4L - pmaddwd xmm4,[PW_F117_F078] ; xmm4=z4H + pmaddwd xmm0,[rel PW_MF078_F117] ; xmm0=z3L + pmaddwd xmm7,[rel PW_MF078_F117] ; xmm7=z3H + pmaddwd xmm5,[rel PW_F117_F078] ; xmm5=z4L + pmaddwd xmm4,[rel PW_F117_F078] ; xmm4=z4H movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H @@ -640,10 +640,10 @@ EXTN(jsimd_idct_islow_sse2): punpckhwd xmm7,xmm3 movdqa xmm1,xmm0 movdqa xmm3,xmm7 - pmaddwd xmm0,[PW_MF060_MF089] ; xmm0=tmp0L - pmaddwd xmm7,[PW_MF060_MF089] ; xmm7=tmp0H - pmaddwd xmm1,[PW_MF089_F060] ; xmm1=tmp3L - pmaddwd xmm3,[PW_MF089_F060] ; xmm3=tmp3H + pmaddwd xmm0,[rel PW_MF060_MF089] ; xmm0=tmp0L + pmaddwd xmm7,[rel PW_MF060_MF089] ; xmm7=tmp0H + pmaddwd xmm1,[rel PW_MF089_F060] ; xmm1=tmp3L + pmaddwd xmm3,[rel PW_MF089_F060] ; xmm3=tmp3H paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H @@ -659,10 +659,10 @@ EXTN(jsimd_idct_islow_sse2): punpckhwd xmm7,xmm6 movdqa xmm2,xmm0 movdqa xmm6,xmm7 - pmaddwd xmm0,[PW_MF050_MF256] ; xmm0=tmp1L - pmaddwd xmm7,[PW_MF050_MF256] ; xmm7=tmp1H - pmaddwd xmm2,[PW_MF256_F050] ; xmm2=tmp2L - pmaddwd xmm6,[PW_MF256_F050] ; xmm6=tmp2H + pmaddwd xmm0,[rel PW_MF050_MF256] ; xmm0=tmp1L + pmaddwd xmm7,[rel PW_MF050_MF256] ; xmm7=tmp1H + pmaddwd xmm2,[rel PW_MF256_F050] ; xmm2=tmp2L + pmaddwd xmm6,[rel PW_MF256_F050] ; xmm6=tmp2H paddd xmm0,xmm5 ; xmm0=tmp1L paddd xmm7,xmm4 ; xmm7=tmp1H @@ -684,7 +684,7 @@ EXTN(jsimd_idct_islow_sse2): psubd xmm0,xmm1 ; xmm0=data7L psubd xmm7,xmm3 ; xmm7=data7H - movdqa xmm1,[PD_DESCALE_P2] ; xmm1=[PD_DESCALE_P2] + movdqa xmm1,[rel PD_DESCALE_P2] ; xmm1=[rel PD_DESCALE_P2] paddd xmm5,xmm1 paddd xmm4,xmm1 @@ -708,7 +708,7 @@ EXTN(jsimd_idct_islow_sse2): psubd xmm4,xmm2 ; xmm4=data6L psubd xmm7,xmm6 ; xmm7=data6H - movdqa xmm2,[PD_DESCALE_P2] ; xmm2=[PD_DESCALE_P2] + movdqa xmm2,[rel PD_DESCALE_P2] ; xmm2=[rel PD_DESCALE_P2] paddd xmm3,xmm2 paddd xmm1,xmm2 @@ -740,7 +740,7 @@ EXTN(jsimd_idct_islow_sse2): psubd xmm4,xmm1 ; xmm4=data5L psubd xmm0,xmm7 ; xmm0=data5H - movdqa xmm5,[PD_DESCALE_P2] ; xmm5=[PD_DESCALE_P2] + movdqa xmm5,[rel PD_DESCALE_P2] ; xmm5=[rel PD_DESCALE_P2] paddd xmm6,xmm5 paddd xmm2,xmm5 @@ -766,7 +766,7 @@ EXTN(jsimd_idct_islow_sse2): psubd xmm2,xmm7 ; xmm2=data4L psubd xmm0,xmm5 ; xmm0=data4H - movdqa xmm7,[PD_DESCALE_P2] ; xmm7=[PD_DESCALE_P2] + movdqa xmm7,[rel PD_DESCALE_P2] ; xmm7=[rel PD_DESCALE_P2] paddd xmm3,xmm7 paddd xmm1,xmm7 @@ -777,7 +777,7 @@ EXTN(jsimd_idct_islow_sse2): psrad xmm2,DESCALE_P2 psrad xmm0,DESCALE_P2 - movdqa xmm5,[PB_CENTERJSAMP] ; xmm5=[PB_CENTERJSAMP] + movdqa xmm5,[rel PB_CENTERJSAMP] ; xmm5=[rel PB_CENTERJSAMP] packssdw xmm3,xmm1 ; xmm3=data3=(03 13 23 33 43 53 63 73) packssdw xmm2,xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74) diff --git a/common/jpeg/simd/jiss2red-64.asm b/common/jpeg/simd/jiss2red-64.asm index 381180c7..dcf745bd 100644 --- a/common/jpeg/simd/jiss2red-64.asm +++ b/common/jpeg/simd/jiss2red-64.asm @@ -186,10 +186,10 @@ EXTN(jsimd_idct_4x4_sse2): punpckhwd xmm5,xmm1 movdqa xmm0,xmm4 movdqa xmm1,xmm5 - pmaddwd xmm4,[PW_F256_F089] ; xmm4=(tmp2L) - pmaddwd xmm5,[PW_F256_F089] ; xmm5=(tmp2H) - pmaddwd xmm0,[PW_F106_MF217] ; xmm0=(tmp0L) - pmaddwd xmm1,[PW_F106_MF217] ; xmm1=(tmp0H) + pmaddwd xmm4,[rel PW_F256_F089] ; xmm4=(tmp2L) + pmaddwd xmm5,[rel PW_F256_F089] ; xmm5=(tmp2H) + pmaddwd xmm0,[rel PW_F106_MF217] ; xmm0=(tmp0L) + pmaddwd xmm1,[rel PW_F106_MF217] ; xmm1=(tmp0H) movdqa xmm6,xmm2 movdqa xmm7,xmm2 @@ -197,10 +197,10 @@ EXTN(jsimd_idct_4x4_sse2): punpckhwd xmm7,xmm3 movdqa xmm2,xmm6 movdqa xmm3,xmm7 - pmaddwd xmm6,[PW_MF060_MF050] ; xmm6=(tmp2L) - pmaddwd xmm7,[PW_MF060_MF050] ; xmm7=(tmp2H) - pmaddwd xmm2,[PW_F145_MF021] ; xmm2=(tmp0L) - pmaddwd xmm3,[PW_F145_MF021] ; xmm3=(tmp0H) + pmaddwd xmm6,[rel PW_MF060_MF050] ; xmm6=(tmp2L) + pmaddwd xmm7,[rel PW_MF060_MF050] ; xmm7=(tmp2H) + pmaddwd xmm2,[rel PW_F145_MF021] ; xmm2=(tmp0L) + pmaddwd xmm3,[rel PW_F145_MF021] ; xmm3=(tmp0H) paddd xmm6,xmm4 ; xmm6=tmp2L paddd xmm7,xmm5 ; xmm7=tmp2H @@ -229,8 +229,8 @@ EXTN(jsimd_idct_4x4_sse2): movdqa xmm3,xmm5 ; xmm5=in2=z2 punpcklwd xmm5,xmm0 ; xmm0=in6=z3 punpckhwd xmm3,xmm0 - pmaddwd xmm5,[PW_F184_MF076] ; xmm5=tmp2L - pmaddwd xmm3,[PW_F184_MF076] ; xmm3=tmp2H + pmaddwd xmm5,[rel PW_F184_MF076] ; xmm5=tmp2L + pmaddwd xmm3,[rel PW_F184_MF076] ; xmm3=tmp2H movdqa xmm4,xmm1 movdqa xmm0,xmm2 @@ -248,7 +248,7 @@ EXTN(jsimd_idct_4x4_sse2): psubd xmm5,xmm6 ; xmm5=data3L psubd xmm3,xmm7 ; xmm3=data3H - movdqa xmm6,[PD_DESCALE_P1_4] ; xmm6=[PD_DESCALE_P1_4] + movdqa xmm6,[rel PD_DESCALE_P1_4] ; xmm6=[rel PD_DESCALE_P1_4] paddd xmm1,xmm6 paddd xmm2,xmm6 @@ -272,7 +272,7 @@ EXTN(jsimd_idct_4x4_sse2): psubd xmm2,xmm7 ; xmm2=data2L psubd xmm3,xmm6 ; xmm3=data2H - movdqa xmm7,[PD_DESCALE_P1_4] ; xmm7=[PD_DESCALE_P1_4] + movdqa xmm7,[rel PD_DESCALE_P1_4] ; xmm7=[rel PD_DESCALE_P1_4] paddd xmm4,xmm7 paddd xmm0,xmm7 @@ -326,10 +326,10 @@ EXTN(jsimd_idct_4x4_sse2): punpckhwd xmm6,xmm3 movdqa xmm5,xmm1 movdqa xmm2,xmm6 - pmaddwd xmm1,[PW_F256_F089] ; xmm1=(tmp2) - pmaddwd xmm6,[PW_MF060_MF050] ; xmm6=(tmp2) - pmaddwd xmm5,[PW_F106_MF217] ; xmm5=(tmp0) - pmaddwd xmm2,[PW_F145_MF021] ; xmm2=(tmp0) + pmaddwd xmm1,[rel PW_F256_F089] ; xmm1=(tmp2) + pmaddwd xmm6,[rel PW_MF060_MF050] ; xmm6=(tmp2) + pmaddwd xmm5,[rel PW_F106_MF217] ; xmm5=(tmp0) + pmaddwd xmm2,[rel PW_F145_MF021] ; xmm2=(tmp0) paddd xmm6,xmm1 ; xmm6=tmp2 paddd xmm2,xmm5 ; xmm2=tmp0 @@ -337,7 +337,7 @@ EXTN(jsimd_idct_4x4_sse2): ; -- Even part punpcklwd xmm0,xmm3 - pmaddwd xmm0,[PW_F184_MF076] ; xmm0=tmp2 + pmaddwd xmm0,[rel PW_F184_MF076] ; xmm0=tmp2 movdqa xmm7,xmm4 paddd xmm4,xmm0 ; xmm4=tmp10 @@ -345,7 +345,7 @@ EXTN(jsimd_idct_4x4_sse2): ; -- Final output stage - movdqa xmm1,[PD_DESCALE_P2_4] ; xmm1=[PD_DESCALE_P2_4] + movdqa xmm1,[rel PD_DESCALE_P2_4] ; xmm1=[rel PD_DESCALE_P2_4] movdqa xmm5,xmm4 movdqa xmm3,xmm7 @@ -375,7 +375,7 @@ EXTN(jsimd_idct_4x4_sse2): punpckhdq xmm6,xmm0 ; xmm6=(20 21 22 23 30 31 32 33) packsswb xmm4,xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..) - paddb xmm4,[PB_CENTERJSAMP] + paddb xmm4,[rel PB_CENTERJSAMP] pshufd xmm2,xmm4,0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..) pshufd xmm1,xmm4,0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..) @@ -457,8 +457,8 @@ EXTN(jsimd_idct_2x2_sse2): movdqa xmm5,xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57) punpcklwd xmm4,xmm1 ; xmm4=(10 30 11 31 ** ** 13 33) punpcklwd xmm5,xmm3 ; xmm5=(50 70 51 71 ** ** 53 73) - pmaddwd xmm4,[PW_F362_MF127] - pmaddwd xmm5,[PW_F085_MF072] + pmaddwd xmm4,[rel PW_F362_MF127] + pmaddwd xmm5,[rel PW_F085_MF072] psrld xmm0,WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --) pand xmm1,xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37) @@ -466,8 +466,8 @@ EXTN(jsimd_idct_2x2_sse2): pand xmm3,xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77) por xmm0,xmm1 ; xmm0=(11 31 13 33 15 35 17 37) por xmm2,xmm3 ; xmm2=(51 71 53 73 55 75 57 77) - pmaddwd xmm0,[PW_F362_MF127] - pmaddwd xmm2,[PW_F085_MF072] + pmaddwd xmm0,[rel PW_F362_MF127] + pmaddwd xmm2,[rel PW_F085_MF072] paddd xmm4,xmm5 ; xmm4=tmp0[col0 col1 **** col3] paddd xmm0,xmm2 ; xmm0=tmp0[col1 col3 col5 col7] @@ -494,7 +494,7 @@ EXTN(jsimd_idct_2x2_sse2): psubd xmm3,xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **) psubd xmm5,xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7) - movdqa xmm2,[PD_DESCALE_P1_2] ; xmm2=[PD_DESCALE_P1_2] + movdqa xmm2,[rel PD_DESCALE_P1_2] ; xmm2=[rel PD_DESCALE_P1_2] punpckldq xmm6,xmm3 ; xmm6=(A0 B0 ** **) @@ -533,8 +533,8 @@ EXTN(jsimd_idct_2x2_sse2): packssdw xmm1,xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3) packssdw xmm7,xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7) - pmaddwd xmm1,[PW_F362_MF127] - pmaddwd xmm7,[PW_F085_MF072] + pmaddwd xmm1,[rel PW_F362_MF127] + pmaddwd xmm7,[rel PW_F085_MF072] paddd xmm1,xmm7 ; xmm1=tmp0[row0 row1 row0 row1] @@ -550,12 +550,12 @@ EXTN(jsimd_idct_2x2_sse2): punpckldq xmm6,xmm4 ; xmm6=(C0 D0 C1 D1) - paddd xmm6,[PD_DESCALE_P2_2] + paddd xmm6,[rel PD_DESCALE_P2_2] psrad xmm6,DESCALE_P2_2 packssdw xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1) packsswb xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..) - paddb xmm6,[PB_CENTERJSAMP] + paddb xmm6,[rel PB_CENTERJSAMP] pextrw ebx,xmm6,0x00 ; ebx=(C0 D0 -- --) pextrw ecx,xmm6,0x01 ; ecx=(C1 D1 -- --)