Skip to content
Snippets Groups Projects
Commit 5626bd42 authored by Simon Pilgrim
Browse files

[X86] Fix SLM scheduler model for PMULLD (PR37059)

Adjust the PMULLD entry to match the numbers in the Intel Architectures Optimization Reference Manual (AoM) - PMULLD is a uop nightmare on SLM (Silvermont), and we should model it as such.

We had reports of internal regressions the last time this was attempted (rG13a0f83a05ff), but no public repros, and the tests I ran last year, when I had access to an SLM box, did not show any regression. My hunch is that the more aggressive PMULLD -> PMADDWD folds we now perform might have helped. We can revisit this if we ever receive an actual repro.

Fixes #36407
parent 656f0b82
No related branches found
No related tags found
No related merge requests found
......@@ -377,10 +377,8 @@ defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 5, [2]>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
......
......@@ -901,10 +901,10 @@ define <4 x i32> @test_mul_v4i32_v4i16_minsize(<4 x i16> %A) minsize {
define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
; SLM-LABEL: test_mul_v8i32_v8i16_minsize:
; SLM: # %bb.0:
; SLM-NEXT: pxor %xmm1, %xmm1
; SLM-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SLM-NEXT: movdqa {{.*#+}} xmm1 = [18778,18778,18778,18778]
; SLM-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SLM-NEXT: pxor %xmm3, %xmm3
; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SLM-NEXT: pmulld %xmm1, %xmm2
; SLM-NEXT: pmulld %xmm0, %xmm1
; SLM-NEXT: movdqa %xmm2, %xmm0
......@@ -946,13 +946,13 @@ define <8 x i32> @test_mul_v8i32_v8i16_minsize(<8 x i16> %A) minsize {
define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
; SLM-LABEL: test_mul_v16i32_v16i16_minsize:
; SLM: # %bb.0:
; SLM-NEXT: movdqa {{.*#+}} xmm3 = [18778,18778,18778,18778]
; SLM-NEXT: movdqa %xmm0, %xmm4
; SLM-NEXT: pxor %xmm3, %xmm3
; SLM-NEXT: pxor %xmm5, %xmm5
; SLM-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SLM-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SLM-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SLM-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SLM-NEXT: movdqa {{.*#+}} xmm3 = [18778,18778,18778,18778]
; SLM-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
; SLM-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
; SLM-NEXT: pmulld %xmm3, %xmm4
; SLM-NEXT: pmulld %xmm3, %xmm0
; SLM-NEXT: pmulld %xmm3, %xmm2
......
......@@ -239,8 +239,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 * pmovzxwq (%rax), %xmm2
# CHECK-NEXT: 1 5 2.00 pmuldq %xmm0, %xmm2
# CHECK-NEXT: 1 8 2.00 * pmuldq (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * pmulld (%rax), %xmm2
# CHECK-NEXT: 7 11 11.00 pmulld %xmm0, %xmm2
# CHECK-NEXT: 7 14 11.00 * pmulld (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 ptest %xmm0, %xmm1
# CHECK-NEXT: 1 4 1.00 * ptest (%rax), %xmm1
# CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2
......@@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
# CHECK-NEXT: - - - 108.00 65.00 - - 54.00
# CHECK-NEXT: - - - 128.00 65.00 - - 54.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
......@@ -352,8 +352,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmovzxwq (%rax), %xmm2
# CHECK-NEXT: - - - 2.00 - - - - pmuldq %xmm0, %xmm2
# CHECK-NEXT: - - - 2.00 - - - 1.00 pmuldq (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pmulld %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulld (%rax), %xmm2
# CHECK-NEXT: - - - 11.00 - - - - pmulld %xmm0, %xmm2
# CHECK-NEXT: - - - 11.00 - - - 1.00 pmulld (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - ptest %xmm0, %xmm1
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 ptest (%rax), %xmm1
# CHECK-NEXT: - - - - 1.00 - - - roundpd $1, %xmm0, %xmm2
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment