Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PRFM (register, PLDL1STRM)

Test 1: uops

Code:

  prfm pldl1strm, [x6]
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.001

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed)
1004244510011100010003491010001000100011000
1004209710011100010003490610001000100011000
1004209710011100010003498010001000100011000
1004207810011100010003495010001000100011000
1004210010011100010003499010001000100011000
1004209710011100010003495010001000100011000
1004210110011100010003489610001000100011000
1004211110011100010003483010001000100011000
1004210310011100010003488210001000100011000
1004209710011100010003490610001000100011000

Test 2: throughput

Code:

  prfm pldl1strm, [x6]
  add x6, x6, 64

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0110

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
20204211642010310103100001010210006614053504652011810214100141020310003100011000010100
20204201012010610106100001010510000617203477612010610208100081020410004100011000010100
20204200642010310103100001010410000613713495932010410206100061020410004100031000010100
20204199982010110101100001010010006614673501412011810214100141020410004100031000010100
20204202142010510105100001011010006614053504652011810214100141021410014100061000010100
20204201102010610106100001011210006614053504652011810214100141021410014100061000010100
20204201102010610106100001011210006614053504652011810214100141021410014100061000010100
20204201102010610106100001011210039619023501072018210245100461021410014100061000010100
20204201102010610106100001011210006614053504652011810214100141021410014100061000010100
20204201102010610106100001011210006614053504652011810214100141021410014100061000010100

1000 unrolls and 10 iterations

Result (median cycles for code): 1.9959

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
20024202362001610016100001002210006602923478152002810034100141002010000100011000010010
20024198902001110011100001001010000612763475532001010020100001002010000100011000010010
20024198662001110011100001001010000607413467012001010020100001002010000100011000010010
20024198552001110011100001001010000612663492332001010020100001002010000100011000010010
20024200092001110011100001001010000610833470672001010020100001002010000100011000010010
20024199442001110011100001001010000602563472372001010020100001002010000100011000010010
20024199442001110011100001001010000602563472372001010020100001002010000100011000010010
20024199442001110011100001001010000602563472372001010020100001002010000100011000010010
20024200162001110011100001001010000608893470152001010020100001002010000100011000010010
20024200002001110011100001001010000599743477352001010020100001002010000100011000010010

Test 3: throughput

Code:

  prfm pldl1strm, [x6]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0958

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
102042084810101101010000100010057307364555101592021006920010004110000100
102042055710101101010000100010006300361670101062001001220010008110000100
102042077110101101010000100010004300363354101042001001220010008110000100
102042079010101101010000100010004300361410101042001001220010004110000100
106822326210664444010220401010004300357180101042001001220010012110000100
102042096210101101010000100010048307366025101502021006220010012110000100
7336917111468824379437030811371976910000300348060101002001000820010008110000100
102042001110101101010000100010002300345402101022001001220010004110000100
102042005810101101010000100010004300345122101042001001220010004110000100
102042051210101101010000100010000300357056101002001000620010008110000100

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0050

retire uop (01) | cycle (02) | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef)
1002420052100111110000101000630349222100162010012201000011000010
1002420119100111110000101000030349214100102010000201000011000010
1002420048100111110000101000030349214100102010000201000011000010
1002420048100111110000101000030349214100102010000201000011000010
1002420048100111110000101000030349214100102010000201000011000010
1002420048100111110000101000030349214100102010000201000011000010
1002420048100111110000101000030349214100102010000201000011000010
1002420048100111110000101000030349214100102010000201000011000010
1002420048100111110000101000030349214100102010000201000011000010
1002420048100111110000101000030349214100102010000201000011000010