Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDP (signed offset, S)

Test 1: uops

Code:

  ldp s0, s1, [x6, #0x10]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | 92 | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
20054082110010008113393157202525100010001000158031363408408106314110002000100038940811100141000100001019215810730007510396142019076516664061010510001000409409409390409
2004388211101100810239316019252510001000100015800138340840886314110002000100040840811100141000100001020195610420007610220172451907561666405010010001000409409409409390
2004408311001000430239322020272510001000100014906138340840886314110002000100038940811100141000100001021190104210075105561734519175616664051010010001000390389390409409
2004408311001000881237415706251000100010001485313833894081063121100020001000408408111001010001000010202158107300044105501734519076616654051010510001000390409409409409
20043893111111118411374150202525100010001000157651383408408106314110002000100040840811100141000100001021205810411114410546073019075616554051010510001000409409409409390
20044083111110014402393237206251000100010001483713644084081063141100020001000409408111001410001000010192001042101741054617345190756165540500510001000389409409409409
2004389311001101430337415720252510001000100015800138339138910631411000200010003883891110014100010000101919581073012751054014201907551666405100010001000409409409409409
20044083110010008114393167202525100010001000148561383408388863122100020001000388389111001410001000010192158107320175105461710190756166640500510001000409409409390390
200440821111100082123931572025251000100010001479613834084081063122100020001000408409111001410001000110172058107300044105561734519075516554051010510001000409390409409409
20044083110011008102393220202525100010001000148391383408388106314110002000100038840811100141000100001020205810420007510546172451927551656386010510001000390409409409409

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldp s0, s1, [x6, #0x10]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a7 | a8 | ac | af | b5 | dcache load miss (bf) | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
602051200518991010012003911950910946125601034010010000100003010010000100001079396573623061182851120030120054120054111881311240750100302002000010000602001000010000120035120035115020110099100401001000010000110010000010000000100001100032101121111196664000210100100001000040100120055120055120055120055120055
602041200548990011012003611950910946125601064010210001100003010010000100001079396573647061182851120030120054120035111900311241750100302002000010000602001000010000120054120051115020110099100401001000010000010010000110000000100001100132101121111196664000213100100001000040100120055120055120055120055120055
602041200548990010012003911950910946125601034010210001100003010010000100001079552573529361182851120030120054120051111900311240750100302002000010000602001000010000120035120051115020110099100401001000010000110010000010000000100001100032101121111196464000213109100001000040100120055120055120061120055120055
602041200548990010012003911950910945925601034010210001100003010010000100491079552573608461182851120496120051120054111900311240750100302002000010000602001000010000120145120051315020110099100401001000010000110010000110000220100001110032101108111196664000213109100001000040100120055120055120058120055120055
602041200548990010012003611949510946125601034010210000100003010010000100001079552573623061182851120090120054120054111900311240750100302002000010000602001000010000120057120051115020110099100401001000010000010010000110000000100001100032101121111196664000210109100001000040100120036120055120055120055120055
602041200358990010012003611950910945925601034010210001100003010010000100001079396573623061175991120011120054120054111900311240750100302002000010000602001000010000120146120038115020110099100401001000010000010010000010000003100001100032101108111196664000213109100001000040100120055120055120055120055120036
6020412005489900700120039119509109461256010340102100011000030100100001000010794055736374611828511200281200541200511119003112407501003020020000100006020010000100001200541200511150201100991004010010000100001100100001100000011000011100321011211111966640002131012100001000040100120055120036120055120055120036
602041200518990010012003611949510946125601034010210001100003010010000100001079396573623061175991120027120051120054111900311241750100302002000010000602001000010000120035120051115020110099100401001000010000110010000010000003100001100032101108111196664000210109100001000040100120052120055120055120055120052
60204120051899100001200391194951094432560103401001000010000301001000010000107955257362306118285112002712005112005111190031124075010030200200001000060200100001000012005412005111502011009910040100100001000011001000011000000010000110003210110811119656400020109100001000040100120052120052120055120055120055
602041200358990070012003611950910946125601034010210001100003010010000100001079396573623061182851120011120051120051111900311240750100302002000010000602001000010000120054120051115020110099100401001000010000010010000110000000100000100032101121111196564000213109100001000040100120036120055120055120036120055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | bb | dcache load miss (bf) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
600251200518990010000101012003911948410944325600134001010000100003001010000100001079858573623061257461120011120054120035111903311245150271300202000010000600201000010000120054120051115002110910400101000010000110100001100001010000101003140199111196504000001012100001000040010120055120055120036120055120036
600241200358990000000000012003911948410944325600134001210000100003001010000100001079858573529361257461120030120054120054111922311245150010300202000010000600201000010000120035120051115002110910400101000010000110100001100000010000000003140199111196504000201012100001000040010120055120055120052120036120057
60024120035899000000010101200361194841094612560010400121000010000300101000010000107985857352936124344112002712003512005411192031124525001030020200001000060020100001000012003512005111500211091040010100001000001010001110000061000000100314019911119669400000012100001000040010120036120036120052120055120036
6002412005489900000001000120020119489109443256001340012100011000030010100001000010798885735293612574611200271200541200541119223112451500103002020000100006002010000100001200541200511150021109104001010000100001101000211000040100001010031401941111967040002131312100001000040010120055120055120055120055120036
600241200358990000000100012002011948710946125600104001210001100003001010000100001079858573608461257461120030120054120035111922311243750010300202000010000600201000010000120035120051115002110910400101000010000010100001100000010000101003140194111196694000010109100001000040010120055120052120052120036120036
60024120035899000000010001200391194871094432560010400121000110000300101000010000107988857362306124344112003012005412005111190331124515001030020200001000060020100001000012005412005111500211091040010100001000011010000110000031000011100314019911119672400001300100001000040010120036120052120036120036120059
60024120052900000000010001200201194891094612560013400121000110000300101000010000107985857362306125899112003012005112005411190331124375001030020200001000060020100001000012005112005111500211091040010100001000001010000110000001000010100314019411119672400020012100001000040010120055120052120055120036120055
60024120054899000000040101200201194891094432560016400121000110000300101000010000107976957362306125746112001112005412005411190331124375001030020200001000060020100001000012003512003511500211091040010100001000001010000010000001000010100314019411119672400020130100001000040010120052120052120052120055120055
6002412003589900000001000120039119489109459256001040010100011000030010100621000010799485735341612434411200111200351200511119223112437500103002020000100006002010000100001200541200511150021109104001010000100000101000011000261010000101003329194111196724000213130100001000040010120077120038120058120055120055
6002412003589900000001201012003911948710946125600134001210001100003001010000100001079888573623061243441120030120054120035111922311245150010300202000010000600201000010064120054120054115002110910400101000010000110100001100000010000101003140199111196724000201312100001000040010120055120036120036120036120055

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ldp s0, s1, [x6, #0x10]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01) | cycle (02) | 03 | 07 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
60205120050899000000000010120035119538109443256010340102100011000030100100001000010795395735293611788201200261200471200501118953112374501003020020000100006020010000100001200501200351150201100991004010010000100001100100000110000100100001100321031352311965740002660100001000040100120054120051120036120048120048
60204120047899000011001000120035119503109458256010340102100011000030100100001000010795395735888611788201200231200471200471118953112374501003020020000100006020010000100001200471200471150201100991004010010000100001100100000110000000100001100321031353311965540002960100001000040100120054120151120053120036120036
60204120035899000000000000120035119494109443256010340102100011000030100100001000010791265736035611788201200681200501200351119033112416501003020020000100006020010000100001200501200471150201100991004010010000100001100100000110000000100000100321021083311965540002095100001000040100120049120036120073120057120037
60204120047899000000000000120020119494109458256010340102100011000030100100001000010791265735293611788201200261200501200501118953112416501003020020000100006020010000100001200471200471150201100991004010010000100001100100000010000100100001100321021353311965540002068100001000040100120051120051120048120036120051
60204120047899000000000000120020119538109458256011540102100011000030100100001000010795395735293611825001200111200351200501119063112416501003020020000100006020010000100001200501200471150201100991004010010000100001100100000010000000100000000321031083311965540002990100001000040100120036120036120048120051120048
60204120050899000000001010120020119494109458526010340100100011000030100100001000010795395735293611759901200111200501200501118813112416501003020020000100006020010000100001200501200351150201100991004010010000100001100100000110000000100000100321031353311965540002998100001000040100120056120036120036120036120049
60204120035899000000001010120032119503109458256010040100100011000030100100001000010795395736035611788201200111200501200501118953112421501003020020000100006020010000100001200501200471150201100991004010010000100001100100000110000000100001100321031352311965540002990100001000040100120072120058120051120036120051
60204120047899000000001000120035119494109458256010040102100011000030100100001000010795395736035611825001200261200471200501119033112416501003165722338112326727811273112341222781223122515020110099100401001000010000110010035011003714921861003311403936820910612158240002968100001000040100120037120048120063120054120931
602041206339166141003225422522881012304712082811041111306049340418100751006435345103671009810793305736035611825001226601214801200511121711961139145594434737231881166467374117511133912266412205826150201100991004010010000100001100100383110040120100001100321031083411965740002905100001000040100120051120151120144120036120051
60204120035899000000000001120035119494109458256010340102100011000030100100001000010795395735293611825001200261200471200501118813112416501003020020000100006020010000100001200501200351150201100991004010010000100000100100000010000003100000000321031103311965540002665100001000040100120051120036120151120049120039

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire (01) | cycle (02) | 03 | 04 | 08 | 0e | 0f | 18 | 1e | 22 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
600251200578990001011001200201194921094602560013400101000010000300101000010000107985857360846125746112002712005412005411190331124485001030020200001000060020100001000012003512005111500211091040010100001000001010002111000000310000110031401194691196694000213012100001000040010120055120116120438120055120036
600241200358990000000001229221209081123838916001340012100021000232969116821150411376395735437612444811200301200351200351119033112437500103002020000100006002010000100001200511200541150021109104001010000100000101000001100000001000001003140899851196724000210100100001000040010120054120130120075120056120055
600241200548990000001001200201195631094612560013400101000110000300101000010000107988857352936125746112001112005412003511192231124375001030020200001000060020100001000012005412003511500211091040010100001000001010000011000000010000110031406995811965040002131312100001000040010120060120052120125120036120055
6002412005489900000600012003911958610945925600134001210001100003001010000100001079888573678061243441120030120035120035111919311243750010300202000010000600201000010000120035120051115002110910400101000010000010100000110000000100000000314079464119650400020139100001000040010120055120104120049120060120055
6002412003589900000100012002011952010946525600134001210001100003015510000100001079888573623061243441120011120035120054111922311245150010300202000010000600201000010000120054120051115002110910400101000010000010100000110000000100001100314089985119672400000109100001000040010120055120052120141120037120055
60024120035899000001000120039119576109550456001340010100011000030010100001000010797695735293612587911200351200541200541119033112437500103002020000100006002010000100001200541200351150021109104001010000100000101000001100000001000010003140699571196724000213130100001000040010120036120052120101120439120055
6002412005489900110300012003911960710945925600104001210000100003001010000100001079769573529361243441120030120054120054111922311245150010300202000010000600201000010000120035120051115002110910400101000010000010100000110000002011000011003140717461196724000013012100001000040010120055120052120120120058120036
60024120054899001101100120039119566109461256001340012100011000030010100001000010798885736084612587911200271200351200351119223112448500103002020000100006002010000100001200541200511150021109104001010000100000101000001100000001000001003140699571196724000010012100001000040010120058120052120121120037120055
600241200358990000011001200391195381094602560013400121000110000300101000010000107988857362306125746112003012005412005411192231124515001030020200001000060020100001000012005412005111500211091040010100001000001010000011000000010000100031403947511965040002131312100001000040010120055120055120109120038120055
6002412003589900000600012003911949810944425600134001210000100003001010000100001079888573529361258791120030120054120054111922311244850010300202000010000600201000010000120054120051115002110910400101000010000010100000010000000100000100314059979119650400020139100001000040010120055120052120104120079120055

Test 4: throughput

Count: 8

Code:

  ldp s0, s1, [x6, #0x10]
  ldp s0, s1, [x6, #0x10]
  ldp s0, s1, [x6, #0x10]
  ldp s0, s1, [x6, #0x10]
  ldp s0, s1, [x6, #0x10]
  ldp s0, s1, [x6, #0x10]
  ldp s0, s1, [x6, #0x10]
  ldp s0, s1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 0e | 0f | 19 | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160205267232010004102267122121216258010010080000100800005001174628126702267272672266503668580100200160000200800002673226738118020110099100100800008000001008000003928801710139800006135390511011611267190106280000800001002672326723267232672826728
1602042672720001045002671221812162580100100800001008000050011746280267022672726722665036685801002001600002008000026727267301180201100991001008000080000010080000039080039013980039610005110116112673301010480000800001002672326742267282672826723
160204267302010004502267122121812258010010080000100800005001174628126697267272672766503668580100200160000200800002673326722118020110099100100800008000001008000004308000000358003961394305110116112672401010480000800001002670826708267082672826728
160204267272000004510267122012162580100100800001008000050011701071266822672226722665036685801002001600002008000026733267311180201100991001008000080000010080000000800390035800396135430511011611267240610480000800001002672826723267082672826728
160204267072000004502267122121216258010010080000100800005001174628126697267272670766503668580100200160000200800002673226748118020110099100100800008000001008000003908003500080039613543051101161126724006480000800001002672826728267232670826728
1602042672720000045022671221212122580100100800001008000050011687541267022672726707665036685801002001600002008000026722267221180201100991001008000080000010080000000800350039800006135430511011611267240010480000800001002672826728267282672826728
160204267272000000022671231218122580100100800001008000050011688801266822670726727665036685801002001600002008000026729267291180201100991001008000080000010080130039080035004080000610430511011611267240410480000800001002672826728267082670826723
1602042672220000045022671221212162580100100800001008000050011746281267022672726727665036685801002001600002008000026734267281180201100991001008000080000010080000043080035003980039615139051101161126724006280000800001002672826723267282672826728
160204267272000004502267122121216258010010080000100800005001174628026702267272672766503668580100200160000200800002672926729118020110099100100800008000001008000003908003500358003961354305110116112672401010280000800001002672326728267302672326730
160204267272000004500267122121216258010010080000100800005001168880126702267072672766503668580100200160000200800002672626738118020110099100100800008000001008000003908003900398003961353905110116112671901010280000800001002672826723267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 08 | 09 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002526734201011144101267162112192580010108000010800005011687540026702267272673166763671180010201600002080000267072672711800211091010800008000001080000043080039000800396039445020014161000117267240141008000080000102670826708267282673226728
160024267272000001441012671620002580010108000010800005011687540026702267072673166533676880010201600002080000267312672711800211091010800008000001080000043080038004180000603905020012166001010267340141478000080000102672826708267322673226728
16002426727200000044101267122121162580010108000010800005011746280026706267072673166763671180010201600002080000267072672711800211091010800008000001080000043080000003880000610445020010168001012267280101078000080000102673226732267282673226708
16002426731200000044101267160121216258001010800001080000501168880002670626707267316676366878001020160000208000026727267311180021109101080000800000108000004308000300080039603844502001016800610267260141048000080000102673226732267322670826732
1600242670720000000101266972101925800101080000108000050116888000267062670726731667236687800102016000020800002673126707118002110910108000080000010800000433280000003880039003943502001016800813267330141408000080000102670826732267322672826708
16002426731200000044000267120110258001010800001080000501173183002670626731267316673367078001020160000208000026731267271180021109101080000800000108000004308003800388003961044502006168001010267240141408000080000102670826732267082672826728
16002426731200000044000266922101625800101080000108000050116862701267062673126727667736711800102016000020800002673126707118002110910108000080000010800000430800380039800006104450200101680061026732001478000080000102673226708267282670826732
160024267312000000000126716211192580010108000010800005011746280026682267312673166763671180010201600002080000267072672711800211091010800008000001080000043080000004180039013944502006168001010267290141008000080000102673226708267082673226732
16002426727200000001012669220119258001010800001080000501168880012670226727267276653367118001020160000208000026731267271180021109101080000800000108000004308003800080000013844502006168001010267330141448000080000102673226732267322670826903
16002426707200001045101266920101625800101080000108000050117488700267062673126731667236687800102016000020800002673126707118002110910108000080000110800000430800380038800386139050200616800106267530141448000080000102673226732267322673226732