Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDURSH (32-bit)

Test 1: uops

Code:

  ldursh w0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005402311100641023881700202510001000100014583140240222532601000100010004028511100110001000010005910550551055612507551666400101001000384403384403403
100438231011025102368170193251000100010001539414024022243240100010001000402651110011000100001000010550551055015507561666379101001000383403403384403
100440231011061001387254032510001000100015411138340220532601000100010004026511100110001000010005910550251055012544756166639910001000383403403403403
1004402310101251023872541932510001000100014536140238322432601000100010003836511100110001000010000105502510256153447561655380101001000403403384384403
100440231010025101387174022510001000100015521140240222532601000100010004026511100110001000010005910250251055612544756165539901061000383384384403383
100438331011061001387250022510001000100015411140240222532621000100010004026511100110001000010000105502510556025447561666380101061000403403403384403
100438231010061102387254192025100010001000145801402402224326010001000100038385111001100010000100059105505510566125075616663990061000403403403384403
1004383310100610023672501932510001000100015423140240220532411000100010003838511100110001000010005910250551055602544756165639910061000384403383403403
100440231010061002387174192025100010001000154231402402226326010001000100040265111001100010000100001025055105560550755165539901061000403403383403403
10044022101002510238717019325100010001000154691402382205324010001000100040285111001100010000100059105505510556125447561656399101061000403403384403403

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldursh w0, [x6, #1]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020570053524110000010000700266978459771254010830106100023010010057616059334235049669617005670053646370364944401003020010000602001000070056351140201100991001000030100100000100100031110003001110000111110261027111698163000666010000301007004270057700547011870042
4020470056525111000070000700416978459770254010830106100013010010000616059334249449669767005970056646370364956401003020010000602001000070056351140201100991001000030100100000100100022110008001202610003111120261017111698193000696910000301007004570042701297009070057
4020470254526112100020100700266970259758254015930106100023010010000616032334268649669767005370056646490364959401003020010000602001000070154351140201100991001000030100100000100100013010003021110000111120261017111698053000366610000301007005470043700427004270058
402047005652511100008104000702296986759813254010830103100023010010000616032334249449669767004170056646520364959401003020010000602001000070056351140201100991001000030100100000100100022110004010410000111110261017111698193000699610000301007004270057700547004270054
40204700565251110000130000700446988359719254010830106100013010010000616078334235049669767005370420648030364959401003020010000602001000070419351140201100991001000030100100000100100021110001101110000111100261017111698193000666910000301007005470054700547005470054
4020470041524110000010001700266986859714254010830106100023010010000616059334249449669767005370053646490364959401003020010000602001000070056351140201100991001000030100100000100100032110001021110000010110261017111698043000600610000301007006770057700547012770057
4020470056525101000020000700416987459742254010430106100023010010000616059334176949669767005670056646490364959401003020010000602001000070042351140201100991001000030100100000100100011110002001110000111110261017111698163000366910000301007005770042700547013270057
4020470053524111000010000700266978459716254010830106100023010010000616032334249449669617005670041646520364956401003020010000602001005270041351140201100991001000030100100000100100012110001002110000111110261017111698193000699010000301007004270054700427014570057
4020470041525111000020000700266978459755254010830106100023010010000616032334249449669767004170053646490364956401003020010000602001000070041351140201100991001000030100100000100100011110002000110000110100261017111698193000600910000301007005770057700547013470042
4020470041525111000020000700456978459739254010830106100023010010000616059334176949669767004170056646490364959401003020010000602001000070056351140201100991001000030100100001100100023110002001110000110110261017111698193000606910000301007004270054701547010970057

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257004752500011070032697285970625400223001310001300101000061695233420624966967700507005064668364978400103002010000600201000070047351140021109101000030010100000101000011000000100001125200147175698103000366610000300107004870048700367004870090
40024700475241000107003569743597062540014300131000130010100006169523346910496696470047700356466536497240010300201000060020100007005035114002110910100003001010000010100001100000010000012520057155698103000006010000300107005170048700507003670048
40024700355250001007003269760597092540010300131000130010100006177803344078496697670050700476466836497540010300201000060020100007005035114002110910100003001010000010100001100000010000112520057176698103000306610000300107004870036700487004870048
40024700475251111107003269728596952540014300131000130010100006178783344174496697070035700356466836497540010300201000060020100007004735114002110910100003001010000010100000100000010000112520057166698133000309610000300107003670048700487003670048
40024700355251001007003269728597062540014300131000130010100006179273344414496696770035700476466536497240010300201000060020100007004735114002110910100003001010000110100030100000010000112520057155698103000366010000300107004870036700487004870048
40024700475240101007003269760597092540014300131000130010100006177253344030496697070050700476466536497240010300201000060020100007004735114002110910100003001010000010100001100000010000112520057155698103000396010000300107005170048700367004870051
40024700475251001007005869763597092540014300131000130010100006177083343166496696770035700356466536496040010300201000060020100007004735114002110910100003001010000010100001100000010000112520057155697983000369610000300107005170048700517004870048
40024700505250001107003269743597952540014300131000130010100006176083341518496696770047700476466536497240010300201000060020100007004735114002110910100003001010000010100001100000010000102520057145698133000000910000300107005170051700517004870048
40024700475250001007003269728596952540014300131000030010100006174293342206496695570047700476466836497240010300201000060020100007004735114002110910100003001010000010100000100000310000112520067156698133000366910000300107003670048700367004870036
400247005052600010070032697435969525400143001310001300101000061779833447984966970700477004764668364972400103002010000600201000070050351140021109101000030010100000101000011000001510000112520057166698103000060910000300107003670048700487003670036

Test 3: throughput

Count: 8

Code:

  ldursh w0, [x6, #1]
  ldursh w0, [x6, #1]
  ldursh w0, [x6, #1]
  ldursh w0, [x6, #1]
  ldursh w0, [x6, #1]
  ldursh w0, [x6, #1]
  ldursh w0, [x6, #1]
  ldursh w0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526724200004500126707201216258010010080000100800005001165789149236470267072672716650316680801002008000020080000267227111802011009910080000100800001100800003980035000800516135430511011621267291064800001002672826708267282672326728
8020426707200105700126712012121225801001008000010080000500116780814923642326841267231663331666580100200800002008000026722711180201100991008000010080000010080000398003902458003961000511011611267041064800001002672826728267082672826728
802042672720000460022671201212122580100100800001008000050011597471492362702672726707166303166658010020080000200800002672271118020110099100800001008000001008000008003900680039610430511011621268351064800001002672826728267282672826728
8020426727200004510226712212016258010010080000100800005001166525149236470267222672216630316685801002008000020080000267077111802011009910080000100800000100800003980039003980039613500511011611267291062800001002672826723267082672826708
80204267272000045102267120181204880100100800001008000050011665251492362702672226727166503166658010020080000200800002672271118020110099100800001008000001008000008003900398000061043051101161126712004800001002670826728267282672826728
802042672720000450002670721812122580100100800001008000050011672311492364702672226727166503166808010020080000200800002670771118020110099100800001008000001008000008003500358003560043051101161126725660800001002670826728267282670826708
8020426722200004500026707001202580100100800001008000050011665251492364702672226727166503166858010020080000200800002670771118020110099100800001008000001008000008003900388000001353905110116212671910100800001002672826708267232672826728
8020426707200004100026692012121225801001008000010080000500116652514923647026727267271665031668580100200800002008000026722711180201100991008000010080000010080000398003500358003961350051101161126704664800001002672826723267232672826728
802042670720000000226707212121225801001008000010080000500116723114923627026740267271665031668580100200800002008000026722561180201100991008000010080000010080000398003900398000061350151101161126707064800001002672826728267082672826708
80204267072001045002267072012124980100100800001008000050011672311492364702672726727166303166858010020080000200800002672271218020110099100800001008000001008000008003500358003961350051101161126722602800001002672326723267232672326723

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526737200001000000126713012016258001010800001080000501167008149236472672826708166753166888001020800002080000267277711800211091080000108000001080000008003903980000603900502016160117267051010480000102673226709267282673226728
80024267282000000044000026712212016258001010800001080000501171243049236472672826728166723167088001020800002080000267315611800211091080000108000001080000430800000398003961000502051631112267051010780000102673226728267292672826709
800242672820000000440001267162121162580010108000010800005011719740492364826708267271667231668880010208000020800002670877118002110910800001080000010800000080039039800396104305020121601010267051010080000102670926728267282672926709
80024267282000010044000126713201216258001010800001080000501172798049236282673126708166723167088001020800002080000267087711800211091080000108000001080000430800390388000061380050201216081326725100480000102672926709267292672926739
8002426708201000004501012731021212192580010108000010800005011877180492364826727267271667631670780010208000020800002672777118002110910800001080000010800000080038039800400139430502010160131326724100480000102672826709267292672926728
8002426708201000000000026693010192580010108000010800005011774750492364726728267081667231670880010208000020800002672777118002110910800001080000010800004308003900800396039430502012160106267051010480000102670926729267322672826728
8002426728200000004600012669321211925800101080000108000050117230504923647267272670816672316708800102080000208000026727771180021109108000010800000108000000800391398000061394405020916012926705100480000102673226732267092672926729
8002426727200000004501012671221121925800101080000108000050117242514923628267272673116672316711800102080000208000026731771180021109108000010800000108000043080039039800000139430502011160141226705010480000102672926732267322672926709
800242670820000000450101267130111625800101080000108000050116883104923628267272672716672316707800102080000208000026728771180021109108000010800000108000043080039040800390039430502061601410267251414780000102670926728267282672826709
80024267282000010045000126712012121625800101080000108000050117519404923648267082672716652316688800102080000208000026728561180021109108000010800000108000043080038008000061394305020101601082672870480000102672926728267322672826729