Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRSB (unsigned offset, 32-bit)

Test 1: uops

Code:

  ldrsb w0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e22233a3f4346494f51schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)606d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)92inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0eaebec? ldst retires (ed)f5f6f7f8fd
100540331101000122003388277025100010001000155970382403225324010001000100040364111001010001000010212001058001611040615943190732161137813051000404403403404404
100438131001000067002388079192510001000100015555038240320432611000100010003818511100101000100001019204310591016010000159431917311611400131301000403404404382404
1004403311000000670033673972025100010001000155310403381204326610001000100040364111001010001000010212001019102211040615943191731161137913051000404404405403404
10043823111111006700038830019251000100010001445604034022043261100010001000402641110010100010000101919431059101601040601943190731161140001351000382404404403383
1004403311110000211033663771925100010001000155550403381204326110001000100040364111001010001000010191901059001611040601901927311611400131351000382382383404404
10044033110100006700338827019251000100010001548204034032253261100010001000403641110010100010000101920431060101601039005943191731161140013001000404383383382404
1004382311110000210033872771925100010001000155260403403225326110001000100040385111001010001000010191943105910160100060194319273116114000051000404403382404404
10043813101000007600338827719251000100010001453304033822043260100010001000403641110010100010000101919431058002611000015843190731161140101301000404383404405382
10044043110100006700338807719251000100010001445604033812043261100010001000381851110010100010000101918010581012110406159431907311611378131301000404382404403403
10044032101000006600236907719251000100010001453304034032253260100010001000403641110010100010000101921010601066110000158019073116113790001000404404382403404

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrsb w0, [x6, #8]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0054

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f191e22233a3f4d4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)696a6b6d6emap stall dispatch (70)int prf full (71)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
4020570051525000000061007002069782597102540100301001000130100100006160143341470496695507005170051646500364957401003020010000602001000070054351140201100991001000030100100000100100022110001001100001111102610271116982330006010010000301007005570055700367005570055
40204700555240000010100070039697885971625401083010610002301001000061609533425424966977070060700606465403649634010030200100006020010000700573511402011009910010000301001000001001000001100000001000010000026101711169814300000131010000301007006170058700617005870042
40204700415251100000200070042697905971325401043010010000301001000061604133423984966974070054700356465003649574010030200100006020010000700513511402011009910010000301001000011001000220100020011000001110026101711169823300060131010000301007005570055700607005570055
4020470035525000000010007002069791597012540108301061000230100100006160953341769496698007006370060646530364944402963020010000602001000070060351140201100991001000030100100001100100000110000000100000000002610171116981430003013010000301007003670055700557005270052
402047005452400000000000700206979159719254010430106100023010010000616078334254249669610700607004164656036494440100302001000060200100007006235114020110099100100003010010000110010000001000000010000100000261017111698173000313101310000301007006370042700427006170061
40204700605241011000200070045697855969525401043010310001301001000061608633426384966971070054700546465003649384010030200100006020010000700513511402011009910010000301001000011001000331100020011000011111026101711169823300060131310000301007005570055700557003670055
40204700545240000000100070039697645971325401043010310000301001000061604133423984966974070054700546465003649574010030200100006020010000700353511402011009910010000301001000001001000001100000001000010100026101711169814300001313010000301007006170061700617006170061
40204700605251010000110070042697855971325401043010010001301001000061617533423984966974070054700356465003649574010030200100006020010000700543511402011009910010000301001000011001000210100014011000001012026101711169820300060131310000301007005570036700367005270055
402047005452500001001000700396979159719254010830106100023010010000616095334176949669800700607004164637036500640100302001000060200100007006035114020110099100100003010010000010010000001000000010000101000264517111698143000013101010000301007006170042700617006270066
40204700635251010000700070045697855971325401043010010000301001000061601433422544966974070054700546464703649574010030200100006020010000700543511402011009910010000301001000001001000111100010111000011011026101711169804300061313010000301007005270052700367005570036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01)cycle (02)03090e0f191e1f22233f4d4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5l1d cache miss ld nonspec (bf)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
4002570047525101010107007369728597092540014300131000130010100006169523342206049669707005070050646653649724001030020100006002010000700353511400211091010000300101000001010000110009001000011000252027134698103000366610000300107004870051700487004870036
4002470035524100010007003269728597062540014300101000130010100006169523342062049669707005070050646683649604001030020100006002010000700473511400211091010000300101000001010000110000001000011000252027144698103000366610000300107005170051700517005170048
4002470047525011010107003569743597063840014300131000130010100006169523342062049669677005070047646693649724001030020100006002010000700473511400211091010000300101000001010000110000001000010000252027123698133000396910000300107003670048701457006270050
4002470047525100010107003569760597092540014300131000130010100006169523342206049669677004770047646683649724001030020100006002010000700473511400211091010000300101000001010000110000101000011000252027143698133000396610000300107005170048700367003670048
4002470047524011000107003269760597062540014300131000130010100006169523342062049669557004770050646653649724001030020100006002010000700503511400211091010000300101000001010000010000001000011000252027134697983000306610000300107004870048700487004870036
4002470047525000010107003269728597062540014300131000030010100006170063342110049669677004770050646653649724001030020100006002010000700473511400211091010000300101000011010000110000001000011000252027144697983000366610000300107004870048700487004870048
4002470047524000010007003269728597712540014300131000430010100006169523342062049669677004770047646653649724001030020100656002010000700473511400211091010000300101000001010000110000001000011000252027122698103000366610000300107005170048700517003670048
4002470035525111000107003269728597062540014300131000130010100006169523341470049669677005070050646683649754001030020100006002010000700503511400211091010000300101000001010000110000001000001000252027122698103000366610000300107004870048700487003670048
4002470047525000010107003269760597062540014300101000130010100006169523341470049669707005070050646683649724001030020100006002010000700503511400211091010000300101000001010000110000001000011000252047144698103000366010000300107004870036700487004870036
4002470047525000010107003269728597062540014300131000130010100006169823342206049669557005070035646533649724022030020100006002010000700503511400211091010000300101000011010000110000001000011000252027122698103000060610000300107005170048700487004870048

Test 3: throughput

Count: 8

Code:

  ldrsb w0, [x6, #8]
  ldrsb w0, [x6, #8]
  ldrsb w0, [x6, #8]
  ldrsb w0, [x6, #8]
  ldrsb w0, [x6, #8]
  ldrsb w0, [x6, #8]
  ldrsb w0, [x6, #8]
  ldrsb w0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01)cycle (02)030e0f1e1f22233a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
80205267362010161000126720184192025801001008000010080000500116792414923655267352673516658316673801002008000020080000267156511802011009910080000100800000100800000800551055800256155445110116332671201006800001002673626736267362671626736
80204267352000061000126720170193258010010080000100800005001169525049236552673526735166583166738010020080000200800002673385118020110099100800001008000001008000059800550025800256025445110216232673201306800001002673826736267162673626716
8020426735200002500012678921402025801001008000010080000500116792404923655267352673516636316673801002008000020080000267358511802011009910080000100800000100800005980185190558002561554451102162326732013106800001002673626736267162673626736
8020426735200106100012672017019192580100100800001008000050011663340492365526715267151665831667380100200800002008000026736851180201100991008000010080000010080000598002500558005561254451103164226732010106800001002673826736267172671626736
802042673520010610000267201741920258010010080000100800005001177584049236352673526715166583166938010020080000200800002671585118020110099100800001008000001008000059800250055800556155051104164226732010106800001002673626736267162673626716
802042673520000250000267011741932580100100800001008000050011667101492363626715267351665831669380100200800002008000026735851180201100991008000010080000010080000598005500548005561554451103162426732010106800001002671626736267362673726736
802042673520000610001267201741920258010010080000100800005001167564049236552671526715166383166938010020080000200800002673585118020110099100800001008000011008000059800550025800556155051105163126732010100800001002673026716267362671626736
802042673520000610000267201741920258010010080000100800005001167468149236552673526735166583166938010020080000200800002671585118020110099100800001008000001008000008005500558002560254451104164226732010100800001002671626736267162671626736
8020426735200002500002672017419522580100100800001008000050011775840492363626735267351665831669380100200800002008000026735851180201100991008000010080000010080000598005500258005500554451104163126712010106800001002674426736267452673626717
80204267352000061000126720174192025801001008000010080000500117758404923635267352673516658316693801002008000020080000267366511802011009910080000100800000100800000800555058800256125445110316412673201004800001002673626736267162673726737

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)090e0f1e223a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)dbddfetch restart (de)e0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
8002526727200001000012671620121625800101080000108000050116689614923647267282670816652316717800102080000208000026727771180021109108000010800000108000000800390003980039613943005020416033267311010080000102672826728267282672826728
8002426708200000004510266930112192580010108000010800005011668861492364726731267311667231668880010208000020800002670877118002110910800001080000010800000080039000080039611943005020216032267241410080000102672826728267282673726728
80024267282000000001126716001125800101080000108000050116712414923628267282670816652316711800102080000208077126734771180021109108000010800001108000004380039000398003960394400502031603326728010780000102675726900268932672926729
8002426731200000004510267132121216258001010800001080000501166896149236472672726728166523167088001020800002080000267087711800211091080000108000001080000043800000003980039603943005020316033267251010480000102673226709267092670926732
8002426728200000004401267160121219258001010800001080000501167124149236512672826708166763167088001020800002080000267317711800211091080000108000001080000043800390003980039613944005020216023267251310480000102670926729267092672926728
80024267082000000001126712312121625800101080000108000050116712414923651267312673116672316688800102080000208000026727771180021109108000010800000108000004380000000398003961394300502031603326725100080000102673826738267382673726716
80024267282000000067112671221200258001010800001080000501168843149236282673126727166723167088001020800002080000267277711800211091080000108000001080000043800390100800396119431905020316033267051010480000102671626716267092670926737
80024267152001000045012671201212025800101080000108000050116675014923628267282672816672316707800102080000208000026738771180021109108000010800000108000000800000000800006139430050203160332670500480000102672826730267092673426729
8002426728200000004511266932121216258001010800001080000501168843149236472672726727166523167168001020800002080000267277711800211091080000108000001080000044800390003980038613943005020316032267241313580000102673226732267382673826715
8002426727200000000112671201200258001010800001080000501166750149236482670826708166723167088001020800002080000267087711800211091080000108000001080000043800390004280040613944005020416033267281414780000102672926709267322673226732