Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRB (unsigned offset)

Test 1: uops

Code:

  ldrb w0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10053993000001004110137621818162510001000100015362039939922132571000100010003998111100110001000010202142105710059103961574219173216113999921000400399400400400
10043993110101006510338421812122510001000100014838038938921232471000100010003897111100110001000010191942101910059103861574219073116113969921000395390390390390
100438930000000041101374218181225100010001000148380389389212324710001000100038971111001100010000100003910350003510356135390073116113886921000390395392390390
10043893000001004100237921818122510001000100014838038938921232471000100010003897111100110001000110191942105710059103861574219173116113969901000391390392390390
100438930000010041001374218181625100010001000153620399399222325610001000100039981111001100010000100003910350003510356139390073116113866921000400400400400400
100439931101000065103384218181225100010001000148380389391212324710001000100039171111001100010000100003910350003510356135430073116113866921000390390390390390
100438930000000041101374118181525100010001000153340398399221325710001000100039981111001100010001100003910350003510356135390073116113866921000390390390390390
10043913000000004110137421818122510001000100014774038938921232471000100010003947111100110001000010212042105700259103861594219073116113969921000390390390390390
100438920000010041101379218181525100010001000153340399399221325610001000100039982111001100010001100003910350003510356135390073116113866921000390390390390390
100438930000010041101374218181625100010001000153150398398221325710001000100039881111001100010001100003910350003510356135390073116113866921000400400400401400

Test 2: Latency 1->2 (with chain penalty)

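Chain cycles: 3 (presumably the latency of the EOR, EOR, ADD sequence in the code below, which feeds the loaded w0 back into the base register x6; this amount is subtracted from the reported result)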

Code:

  ldrb w0, [x6, #8]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0060

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057005752501111001170042697885971925401083010610002301001000061617533423981496697470060700606465636496040100302001000060200100007006035114020110099100100003010010000010010001111000200011000011010026156715569820300061010010000301007006170042700617005870061
4020470041524111000101700426979159716254010830106100013010010000616095334268614966977700607005764637364963401003020010000602001000070041351140201100991001000030100100000100100011110001010110000111110261357153698233000613131310000301007005870061700427004270061
4020470060524111000112700266979159716254011230106100023010010000616095334176914966961700607004164656364963401003020010000602001000070060351140201100991001000030100100000100100022010001000110000110120261357154698043000613131310000301007006170042700427004270058
402047006052511100021170042697025971925401043010610002301001000061609533426860496698070060700416463736496340100302001000060200100007005735114020110099100100003010010000010010001111000201111000011110026135715569828300030131310000301007005870061700427005870061
4020470060524110100101700456970259719254010430103100013010010000616078334254214966977700577005764656364944401003020010000602001000070060351140201100991001000030100100000100100022110002000110000011100261357135698203000613131310000301007005870058700617004270042
402047006052511100020170045697915971925401043010610002301001000061606833425420496698070060700576465636496340100302001000060200100007004135114020110099100100003010010000010010001311000100111000011012026153713469820300031301310000301007006170042700587005870058
402047007052411010010170045697915971625401043010610002301001000061609533426860496697770060700606465636494440100302001000060200100007006035114020110099100100003010010000010010001211000300241000001010026155715369804300061313010000301007004270061700617006170058
402047006052411110011270042697025971625401083010610002301001000061609533426860496698070060700606465636496340100302001000060200100007006035114020110099100100003010010000010010003111000200111000011110026136715369823300061301010000301007004270061700617006170058
402047004152411000021270045697885970125401043010610001301001000061609533426860496698070060700606465636496340100302001000060200100007004135114020110099100100003010010000010010001211000200211000011110026155715669804300030131310000301007006170128700577006770061
4020470060525111000101700426979159716254010830106100023010010000616078334268604966977700607005764656364944401003020010000602001000070041351140201100991001000030100100000100100022110002001110000010100261357155698233000610101010000301007006170061700617005870042

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0051

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002570051525101000107004269743597102540014300131000130010100006169913341470049669550700357005164669364976400103002010000600201000070051351140021109101000030010100000101000011000000100001100252027111698143000310101010000300107003670052700527005270052
4002470051525000000107003669775597102540010300131000130010100006169913341470149669710700517005164669364976400103002010000600201000070051351140021109101000030010100000101000011000000100001100252017112698143000310101010000300107003670036700527005270052
400247003552500010010700626977559710254001430013100013001010000617068334225404966955070051700356465336496040010300201000060020100007003535114002110910100003001010000010100001100000010000110025201711169802300060101010000300107005270052700527005270052
4002470051525000000607004269775597102540014300131000130010100006169913342254049669710700517005164669364976400103002010000600201000070051351140021109101000030010100000101000011000000100001100252017111698143000610111010000300107005270052700527005270052
4002470051525001000117004269777597102540014300101000130010100006170683342254049669710700367003864669364960400103002010000600201006670051351140021109101000030010100001101000011000000100001100252017111697983000610101010000300107003670036700367003670052
4002470051524000100117004269775597102540014300131000130010100006170683341470049669710700517005164653364976400103002010000600201000070051352140021109101000030010100000101000011000000100001100252017111698143000610101010000300107005270052700527005270052
4002470035525000000117004269775596952540014300131000130010100006169913342254049669710700517003564669364976400103002010000600201000070051351140021109101000030010100000101000011000003100000100252017111697983000610101010000300107003670052700367005270052
400247003552400100011700426974359695254001430013100013001010000616991334225404966971070051700516466936497640010300201000060020100007005135114002110910100003001010000110100001100000010000110025201711169798300060101010000300107005270052700527005270052
4002470051525000000007004269743597102540014300131000130010100006169913342254049669710700517005164669364976400103002010000600201000070051351140021109101000030010100000101000001000010100001100252017111698143000310101010000300107005270052700527005270052
400247005152500100010700426974359710254001030013100003001010000616991334147004966972070051700766466936502640010300201000060020100007005135114002110910100003001010000010100001100000010000110025201711369814300060101010000300107003670036700367005270052

Test 3: throughput

Count: 8

Code:

  ldrb w0, [x6, #8]
  ldrb w0, [x6, #8]
  ldrb w0, [x6, #8]
  ldrb w0, [x6, #8]
  ldrb w0, [x6, #8]
  ldrb w0, [x6, #8]
  ldrb w0, [x6, #8]
  ldrb w0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526738200111000013800326699070202580100100800001008000050011667584923657267432686116665316694801002008000020080000267146411802011009901008000010080000010080018204380058010608000060194319105110116112673300130800001002671526715267372671526737
80204267142001110100670032672120702580100100800001008000050011673164923656267462687716667316694801002008000020080000267378511802011009901008000010080000110080019194380019101618004060580190051101161126733013130800001002671526715267372671526715
8020426736200100000010510226725070125801001008000010080000500116755549236342673926740166393166948010020080000200800002671485118020110099010080000100800000100800191908006020260800006158431910511011611267330000800001002671626715267372673726737
80204267362011111000541022669920020258010010080000100800005001166519492365626722268851666731669480100200800002008022426721851180201100990100800001008000001008002020080019001618003901190191051101161126733013135800001002673726737267372673726715
802042671420011000014000226721077192580100100800001008000050011675554923656267262682316663316694801002008000020080000267368511802011009901008000010080000010080020200800191006180040015943191051101161126734013130800001002673826737267372673726737
802042673620011100006600126699377202580100100800001008000050011667584923657267212688816663316672801002008000020080000267366411802011009901008000010080000010080021204380059102618004060584319105110116112671100135800001002673726737267372673726715
8020426736200101111067101266993772025801001008000010080000500116731649236562671926716166673166948010020080000200800002673785118020110099010080000100800000100800202043800581006080040616043191051101161126733013130800001002673726715267382673826737
80204267362001011000210012672237702580100100800001008000050011665194923635267472682316670316694801002008000020080000267366411802011009901008000010080000010080020180800590006480039015943191051101161126711013130800001002673726715267372671526715
80204267142001111000670032672137020258010010080000100800005001166519492363426745267381666731667280100200800002008000026714851180201100990100800001008000001008001920438005810121800396059431910511011611267340000800001002671526737267382682626715
802042673720011110008800226699297192580100100800001008000050011698684923656267242674316667316672801002008000020080000267146411802011009901008000010080000110080019210800591006180000615943190051101161126711013135800001002671526715267372673726738

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252672420010041012676420121125800101080000108000050116760549236282672226728166673166888001020800002080000267085611800211091080000108000001080000080035008000061005020516242671966080000102670926709267232672326709
80024267222000100122671821818122580010108000010800005011676054923628267082672216652316688800102080000208000026727561180021109108000010800000108000039800350080039600050204164226719100480000102672826729267292670926723
80024267272000000002670701818122580010108000010800005011688434923628267222672216667316688800102080000208000026728711180021109108000010800001108000008003503980035613539502041642267051010080000102670926709267282672826729
800242672820000145112671021212025800101080000108000050116760549236482672826727166723167078001020800002080000267087111800211091080000108000011080000398000000800006135435020216422671900080000102672826728267282672826709
8002426708200000010267092121202580010108000010800005011667504923628267082672216667316708800102080000208000026708711180021109108000010800000108000008000003980039610435020216242672566080000102670926723267232672926729
800242672720001045012671021818122580010108000010800005011668864923648267282672216667316702800102080000208000026708711180021109108000010800000108000039800000398003561354350203164326719100080000102672826729267292672926728
8002426727200010451226715218012258001010800001080000501166886492364726708267281667231668880010208000020800002672871118002110910800001080000110800003980035108003561005020416242670560080000102670926723267232672326723
8002426708200000450026707018180258001010800001080000501166886492362826728267081665231668880010208000020800002670871118002110910800001080000110800003980000139800006135395020216242670566080000102672326729267092670926723
8002426708200000450226708201216258001010800001080000501167545492364726728267081667231670880010208000020800002672871118002110910800001080000110800003980039008003901354350204164426705100080000102670926709267232670926729
8002426728200000000267152120162580010108000011800005011667504923647267282672716652316688800102080000208000026708561180021109108000010800000108000008003500800006135395020416432670506280000102672326723267232672826709