Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRSW (unsigned offset)

Test 1: uops

Code:

  ldrsw x0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10053943004500238221212162510001000100014844138939121232471000100010003897111100110001000100003910350351035613539731161138610641000390395395390390
10043892004500237921212162510001000100015018039439421632521000100010003947111100110001000100004310390391039613543731161139110641000395395395390395
100438930041002379312121725100010001000148381391389217325210001000100039472111001100010001000043103903910396135437311611391101041000395401395395395
10043892004500237921212162510001000100014838139439421732521000100010003947111100110001000100004310390391039613943731161139110641000395399395395395
100439430045002374218121225100010001000149891394394217324710001000100039471111001100010001000043103903910356139397311611386101021000390400395395390
10043893004500237921212162510001000100014838139439421732511000100010003897211100110001000100003910390351039613639731161139110641000395395395390395
100439430045002374212121625100010001000148381394394217325210001000100039472111001100010001000039103903910396135397311611386101041000395400392395395
100439430045000379212181625100010001000149890394394217325210001000100039471111001100010001000043103903910356135437311611386101041000395395395395390
10043953004100237421818112510001000100014838139439421732521000100010003947111100110001000100003910390361039613943731161139110641000390401390395395
100438930041101379212121625100010001000149891394394217325210001000100039471111001100010001000043103903910356135437311611391101041000395396395400395

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrsw x0, [x6, #8]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0051

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5e | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057005152511000010100070036697825971025401043010310001301001000061601433422541149669710700517005164647036495440100302001000060200100007005135114020110099100100003010010000010010000011000005201000011261037122698143000310101010000301007005270052700527003670054
40204700515240000001010007003669782597102540104301031000130100100006160143342254014966971070051700516463103649544010030200100006020010000700513511402011009910010000301001000001001000001100000000100001126102712269798300031001010000301007005270052700527005270052
40204700515250000001010007034469764597102540100301141000730100100006160143342254014966972070051700526465103649384010030200100006020010000700353511402011009910010000301001000001001000001100000000100000126102712269814300031001010000301007005570057700367005270052
40204700515250000001000007003669782597102540104301031000130100100006160143341470014966971070035700516464703649544010030200100006020010000700513511402011009910010000301001000001001000000100000000100001126102712269798300001001010000301007005270052700527005270052
402047003552400000010000070036697825971025401043010310001301001000061601433422540149669710700357005164647036495440100302001000060200100007005135114020110099100100003010010000010010000011000001001000011261027122698143000310101010000301007005270052700527003670036
40204700985250011001000007003669782597102540104301031000130100100006160143342254014966971070051700516464703649544010030200100006020010000700513511402011009910010000301001000001001000001100000100100000126102712269814300031010010000301007005270052700527005270052
40204700355250000001010007003669782597102540104301031000030100100006161753342254014966971070051700516464703649544010030200100006020010000700513511402011009910010000301001000001001000001100000200100001026102712269814300031010010000301007005270052700527009370055
402047005152400000040100070036697825971025401043010310000301001000061601433422540149669550700517005164647036495440100302001000060200100007005135114020110099100100003010010000010010000011000002001000011261027122698143000310101010000301007005270036700527005270054
402047005152400000010000070036697645971025401043010310001301001000061601433422540149669710700517005164647036495440100302001000060200100007005135114020110099100100003010010000010010000011000002001000011261027122698143000310101010000301007005270036700527005270056
4020470051524000000100000700206978259710254010430103100013010010000616014334225401496697107005170051646470364954401003020010000602001000070051351140201100991001000030100100001100100000110000056001000011261027122698143000310101010000301007005270052700527005270052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257005352510001102000170026697025977325400183001610002300101000061700933424941496697370056700536465903649664001030020100006002010000700533511400211091010000300101000001010001211000202110000110110252037112698193000690910000300107004270057700577005770057
400247005652511101102000070026697025971525400183001610002300101000061703633427341496696170056700416467103649814001030020100006002010000700533511400211091010000300101000011010002211000100110000111100252017112698043000696010000300107005770054700577006270057
400247005652410110102000070041697775971525400183001610002300101000061703633424941496697670056700416467403649664001030020100006002010000700563511400211091010000300101000001010001211000101110000111110252027112698193000696910000300107005470057700427005770054
400247005652410011101000070026697025970125400183001610002300101000061703633417691496696170056700416465903649814001030020100006002010000700563511400211091010000300101000001010001111000100110000110100252027111698043000396010000300107004270057700577005770057
400247005652411010002000170038697805971525400223001310002300101000061699533424941496697670056700566465903649664001030020100006002010000700563511400211091010000300101000001010002101000200110000011100252017111698253000696910000300107005770042700577005770054
400247005652411000002000070026697805971525400143001610002300101000061703633424941496697670056700566467403649664001030020100006002010000700413511400211091010000300101000001010001111000301110000111110252027121698193000396910000300107005770057700577005770057
400247004152410010102000170026697775971825400303001610002300101000061703633417691496697370056700536467403649784001030020100006002010000700533511400211091010000300101000001010002211000101110000110120252017111698043000696910000300107005770042700427005470057
400247005652411001002000070041697805971825400183001610002300101000061700933424941496697370041700566466603649814001030020100006002010000700413511400211091010000300101000001010002111000201110000011110252017111698193000666010000300107005770042700547004270057
400247005654311001102000070041697805970125400183001610001300101000061927033469581496696170056700566467403649814001030020100006002010000700413511400211091010000300101000001010001111000100410000111110252017112698043000390910000300107005770057700577005470057
400247005652410110002010070041697805980625400143001610002300101000061703633417691496697670056700416467403649664001030020100006002010000700563511400211091010000300101000011010002211000431110000110120254417112698043000396910000300107005770042700577005470042

Test 3: throughput

Count: 8

Code:

  ldrsw x0, [x6, #8]
  ldrsw x0, [x6, #8]
  ldrsw x0, [x6, #8]
  ldrsw x0, [x6, #8]
  ldrsw x0, [x6, #8]
  ldrsw x0, [x6, #8]
  ldrsw x0, [x6, #8]
  ldrsw x0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205267232000004110126712218181525801001008000010080015500117192914923627267222672716635616674801152008002420080024267277111802011009910080000100800001008000039800003300800006135430111511801600267191064800001002670826728267082672826867
802042672420000041102267122121216258010010080000100800155001166596149236472670726727166356166798011520080024200800242672756118020110099100800001008000010080000080000303580000613539011151181160026704662800001002670826728267082689726811
80204267272000004510126692018121212580100100800001008000050011657891492364226727267271663031666580100200800002008000026707711180201100991008000010080000100800003980039103980039600430000511011611267241000800001002672826728267282672826740
802042670720000045002267120120124258010010080000100800005001166818149236272670726727166503166808010020080000200800002672771118020110099100800001008000010080000080035003580035603500000511011611267041004800001002672826728267082672826730
80204267162000004500226707218012258010010080000100800005001166525149236472672726722166503166658010020080000200800002672771118020110099100800001008000010080000398003929038000060000000511011611267240104800001002672826728267282672326723
802042673520000045000267122018112580100100800001008000050011665251492363526727267271663031668580100200800002008000026727711180201100991008000010080000100800003980035260398000001043000051101161126724060800001002672326731267292670826717
80204267282001004500226692001212258010010080000100800005001166525149236472670726727166303166858010020080000200800002670772118020110099100800001008000010080000080039323980035613543000051101161126724064800001002672826728267282670826725
802042672720000045100267120121211525801001008000010080000500116578914923647267272672716633316685801002008000020080000267277111802011009910080000100800001008000008003920080000610430000511011611267241000800001002670826728267082672826739
8020426722200000450002671220121182580100100800001008000050011770381492364226727267271665031668580100200800002008000026727721180201100991008000010080000100800003980000103980000613900000511011611267241062800001002672826708267282670826722
802042672720000000022671221201625801001008000010080000500116578914923647267272672716630316685801002008000020080000267277111802011009910080000100800001008000039800391042800396139430000511011611267241064800001002672826730267082672826708

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | d9 | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252673720011000000671003267222772258001010803901080000501169933004923656267372673716681316716800102080000208000026715861180021109110800001080000110800192045080019010678004161594319050200216003326733130080000102672226731267502672126738
80024267372001111001167000226721370202580010108000010800005011739750049236562673726736166813167168001020800002080000267408511800211090108000010800000108002119008005805166800006159019250200416003126711013580000102673726737267372679926893
800242673720010100000661002267210701925800101080000108000050116729801492365626753267151666031671780010208000020800002673685118002110901080000108000001080021204308005803803080040005843192502004160041267121313580000102671526716267162671626825
80024267482001001000066100226721207192580010108000010800005011730740149236562673726737166813167168001020800002080000267378611800211090108000010800000108015820430800590012180039615843190502031160013267331313580000102673726737267372673726829
80024267402001001000021100326722207192580010108000010800005011739750149236562673726736166813167168001020800002080000267148511800211090108000010800000108002021008005702927080000016043192502001160013267331313580000102671526715267152671626872
8002426748200110111006700022670020019258001010800001080000501169451014923635267152673616659316716800102080000208000026737851180021109010800001080000010800201900800591106380039615843191502005160083267341313580000102671926759267372701526810
80024267362001101000066100226728300192580010108000010800005011669600049236572673626736166633166948001020800002080000267368511800211090108000010800000108002120430800590206480040615943190502004160042267331313080000102673726737267152673726817
8002426751200110000006710012670020012580010108000010800005011694510049236562671526737166813166948001020800002080000267376411800211090108000010800000108002120008005715323680039605943191502003160025267121313580000102671526715267152673726873
800242674820010110000660001267213775258001010800001080000501165304004923634267372671416681316716800102080000208000026737851180021109010800001080000010800212043080019130073800006159019050200416004226711013580000102671526737267152673726737
80024267412001110100021000326722200192580010108000010800005011677930049236352673626736166823167178001020800002080000267378511800211090108000010800000108001920450800191390308004061614319050200116001326734013080000102671626715267382671526857