Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRB

Test 1: uops

Code:

  ldrb w0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005394311441013832112425100010001000152671398398221325610001000100039877111001100010001100043103803810386139447311611395141071000399395399399399
1004398310440013812111925100010001000152080398398221325610001000100039877111001100010000100043103803810386139447311611391141071000399399399399399
10043983004400138321211925100010001000152670398394221325610001000100039877111001100010000100043103803810386139447311611395141471000399399399399399
1004398300440013832111925100010001000152670398398224325610001000100039877111001100010001100043103803810386139447311611395141471000399399399399399
1004398300440013832111925100010001000152740398398221325610001000100039877111001100010001100043103803810386138447311611395141471000399399399399399
10043983004400138321211625100010001000152740398398217325210001000100039877111001100010001100043103803810386139447311611395141071000399399399399399
1004398300440013832111925100010001000152080398398221325610001000100039877111001100010000100044103803810386138437311611395141471000399399399399399
10043943004410138321212125100010001000152080398398221325610001000100039877111001100010000100044103803810386138447311611395141471000399399399399399
10043983004400138331211625100010001000152670398398217325610001000100039877111001100010000100043103903910396138447311611395141471000399399399399399
1004398300440013832112025100010001000152670403398221325610001000100039877111001100010001100044103803810396139437311611395141471000399399399395399

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrb w0, [x6]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40205700535241110000201017003869784597012540108301031000230100100006161493352528149669737004170041646373649564010030200100006020010000700653511402011009910010000301001000001001000230100020141000011010261027111701213000666010000301007005470054700547005470054
40204700535241010000201007003869702597012540104301061000230100100006160323342350149669737005370053646493649564010030200100006020010221700533511402011009910010000301001000001001000211100030111000011112261017111698163000666010000301007004270055700427005670054
4020470053524111002130701017003869702597012540108301031000130100100006160783342350149669737005370041646493649564010030200100006020010000700533511402011009910010000301001000001001000111100180011000011111261017111698163000666610000301007005470054700797004870054
40204700535251100000401017003869702597122540108301061000230100100006160323342350149669737005370053646493649444010030200100006020010000700533511402011009910010000301001000001001000121100010111000011113261017111698163000666610000301007005470054700547004270054
40204700535241000000701017003869702597012540108301061000230100100006160323341769149669737004170053646373649564010030200100006020010000700413511402011009910010000301001000001001000220100010211000001110261017111698163000666610000301007004970054700547005470054
40204700415251000000201017013169702597122540108301061000230100100006160323341769149669737004170053646493649564010030200100006020010000700533511402011009910010000301001000001001000110100020211000011112261017111698043000666610000301007042270054700427005470042
40204700535241101100200007003869784597122540104301061000130100100006160323341769149669617005370053646493649524010030200100006020010000700533511402011009910010000301001000001001000221100020111000011111261017111698163000666610000301007005470054700547005470054
40204700535241110100200017003869702597012540122301131000230100100006160783342350149669737005370053646493649594010030200100006020010000700413511402011009910010000301001000001001000241100020311000011111261017111698163000666610000301007005470054700547005470042
402047005352411100001010070038697845971312440108301061000230100100006160323341769149669737005370053646493649564010030200100006020010000700533511402011009910010000301001000001001000211100015311000011011261017111698163000666610000301007004270042700547005470054
40204700535251211000701007003869785597882540108301061000230100100006160323342350149672517004170053646433649564010030200100006020010000700533511402011009910010000301001000001001000120100020011000011111261017111698163000666010000301007004270054700427005470054

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002570057524111000020007002069743597102540014300131000130010100006169913342254149669717005170051646690364965400103002010000600201000070051351140021109101000030010100001101000011000000100001125201711169814300030101010000300107005270052700367005770052
4002470051524000000060007003669775597102540014300131000130010100006169913342254149669717003570051646720364976400103002010000600201000070051351140021109101000030010100000101000011000303100001125201711169814300030101010000300107005270052700367005270052
4002470051525000000010107002069775596952540014300131000130010100006169913342254149669557005170051646530364976400103002010000600201000070035351140021109101000030010100000101000011000000100001025201711169814300030101010000300107003670052700527005270052
400247003552600000001010700206977559713444001430013100003001010000616991334225414966971700517005164653036496040010300201000060020100007003535114002110910100003001010000110100000100000010000102520171116981430003001310000300107003670052700367005270036
40024700515240000000101070020697755971025400143001310001300101000061699133422541496697170051700516466903649764001030020100006002010000700513511400211091010000300101000001010000010000001000001255517111698143000310101010000300107005270036700527003670052
40024700515240000000100070036697755971025400103001010001300101000061706833414701496697170051700356466903649764001030020100006002010000700513511400211091010000300101000001010000110000101000011252017111698143000013101010000300107005270036700527005270055
400247005152500000001010700366974359710254001430010100013001010000616991334225414966971700517005164653036497640010300201000060020100007007335114002110910100003001010000110100001100003121000311252017111698143000310101010000300107005570052700527003670036
40024700515250000000100070020697435971025400143001310001300101000061699133414701496697170051700516466903649764001030020100006002010000700513511400211091010000300101000001010000110000001000011252017111698173000010101310000300107005270052700527005270036
400247005152400000001010700366977559710254001430013100013001010000617068334225414966971700357003564669736496040010300201000060020100007005135114002110910100003001010000010100001100000010000112520171116981730000010010000300107005270036700557005270052
400247005152500000001010700366977859710254001430013100013001010000616991334225414966971700517005164669036496040010300201000060020100007005135114002110910100003001010000010100001100001010000102520171116981430003010010000300107005270052700367005270052

Test 3: throughput

Count: 8

Code:

  ldrb w0, [x6]
  ldrb w0, [x6]
  ldrb w0, [x6]
  ldrb w0, [x6]
  ldrb w0, [x6]
  ldrb w0, [x6]
  ldrb w0, [x6]
  ldrb w0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526728201000110101267122121202580100100800001008000050011657891492364726727267271665031678880100200800002008000026730561180201100991008000010080000010080000043800390003980039613943005110116112672410104800001002670826728267282672826728
802042672720000001451002669221212162580100100800001008000050011672310492364726707267271665031671580100200800002008000026735561180201100991008000010080000010080000043800390003980039013943005110116112672410104800001002672826728267082672826728
80204267272000000045101267122121216258010010080000100800005001166525049236472672726707166503168248010020080000200800002672777118020110099100800001008000001008000004380000000398003961394300511011611267040100800001002672826708267282670826708
80204267072000000045100267122121216258010010080000100800005001165789049236472672726727166503167978010020080000200800002671056118020110099100800001008000001008000004380039010428000061394300517611611267241004800001002672826728267282672826728
802042672720000001451012671221212162580100100800001008000050011657890492362726727267271663031674880100200800002008000026707771180201100991008000010080000010080000043800390003980039613943005110116112672410100800001002672826737267282672826708
8020426727200000004510126712212121625801001008000010080000500116723104923647267272672716650316774801002008000020080000267277711802011009910080000100800000100800000080039000380039013943005110140112672410100800001002672826732267282672826728
80204267272010000045100266962121216258010010080000100801785001167677149238092672726729166503168138010020080000200800002672777118020110099100800001008000011008000004380000000398003961043005110116112672610100800001002672826728267082672826728
80204267272000000045101267122121216258010010080000100800005001167231049236472672726727166503167388010020080000200800002672756118020110099100800001008000001008000004380039000428003961394300511011611267261004800001002670826728267282672826728
802042672720000000451012671621212025801001008000010080000500116743504923627267272672716650316753801002008000020080000267277711802011009910080000100800000100800000438029900039800396139430051103161126728004800001002672826732267372672926728
80204267272000000045011267120121216258010010080000100800005001167231049236472672726727166503166858010020080000200800002673677118020110099100800001008000001008000004380039010398003901394300511011611267241004800001002672826708267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d2 | d5 | map dispatch bubble (d6) | d9 | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252673620010111066100326722077192580010108000010800005011677910149236560267372671516660316703800102080000208000026736851180021109108000010800001080020200800191022380040605843190050201111700108267281410080000102673226729267292673226732
8002426731200000000451000267222771925800101080000108000050116696001492365602673626737166813167218001020800002080000267378611800211091080000108000010800191943800591006180039011943190050200716001010267331313580000102671626737267382673826737
8002426736200101100553000226721377202580010108000010800005011673850149236570267362673616682316724800102080000208000026737861180021109108000010800001080020194380019001608004060194319115020010160071026705140480000102673226732267092673226709
800242673120000000045100126717211025800101080000108000050116675001492365302673126731166723167158001020800002080000267317711800211091080000108000010800000448000000038800396139000050200101600107267331313580000102673726737267162671626715
800242671420010110067100026716211219258001010800001080000501167201004923655026731267281667831672480010208000020800002673764118002110910800001080000108002019080019000218004061194319005020071600710267361313580000102673726737267372671626737
8002426736200111000661000267213702025800101080000108000050116710601492365702673626736166823168758001020800002080000267368511800211091080000108000010800201943800580002180039611901900502001016001010267331313580000102671526737267162673726738
800242671520010000066100326721307202580010108000010800005011668000149236560267362671516660316721800102080000208000026736851180021109108000010800001080019194380019101608004060600190050200816007102673370580000102673726715267372673726738
800252673720010100067100026721377202580010108000010800005011678520149236560267362673616660316726800102080000208000026715851180021109108000010800001080020194380019001618004061594319005020071600101026733013580000102673826738267152673826737
800242673620010010066100126716211025800101080000108000050116720101492365102673126731166763168538001020800002080000267366411800211091080000108000010800202043800191006180039615901910502007160071026734130080000102671626738267382673726737
8002426736200100101183100326700277212580010108000010800005011739750149236560267362671416660316695800102080000208000026736641180021109108000010800001080020204380059100618003961594319305020010160010726734013080000102673826737267152673726737