Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRSB (unsigned offset, 64-bit)

Test 1: uops

Code:

  ldrsb x0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10053943104500135921121925100010001000140603983982213256100010001000398771110011000100011000431039001039613944731161137101071000395395399375395
1004394311441013832101625100010001000152083983982213256100010001000374561110011000100001000441039001000603944731161137114041000399399375375375
100439831144100383011025100010001000152083983742213256100010001000374771110011000100001000010000381038613944731161139501471000375399375399375
1004398300440003592010251000100010001527439839822132321000100010003747711100110001000010000103813810006138437311611391141471000399375395399399
100439830044101383011162510001000100015274398374221323210001000100039877111001100010000100044103814110386139447311611395101401000375399399375375
10043743000000384010192510001000100015267398398221325610001000100039877111001100010000100043103803810386000731161139114071000375375399376399
1004398200441003832010251000100010001406039837419732321000100010003997911100110001000010004310000381000613907311611395141401000375399400375375
100439830001013592111925100010001000152673743982213256100010001000398771110011000100001000431000001038600437311611395101041000399399399399399
100439830000013832121192510001000100015274398398197325710001000100039877111001100010000100043103803810386100731161137114041000375399395399399
100439831044101379211192510001000100015274398398217325610001000100039877111001100010000100043103800103801380731161137110071000399399399402375

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrsb x0, [x6, #8]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057004752501100000070035697815970925401043010310001301001000061617533414700496707570050700506463136495040100302001000060200100007005035114020110099100100003010010000010010000110000000100001010261027111698133000066010000301007003670048700517004870051
402047004752500000000070037697815970925401003010310001301001000061617533422060496697070047700506464636495040100302001000060200100007003535114020110099100100003010010000010010000010000100100000000261017111698103000360910000301007004870048700517004870048
402047004752500100001070035697645969525401003010310001301001000061600533414700496697070035700356463136495340100302001000060200100007005035114020110099100100003010010000110010000010000000100000000261017111698133000366910000301007005170036700517005170048
402047005052500000000070020697815970625401043010310001301001000061600533422060496696770047700506464336495340100302001000060200100007003535114020110099100100003010010000010010000010000100100001010261017111698103000096610000301007005370036700517003670051
402047005052500000001070036697835969525401043010310001301001000061600533414700496695570050700356463136495340100302001000060200100007003535114020110099100100003010010000010010000110000000100001010261017111698103000360710000301007004870051700517005170036
402047005052400000100070020697855970873401003010010001301001000061600533420620496697070050700506463136495340100302001000060200100007005035114020110099100100003010010000010010000010000000100001010261017111698133000066010000301007005170048700487004870048
402047004752500000000070035697645970925401043010310001301001000061600533420620496697070035700356469236493840100302001000060200100007003535114020110099100100003010010000010010000010000000100001010261017111698133000060910000301007003670036700517003670048
402047005052400000101070032697815969525401043010310001301001000061600533422060496697070035700506464636493840100302001000060200100007005035114020110099100100003010010000110010000110000000100001010261017111697983000396010000301007003670036700487005170051
402047004752400000600070020697815969525401043010310001301001000061601533422060496697070047700356464636495040100302001000060200100007003535114020110099100100003010010000010010000110000000100001010261017111697983000060010000301007004870051700367003670051
402047005052400000000070035697815970625401003010310001301001000061600533414700496696770035700506464636495340100302001000060200100007004735114020110099100100003010010000110010000110000800100001010261017111697983000306010000301007003670051700367005170036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257005352500011001017003269728596952540010300131000130010100006169523341470004966967700357004764665364960400103002010000600201000070047351140021109101000030010100001101000001100000001000011025201671614697983000366610000300107004870048700487004870048
40024700355250001000101700326972859695254001430010100013001010000616988334206205496696770047700476467036497240010300201000060020100007004735114002110910100003001010000010100000110000000100001102520571135698103000366010000300107003670048700487004870036
400247003552400010001007002069743597062540014300131000130010100006170683342062054966967700357004764665364972400103002010000600201000070047351140021109101000030010100000101000001100000001000001025201171144698103000360010000300107004870048700487004870048
400247005052500000000007002069728596952540010300131000130010100006169523342062054966967700357003864665364972400103002010000600201000070047351140021109101000030010100000101000000100000001000010025201471514698103000066010000300107004870048700367004870048
400247004752400000001017002069743597062540014300131000130010100006169523341470054966974700357004764653364972400103002010000600201000070047351140021109101000030010100000101000001100000001000011025201471144698103000366610000300107004870036700367004870036
400247003552400000001007003269760597062540014300131000130010100006169523342206054966955700477003564653364960400103002010000600201000070047351140021109101000030010100000101000001100000001000010025201471114698103000066010000300107004870048700367004870036
40024700355250000000000700206974359695254001430010100003001010000616961334147005496695570047700476466536497240010300201000060020100007004735114002110910100003001010000010100000010000000100000002520571514698103000366610000300107004870048700367004870048
400247004752500000001007003569743597062540014300131000130010100006170683341470054966955700357004764665364960400103002010000600201000070035351140021109101000030010100000101000001100000001000000025206711414698103000060610000300107003670085700487004870051
400247005052400101000007003269728596952540014300101000130010100006170683342062054966955700477004764665364972400103002010000600201000070047351140021109101000030010100000101000000100000001000001025201471514697983000066010000300107005170036700367005170048
40024700475250000100100700326976059695254001430013100013001010000616952334206205496696770035700356465336497240010300201000060020100007004735114002110910100003001010000010100000010000000100001102520571513698103000360610000300107004870036700487003670036

Test 3: throughput

Count: 8

Code:

  ldrsb x0, [x6, #8]
  ldrsb x0, [x6, #8]
  ldrsb x0, [x6, #8]
  ldrsb x0, [x6, #8]
  ldrsb x0, [x6, #8]
  ldrsb x0, [x6, #8]
  ldrsb x0, [x6, #8]
  ldrsb x0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205267282000000000661022670330720258010010080000100800005001172437049236562673726736166593166948010020080000200800002673664118020110099100800001008000011008002018008005900021800396119431910511031633267340104800001002672826708267322672826728
80204267072000000000450012689437720258010010080000100800005001166101149236342671426736166593166948010020080000200800002671464118020110099100800001008000001008002019430800181016080039615943191051103163326799005800001002673726716267372673826737
80204267142001011000670032670437719258010010080000100800005001167555049236342673626736166373166948010020080000200800002673664118020110099100800001008000011008002020430800591006180039615843192051103163226734007800001002670826732267082670826708
802042673120000000004510126730211202580100100800001008000050011671271492365126731267311665431668980100200800002008000026731771180201100991008000010080000110080000000800000003880038003800005110316232672814104800001002673226708267282670826728
8020426707200000000000012669221211925801001008000010080000500116652504923647267072673116654316665801002008000020080000267277711802011009910080000100800000100800000008003803504180038603800005110216332670414147800001002681626734268832672926728
8020426727200000000044000266980101625801001008000010080000500116712714923651267072673116654316689801002008000020080000267317711802011009910080000100800000100800000008000000008000060044000511031633267041407800001002673226732267282673226708
8020426731200000000044001267112011925801001008000010080000500116712714923651267312673116654316689801002008000020080000267317711802011009910080000100800000100800000430800380003880000603944000511031633267040144800001002673226732267112672826732
8020426731200000000044001267142001925801001008000010080000500116652504923651267312673116654316689801002008000020080000267275611802011009910080000100800000100800000008003800008003801044000511021633267281400800001002670826732267322670826732
802042673120000000004400126732211025801001008000010080000500116555614923627267272670716654316685801002008000020080000267077711802011009910080000100800000100800000430800380003880038613844000511031633267040144800001002670826708267322672826708
80204267312000000000000026727011216258010010080000100800005001165556049236272673126727166543166658010020080000200800002670777118020110099100800001008000001008000000080000000388003860390000511031623267281004800001002673226708267322673226732

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252674120100101170103267220771925800101080000108000050116710614923661267152674116689316717800102080000208000026736641180021109010800001080000010800212008006010121800386119431915020516045267341313580000102673726737267372673726738
8002426737200100001000126716210192580010108000010800005011667501492364826731267311665231670880010208000020800002672756118002110901080000108000011080000043800380003880039613944005020516065267051414080000102670926732267322673226732
80024267312000011116710326700200025800101080000108000050116721914923656267362671516681316716800102080000208000026714641180021109010800001080000010800192043800191006380000615943191502051605626735130580000102673726716267372673726715
800242673720110000144001267160001625800101080000108000050116750114923634267152673616659316716800102080000208000026715851180021109010800001080000010800192043800580006080040613944005020516065267051414480000102673226732267092672826732
80024267082000000114400026693000025800101080000108000050116720114923628267282673116676316711800102080000208000026708771180021109010800001080000010800000438000000038800390159431915020516056267341313580000102675726715267432671526737
8002426715200100000440012669300119258001010800001080000501166750049236712673126731166763167118001020800002080000267317711800211090108000010800000108000004380038000398003861180191502061605526734130580000102671526737267152673726737
80024267362001000004400126716201162580010108000010800005011667500492365126708267311665231671180010208000020800002673156118002110901080000108000001080000008003900039800386158431915020516045267341313580000102673726737267372673726738
80024267372011010016600126700277192580010108000010800005011677910492362826731267311667631668880010208000020800002673177118002110901080000108000001080000043800380000800396019431915020616465267121313080000102671626738267382673826716
80024267362001010014400226693211192580010108000010800005011667501492362826731267281665231668880010208000020800002670856118002110901080000108000001080000043800380003880039610430050206160572670500080000102670926732267092670926732
8002426731200001010210032672227720258001010800001080000501166960149236512673126731166523167118001020800002080000267087711800211090108000010800000108000004380000000388000061390005020616075267281414780000102673226732267322673226732