Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (unsigned offset, 64-bit)

Test 1: uops

Code:

  ldr x0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005403311044000360210192510001000100015267037439821632321000100010003945611100110001000110000431000039100060394300732161139514071000399375399399375
1004374300045001383210192510001000100015267039839822132561000100010003987711100110001000010000010381381038613944007311611371141471000399400399375375
100439820100000383200025100010001000155080398374197325610001000100039877111001100010000100000100003810380104400731161139514041000399399399395399
10043943000440013832121216251000100010001406013983982173256100010001000374771110011000100001000001038039103861044007311611395141471000395399375399399
100440030000100383211192510001000100015078039837422132321000100010003985611100110001000010000010000381038613844007311611391141401000399399375399399
100437430000101379201192510001000100014060137437422132561000100010003985611100110001000010000431040039103861394400731161139514041000395399395375375
1004398300044000359011182510001000100014060139839819732521000100010003987711100110001000010000431038001039603844007311611395141471000375399399375399
100439830004400135900121925100010001000150181398398197325210001000100039856111001100010000100004310380391000000440073116223910071000399375399399395
100439830004400138321016251000100010001498913743942213232100010001000398771110011000100001000043100000103900400007311622371141401000399375375399399
100437420005310135921016251000100010001527413743982293232100010001000398771110011000100011000043103800103861380007311611395141471000375399375375399

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldr x0, [x6, #8]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0055

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 49 | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020570061525001020007004001697865971425401083010310002301001000061605033424461496696370055700556463936494640100302001000060200100007005535114020110099100100003010010000010010000021000100011000111100261047111698063000606010000301007004470044700567004470044
4020470055525000010007004010697865971425401083010310001301001000061597133424461496697570043700556465136495840100302001000060200100007005535114020110099100100003010010000110010000001000101011000101000261017111698183000366610000301007005670056700447004470056
40204700555250000810070044016978659714254010830106100023010010000616050334186714966975700437005564651286518640100302001000060200100007005535214020110099100100003010010000110010000001000100011000101000261017111698183000666610000301007005670056700567004470056
4020470055524000020007004011697865970325401083010610002301001000061605033418671496697570055700436465136494640100302001000060200100007004435114020110099100100003010010000010010000021000100011000111100261017111698183000606610000301007006670102700597005970044
4020470043525000020007004001697865970325401083010310001301001000061597133424461496696370055700556465136494640100302001000060200100007005535114020110099100100003010010000010010000001000100011000111100261017111698183000600610000301007004470056700567004470044
4020470043525000020007004011697105970325401083010610002301001000061605033424461496697570055700436463936494640100302001000060200100007005535114020110099100100003010010000110010000001000100011000111100261017111698183000660610000301007004470044700567005670044
40204700555250000281007004010697865971425401083010610001301001000061605033424461496697570055700556463936495840100302001000060200100007005535114020110099100100003010010000010010000001000100011000111100261017111698183000366610000301007005670056700577005670056
4020470055524000010007002811697865970325401043010310002301001000061597133424461496697570055700436465136495840100302001000060200100007004335114020110099100100003010010000010010000001000100011000111000261017111698063000666610000301007005670056700567005670056
4020470055524000020007004011697865971425401083010310002301001000061605033424461496696370043700436463936495840100302001000060200100007005535114020110099100100003010010000010010000021000100011000111100261017111698063000306610000301007005670056700567005670056
4020470055525000111007008411697865970325401083010610002301001000061605033424461496696370055700556465136495840100302001000060200100007005535114020110099100100003010010000010010000121000100011000111100261017111698183000606610000301007005670056700567005670064

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002570047524000001000070032697285970625400143001310001300101000061706833420620496697070035700356465336497240010300201000060020100007003535114002110910100003001010000010100001100000010000102520171011698103000360610000300107003670051700517004870048
4002470047524000001000070035697605969525400143001010001300101000061698233422060496697070050700356466836497540010300201000060020100007005035114002110910100003001010000010100000100000010000012520171011698103000660010000300107003670051700517005170051
4002470050524000001000070032697465970925400103001310001300101000061706833414700496697070050700506465336496040010300201000060020100007005035114002110910100003001010000110100001100001010000012520171011698133000396910000300107003670051700517005170048
4002470047525000000000070032697435970625400143001010000300101000061698233414700496697070050700506466836497540010300201000060020100007005035114002110910100003001010000110100001100000010000112520171011698103000360610000300107003670036700517004870036
4002470050524000001000070032697605970625400143001010001300101000061706833422060496695570050700356466836496040010300201000060020100007003535114002110910100003001010000010100001100000310000112520171011697983000306610000300107005170036700367003670048
4002470035525000000000070032697435969525400143001310000300101000061695233414700496697070093701196467136497240010300201000060020100007005035114002110910100003001010000010100000100000010000112520171011698133000399010000300107004870036700517005170036
4002470035524000001000070032697605970925400143001010000300101000061706833422060496695570050700506465336497540010300201000060020100007003535114002110910100003001010000010100001100000010000102520171011698103000300910000300107005170110700577003770051
4002470050525001006010070032697605969525400103001310001300101000061706833414700496695570035700356465336497540010300201000060020100007005035114002110910100003001010000110100001100000010000112520171011698223000309610000300107004870048700367005170051
4002470047525000001000070032697605970925400143001010001300101000061706833422060496697070035700356466836496040010300201000060020100007005035114002110910100003001010000010100001100000010000102520171012698133000390910000300107003670051700517003670051
4002470050525000001000070035697285969525400103001310000300101000061706833414700496695570047700476465336497240010300201000060020100007003535114002110910100003001010000110100001100000010000112520171021698133000099910000300107005170051700517005170051

Test 3: throughput

Count: 8

Code:

  ldr x0, [x6, #8]
  ldr x0, [x6, #8]
  ldr x0, [x6, #8]
  ldr x0, [x6, #8]
  ldr x0, [x6, #8]
  ldr x0, [x6, #8]
  ldr x0, [x6, #8]
  ldr x0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526732200101100021103266992001725801001008000010080000500116948649236522671426732166553167218010020080000200800002671481118020110099100800001008000001008001921428001916006280000605742190051101161126711002800001002673326733267332671526799
80204267432001000000650022671721818125258010010080000100800005001169676492365226732267321663731667280100200800002008000026732811180201100991008000010080000010080019200800580002780000611942190151101161126711002800001002673326733267332671526736
802042673320010000006500226717018016258010010080000100800005001169486492365326714267321663731669180100200800002008000026732631180201100991008000010080000010080021204280019111658003800570192051101161126729900800001002673326715267332673326735
80204267452001111000651032671721818152580100100800001008000050011591204923653267142671416655316691801002008000020080000267156411802011009910080000100800000100800212042800571113080038611942190051101161126729000800001002673326734267332673326735
802042673220011000006500326718218062258010010080000100800005001166958492363426714267331665531669080100200800002008000026714641180201100991008000010080000010080020200800191106880000615742190051101161126729992800001002673326733267152673326739
8020426718200110000021001267172018152580100100800001008000050011594594923634267322673216644316691801002008000020080000267336411802011009910080000100800000100800202042800191102480037015842190051101161126730000800001002673326734267342673426738
8020426738201110000065000267172181816258010010080000100800005001167685492365326732267141665531669080100200800002008000026732641180201100991008000010080000010080019190800571016880000011942192051101161126733090800001002671526734267342673326717
8020426737200100000021001266990181816258010010080000100800005001167685492365226714267141663731669080100200800002008000026732811180201100991008000010080000010080020214280057011338000000570190051101161126730990800001002671526715267152673326741
802042673820010010006500226699218181625801001008000010080000500116948649236342671426714166553166728010020080000200800002673364118020110099100800001008000001008002021080019011628003860190190051101161126729992800001002673426734267342673426747
802042672320010010102100226699200152580100100800001008000050011667584923653267322671416637316672801002008000020080000267158111802011009910080000100800001100800201942800571342658000061570190051101161126730090800001002673426734267152673326738

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025267332001000000651022671721818182580010108000010800005011675990492365226732267331667731671380010208000020800002673281118002110910800001080000010800202042800571015980038615742191050204166826729009680000102673326733267332671526734
80024267332001000100651032671821818162580010108000010800005011658220492363426732267331667731671280010208000020800002673281118002110910800001080000010800202042800570005880038615742191050206166626729099280000102673426734267332673326733
8002426732200110010065103267173181815258001010800001080000501167599049236532673226732166773167128001020800002080000267328211800211091080000108000001080019190800581005980038615742191050208166726729099280000102673326733267332673326733
8002426732200101000065103267172181816258001010800001080000501172719049236522673226715166773167128001020800002080000267328111800211091080000108000011080021190800570015980037615742190050208168826729099280000102673426953267972673626734
80024267332001000111650032671721818162580010108000010800005011669600492365226732267331667731671280010208000020800002673281118002110910800001080000010800201942800571012180038615742191050209165626729099280000102673326733267332673326733
80024267322001110100651022671720016258001010800001080000501166263049236522673226733166773167128001020800002080000267328111800211091080000108000001080020190800571005980000615742192050205168826729090280000102673426733267152673326733
80024267152001100101651042671821818162580010108000010800005011702040492365226733267321667731671280010208000020800002673364118002110910800001080000010800192042800561015980038615742190050205165626729009280000102673326733267332673326733
80024267392001100100651032671721818152580010108000010800005011675990492365226714267331667731671280010208000020800002673281118002110910800001080000010800191942800580005980038615742191050207168626729099280000102673426734267332673326733
80024267322011100011651022671821818162580010108000010800005011653040492365226732267321667731671280010208000020800002673281118002110910800001080000010800191942800571015980038615742191050206166626729099280000102673326733267332673326734
80024267332001010100651032671721818152580010108000010800005011675990492365326732267321667731671280010208000020800002673282118002110910800001080000010800191942800570015980038615742190050206166626730099280000102673326733267332671626733