Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (unsigned offset, Q)

Test 1: uops

Code:

  ldr q0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005389300041113742181812251000100010001483813643893892123247100010001000389389111001100010000100042103535103561353973216223866621000390390390392390
1004389300059113742181812251000100010001483813663893912123247100010001000389389111001100010000100039103535103561353973216223866621000390390390390390
1004389300041013742181812251000100010001477413643913892123247100010001000389389111001100010001100039103535103561353973216223866621000390390390390390
1004389300042113742181812251000100010001483813643893892123252100010001000423389111001100010000100039103535103561353973216223866621000390390390390394
1004389300041013742181812251000100010001483813643893892123247100010001000389389111001100010000100039103535103561353973216223866621000390390392390390
1004389200041113742181812251000100010001503713693893892123252100010001000389389111001100010000100039103539103561353973216223866621000390390392390390
1004389300048113742181812251000100010001483813643893892123247100010001000389389111001100010000100039103535103961353973216223866621000390395390390390
1004389300041123742121812251000100010001483813643893892123247100010001000389389111001100010000100039103936103561353973216223866621000390394390395390
10043923000410137421818112510001000100014838136638939121232521000100010003943911110011000100001000391035351035613539732162238610621000390390390390390
1004389301141013742181812251000100010001483813643943912123259100010001000389389111001100010001100039103535103561353973216223866621000390390395390390

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr q0, [x6, #8]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
502051200539000000000010100012008311951910944925601034010210001100003010010000100001079209573545561338180120023012004712004711314331136365010030200100001000060200100001000012004712005311502011009910040100100001000001001000001100000101000010100003210213511119660400026651000040100120048120036120051120036120037
502041200478990000000010100012006511951910946125601034010210001100003010010000100001079200573604461338180120023012004712003511314331136365010030200100001000060200100001000012003512004711502011009910040100100001000001001000001100000001000000000003210113511119660400026651000040100120050120433120051120150120048
50204120047900000000002888100012010911951910946125601034010210001100003010010000100001078862573604461338180120011012004712004711314331136365010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100000001000010100003210113501119813400166651000040100120131120243120265120048120148
502041202328990000000010000012006611951910944925601034010010001100003010010000100001079209573604461345130120023012004712004711314331136365010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100000061000010100003210113511119646400006651000040100120048120048120048120048120048
502041200478990000000010100012007211951910946125601034010210001100003010010000100001079200573604461338180120011012004712004711314331136365010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100000001000010100003210213511119660400026651000040100120048120048120048120048120036
502041200478990000000000100012008111951910946125601034010210000100003010010000100001078862573604461338180120023012004712004711314331136365010030200100001000060200100001000012004712003511502011009910040100100001000001001000001100000031000000100003250113511119660400006651000040100120048120048120048120048120048
502041200478990000000060100012008311949310946125601034010210001100003010010000100001079200573545561338180120023012003512004711314331136595010030200100001000060200100001000012003512004711502011009910040100100001000001001000001100000001000010100103210110111119646400026651000040100120036120048120036120048120051
5020412004789900001000101000120094119519109461256010340102100011000030100100001000010792005736044613381811200110120047120047113143311363650100302001000010000602001000010000120047120047115020110099100401001000010000010010000001000000425631003010102003932130513121679402016651000040100122437122433122508122421122632
50204122509917011000000010011221171205031103397626044740318100451005433928111031137811421715798562619675001227310123356123336114223460115477588653506811981117717167211922116371234441234123515020110099100401001000010000010010034111004800925781002810102003987231411119660400626651000040100122105122224122637122075122127
5020412003596301000020253304184810001200431195101094642560103401021000110000301001000010000107900857354556138362012002701200411200511131413113668501003020010000100006020010000100001200511200351150201100991004010010000100000100100000110001001100001010000321011351111965840002101091000040100120036120052120052120036120036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 18 | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
500251200478990001000012003511950510950525600134001210001100003001010000100001079548573618861334990120011120050120050113165031136745001030020100001000060020100001000012004712003511500221091040010100001000011010000010000061000011003140210732119662400009681000040010120051120036120048120048120049
500241200508990000010012003511950510946525600134001210001100003001010000100001079548573545561334991120026120035120050113153031136895001030020100001000060020100001000012005012003511500211091040010100001000001010000010000001000011003140110711119665400020981000040010120048120048120048120091120051
500241200358990000010012003511950510947825600104001210001100003001010000100001079523573618861327570120023120035120047113165031136865001030020100001000060020100001000012005012009111500211091040010100001000001010000110000001000010003140110721119665400000681000040010120036120051120048120076120051
500241200528990100000012003511958510944925600134001210001100003001010000100001079548573618861334991120011120047120050113168031136895001030020100001000060398100001000012005012003511500211091040010100001000011010000110000001000011203140110711119650400029001000040010120036120036120048120103120036
5002412005089900000370012002011950510946125600134001010001100003001010000100001079517573618861334990120026120050120050113153031136745001030020100001000060020100001000012003512003511500211091040010100001000001010000110000001000011003140110711119665400026081000040010120051120036120102120060120037
500241200508990000010012003211950510949225600104001010001100003001010000100001079523573604461336521120011120035120047113168031136665001030204100001000060020100001000012005612005311500211091040010100001000001010000110000001000001003140110711119665400026601000040010120048120051120048120097120051
5002412004789900000100012003511950510947925600134001210000100003001010000100001079523573604461327570120011120047120050113168031136895001030020100001000060020100001000012003512004711500211091040010100001000001010000110000001000010003140110711119665400029081000040010120036120051120048120100120036
500241200978990000410012003511950510948125600134001010001100003001010000100001079523573687061341650120026120047120035113168031136895001030020100001000060020100001000012005012003511500211091040010100001000001010000110000401000011003140110711119650400026681000040010120051120048120048120107120036
500241200358990000001012003511954110946125600134001210001100003001010000100001079523573545561334990120026120047120047113153031136745001030020100001000060020100001000012003512004711500211091040010100001000001010000010000031000001003140110711119662400020651000040010120036120036120142120073120048
500241200358990001010012003511950510947725600134001210001100003001010000100001079523573545561335511120011120050120050113153031136895001030020100001000060020100001000012003512004711500211091040010100001000001010000010000001000011003140110711119662400009001000040010120050120048120124120052120051

Test 3: throughput

Count: 8

Code:

  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 67 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526734200111011113103266990181802580100100800001008000050011670160268622680226734166653166908010020080000200800002673326732118020110099100100800008000001008002019428005710259801686157019051101161126729092800001002673326733267152673426715
802042673320011101065002266992181802580100100800001008000050011679430267072673326732166373166908010020080000200800002673326714118020110099100100800008000001008002019080057101598003861584019151101161126729992800001002671526733267332671526715
80204267322001100116510226699218181625801001008000010080000500116726402668926733267161665531669080100200800002008000026733267321180201100991001008000080000010080019194280056102598003861574219151101161126729990800001002673426734267152673326733
8020426732201110011650022671800181525801001008000010080000500116967602671626714267341665531669080100200800002008000026732267321180201100991001008000080000010080020194280057101598003861194219051101161126729992800001002673426733267332673326733
8020426732201100110651022671821818172580100100800001008000050011667580267392674626736166623166918010020080000200800002673226714118020110099100100800008000001008001919080057000218003861574219151101161126729992800001002673426734267332673326733
8020426714200110110650022671821818125801001008000010080000500116768502670726733267321665531669080100200800002008000026733267321180201100991001008000080000010080020204280057100988000060574219051101161126711992800001002673326715267152671526733
802042671420011111065101267173001625801001008000010080000500115912002670726732267321665531669180100200800002008000026714267141180201100991001008000080000010080020194280057000598003901574219151101161126729900800001002673326733267332673326733
8020426732200110110211012671700016258010010080000100800005001159120026708267332673216655316690801002008000020080000267322671411802011009910010080000800000100800201908005710121800386119019051101161126729092800001002673326715267332673326734
8020426733200100110651022671720181625801001008000010080000500116967602671026732267321663731669080100200800002008000026732267321180201100991001008000080000010080019204280057100598003861194219051101161126730992800001002673326733267332673326715
802042673320010111065102267170181812580100100800001008000050011665190267232674126732166553166908010020080000200800002673226732118020110099100100800008000001008002120080019100598003861574219151101161126729992800001002673726734267342673326733

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025267322001110106410326700218181725800101080000108000050116759926709267322673216677316712800102080000208000026732267321180021109101080000800000108002020428005810121800366156421915020716642672999280000102673326853267702671726737
8002426733200100010210022671721818162580010108000010800005011730742671626733267321665931671280010208000020800002673326732118002110910108000080000010800202042800560005980038601901905020616662671200280000102673326733267162673326734
80024267152001100116510326717218016258001010800001080000501167298267162673226732166783167138001020800002080000267322673211800211091010800008000001080020190800570005980038605701905020416762672909080000102673326733267332673326733
800242673220011111065002267172180152580010108000010800005011675992672226732267331667731669580010208000020800002673326733118002110910108000080000010800181942800570005980038605701905020616642672990280000102673326716267342673426733
8002426732200101010211032669920181525800101080000108000050116759926708267152673216677316712800102080000208000026732267331180021109101080000800000108001918428005700359800376157421925020416752673090280000102671626734267332673326716
8002426715200110010650012670000181625800101080000108000050116955726707267332673216659316712800102080000208000026733267321180021109101080000800000108002120428005710259800000056421915020616462672999280000102673326733267332673326733
80024267142001110116500226700218181625800101080000108000050116766026718267142673216677316712800102080000208000026732267321180021109101080000800000108001919428005710021800006157421905020616642672999280000102673426734267162673626733
800242673220010011065002267172180162580010108000010800005011676602689126740267371667831671380010208000020800002673226732118002110910108000080000010800222008005700259800386019421915020616652673290280000102673626715267152671626734
80024267152001111106810026717018181525800101080000108000050116759926711267322673216677316712800102080000208000026732267151180021109101080000800000108001921428005700058800386157421905020516462672999080000102673326733267332673326715
80024267322001111116510226717018181625800101080000108000050116759926708267322671516677316712800102080000208000026732267151180021109101080000800001108002020428005720159800000157421905020416462673000080000102673326733267162673326733