Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (unsigned offset, Q)

Test 1: uops

Code:

  ldr q0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1005389300041113742181812251000100010001483813643893892123247100010001000389389111001100010000100042103535103561353973216223866621000390390390392390
1004389300059113742181812251000100010001483813663893912123247100010001000389389111001100010000100039103535103561353973216223866621000390390390390390
1004389300041013742181812251000100010001477413643913892123247100010001000389389111001100010001100039103535103561353973216223866621000390390390390390
1004389300042113742181812251000100010001483813643893892123252100010001000423389111001100010000100039103535103561353973216223866621000390390390390394
1004389300041013742181812251000100010001483813643893892123247100010001000389389111001100010000100039103535103561353973216223866621000390390392390390
1004389200041113742181812251000100010001503713693893892123252100010001000389389111001100010000100039103539103561353973216223866621000390390392390390
1004389300048113742181812251000100010001483813643893892123247100010001000389389111001100010000100039103535103961353973216223866621000390395390390390
1004389300041123742121812251000100010001483813643893892123247100010001000389389111001100010000100039103936103561353973216223866621000390394390395390
10043923000410137421818112510001000100014838136638939121232521000100010003943911110011000100001000391035351035613539732162238610621000390390390390390
1004389301141013742181812251000100010001483813643943912123259100010001000389389111001100010001100039103535103561353973216223866621000390390395390390

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr q0, [x6, #8]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
502051200539000000000010100012008311951910944925601034010210001100003010010000100001079209573545561338180120023012004712004711314331136365010030200100001000060200100001000012004712005311502011009910040100100001000001001000001100000101000010100003210213511119660400026651000040100120048120036120051120036120037
502041200478990000000010100012006511951910946125601034010210001100003010010000100001079200573604461338180120023012004712003511314331136365010030200100001000060200100001000012003512004711502011009910040100100001000001001000001100000001000000000003210113511119660400026651000040100120050120433120051120150120048
50204120047900000000002888100012010911951910946125601034010210001100003010010000100001078862573604461338180120011012004712004711314331136365010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100000001000010100003210113501119813400166651000040100120131120243120265120048120148
502041202328990000000010000012006611951910944925601034010010001100003010010000100001079209573604461345130120023012004712004711314331136365010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100000061000010100003210113511119646400006651000040100120048120048120048120048120048
502041200478990000000010100012007211951910946125601034010210001100003010010000100001079200573604461338180120011012004712004711314331136365010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100000001000010100003210213511119660400026651000040100120048120048120048120048120036
502041200478990000000000100012008111951910946125601034010210000100003010010000100001078862573604461338180120023012004712004711314331136365010030200100001000060200100001000012004712003511502011009910040100100001000001001000001100000031000000100003250113511119660400006651000040100120048120048120048120048120048
502041200478990000000060100012008311949310946125601034010210001100003010010000100001079200573545561338180120023012003512004711314331136595010030200100001000060200100001000012003512004711502011009910040100100001000001001000001100000001000010100103210110111119646400026651000040100120036120048120036120048120051
5020412004789900001000101000120094119519109461256010340102100011000030100100001000010792005736044613381811200110120047120047113143311363650100302001000010000602001000010000120047120047115020110099100401001000010000010010000001000000425631003010102003932130513121679402016651000040100122437122433122508122421122632
50204122509917011000000010011221171205031103397626044740318100451005433928111031137811421715798562619675001227310123356123336114223460115477588653506811981117717167211922116371234441234123515020110099100401001000010000010010034111004800925781002810102003987231411119660400626651000040100122105122224122637122075122127
5020412003596301000020253304184810001200431195101094642560103401021000110000301001000010000107900857354556138362012002701200411200511131413113668501003020010000100006020010000100001200511200351150201100991004010010000100000100100000110001001100001010000321011351111965840002101091000040100120036120052120052120036120036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01) | cycle (02) | 03 | 08 | 09 | 0e | 0f | 18 | 1e | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | dcache load miss (bf) | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
500251200478990001000012003511950510950525600134001210001100003001010000100001079548573618861334990120011120050120050113165031136745001030020100001000060020100001000012004712003511500221091040010100001000011010000010000061000011003140210732119662400009681000040010120051120036120048120048120049
500241200508990000010012003511950510946525600134001210001100003001010000100001079548573545561334991120026120035120050113153031136895001030020100001000060020100001000012005012003511500211091040010100001000001010000010000001000011003140110711119665400020981000040010120048120048120048120091120051
500241200358990000010012003511950510947825600104001210001100003001010000100001079523573618861327570120023120035120047113165031136865001030020100001000060020100001000012005012009111500211091040010100001000001010000110000001000010003140110721119665400000681000040010120036120051120048120076120051
500241200528990100000012003511958510944925600134001210001100003001010000100001079548573618861334991120011120047120050113168031136895001030020100001000060398100001000012005012003511500211091040010100001000011010000110000001000011203140110711119650400029001000040010120036120036120048120103120036
5002412005089900000370012002011950510946125600134001010001100003001010000100001079517573618861334990120026120050120050113153031136745001030020100001000060020100001000012003512003511500211091040010100001000001010000110000001000011003140110711119665400026081000040010120051120036120102120060120037
500241200508990000010012003211950510949225600104001010001100003001010000100001079523573604461336521120011120035120047113168031136665001030204100001000060020100001000012005612005311500211091040010100001000001010000110000001000001003140110711119665400026601000040010120048120051120048120097120051
5002412004789900000100012003511950510947925600134001210000100003001010000100001079523573604461327570120011120047120050113168031136895001030020100001000060020100001000012003512004711500211091040010100001000001010000110000001000010003140110711119665400029081000040010120036120051120048120100120036
500241200978990000410012003511950510948125600134001010001100003001010000100001079523573687061341650120026120047120035113168031136895001030020100001000060020100001000012005012003511500211091040010100001000001010000110000401000011003140110711119650400026681000040010120051120048120048120107120036
500241200358990000001012003511954110946125600134001210001100003001010000100001079523573545561334990120026120047120047113153031136745001030020100001000060020100001000012003512004711500211091040010100001000001010000010000031000001003140110711119662400020651000040010120036120036120142120073120048
500241200358990001010012003511950510947725600134001210001100003001010000100001079523573545561335511120011120050120050113153031136895001030020100001000060020100001000012003512004711500211091040010100001000001010000010000001000011003140110711119662400009001000040010120050120048120124120052120051

Test 3: throughput

Count: 8

Code:

  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  ldr q0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 67 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020526734200111011113103266990181802580100100800001008000050011670160268622680226734166653166908010020080000200800002673326732118020110099100100800008000001008002019428005710259801686157019051101161126729092800001002673326733267152673426715
802042673320011101065002266992181802580100100800001008000050011679430267072673326732166373166908010020080000200800002673326714118020110099100100800008000001008002019080057101598003861584019151101161126729992800001002671526733267332671526715
80204267322001100116510226699218181625801001008000010080000500116726402668926733267161665531669080100200800002008000026733267321180201100991001008000080000010080019194280056102598003861574219151101161126729990800001002673426734267152673326733
8020426732201110011650022671800181525801001008000010080000500116967602671626714267341665531669080100200800002008000026732267321180201100991001008000080000010080020194280057101598003861194219051101161126729992800001002673426733267332673326733
8020426732201100110651022671821818172580100100800001008000050011667580267392674626736166623166918010020080000200800002673226714118020110099100100800008000001008001919080057000218003861574219151101161126729992800001002673426734267332673326733
8020426714200110110650022671821818125801001008000010080000500116768502670726733267321665531669080100200800002008000026733267321180201100991001008000080000010080020204280057100988000060574219051101161126711992800001002673326715267152671526733
802042671420011111065101267173001625801001008000010080000500115912002670726732267321665531669180100200800002008000026714267141180201100991001008000080000010080020194280057000598003901574219151101161126729900800001002673326733267332673326733
8020426732200110110211012671700016258010010080000100800005001159120026708267332673216655316690801002008000020080000267322671411802011009910010080000800000100800201908005710121800386119019051101161126729092800001002673326715267332673326734
8020426733200100110651022671720181625801001008000010080000500116967602671026732267321663731669080100200800002008000026732267321180201100991001008000080000010080019204280057100598003861194219051101161126730992800001002673326733267332673326715
802042673320010111065102267170181812580100100800001008000050011665190267232674126732166553166908010020080000200800002673226732118020110099100100800008000001008002120080019100598003861574219151101161126729992800001002673726734267342673326733

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80025267322001110106410326700218181725800101080000108000050116759926709267322673216677316712800102080000208000026732267321180021109101080000800000108002020428005810121800366156421915020716642672999280000102673326853267702671726737
8002426733200100010210022671721818162580010108000010800005011730742671626733267321665931671280010208000020800002673326732118002110910108000080000010800202042800560005980038601901905020616662671200280000102673326733267162673326734
80024267152001100116510326717218016258001010800001080000501167298267162673226732166783167138001020800002080000267322673211800211091010800008000001080020190800570005980038605701905020416762672909080000102673326733267332673326733
800242673220011111065002267172180152580010108000010800005011675992672226732267331667731669580010208000020800002673326733118002110910108000080000010800181942800570005980038605701905020616642672990280000102673326716267342673426733
8002426732200101010211032669920181525800101080000108000050116759926708267152673216677316712800102080000208000026732267331180021109101080000800000108001918428005700359800376157421925020416752673090280000102671626734267332673326716
8002426715200110010650012670000181625800101080000108000050116955726707267332673216659316712800102080000208000026733267321180021109101080000800000108002120428005710259800000056421915020616462672999280000102673326733267332673326733
80024267142001110116500226700218181625800101080000108000050116766026718267142673216677316712800102080000208000026732267321180021109101080000800000108001919428005710021800006157421905020616642672999280000102673426734267162673626733
800242673220010011065002267172180162580010108000010800005011676602689126740267371667831671380010208000020800002673226732118002110910108000080000010800222008005700259800386019421915020616652673290280000102673626715267152671626734
80024267152001111106810026717018181525800101080000108000050116759926711267322673216677316712800102080000208000026732267151180021109101080000800000108001921428005700058800386157421905020516462672999080000102673326733267332673326715
80024267322001111116510226717018181625800101080000108000050116759926708267322671516677316712800102080000208000026732267151180021109101080000800001108002020428005720159800000157421905020416462673000080000102673326733267162673326733