Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (register, Q)

Test 1: uops

Code:

  ldr q0, [x6, x7]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100539831100016510138401801525100010001000146740374381399221325710001000200038139811100110001000110191942105710069103760194219273116113959901000383399400399400
100439921111016510238320181625100010001000153750377399398221323910001000200039938111100110001000010192042105810159103861574219173116113969921000382382399383382
10043992111100650023842001625100010001000143990373398398221325710001000200039839911100110001000010201941101901059103860194219173116113789001000400400400400399
1004381310110165102384218181625100010001000153750374399399221325610001000200039939911100110001000010191942105700021103800573819173116113960021000400399400400400
10043813100101650023662180162510001000100015318037438139920432401000100020003983991110011000100001020200101920159103860574219273116113969901000400399399383382
1004402310110065103384218181625100010001000153190373399398221323910001000200039838111100110001000010201942105710165103900574219173116113790921000400401400383400
10043983101110650003662018152510001000100015314035639839922132571000100020003993981110011000100001020210101910027103861573819173116113969921000399399400382399
10043982110100211003840180025100010001000153620373399382204323910001000200039838111100110001000010201942105810060100061574219173116113969401000382382400383400
100438131100006600238320002510001000100014399035739939822132571000100020003823991110011000100001020194110191025910380057019173116113959021000383382400400382
10043813111100650013842018152510001000100015334035739939820432571000100020003983991110011000100001020200105610058103861574119073116113969921000382382400382382

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr q0, [x6, x7]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01) | cycle (02) | 03 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 24 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | dcache load miss (bf) | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50205120047899000000100012003211950910946325601034010210000100003010010000100001078862573545561344611120026120050120050113141031136675010030200100001000060200200001000012005012003511502011009910040100100001000011001000011000000100001103210110713119777400029681000040100120036120051120051120048120036
50204120050899000000000012003511949310946125601034010010001100003010010000100001078999573545561344610120023120047120050113145031137965010030200100001000060200200001000012004212004711502011009910040100100001000011001000011000000100000103210210723119657400026681000040100120048120051120048120051120051
50204120050899000000110012002011949310944925601004010210001100003010010000100001078999573604461338181120026120035120050113143031136365010030200100001000060200200001000012005012003511502011009910040100100001000001001000001000000100001103210210721119659400029681000040100120166120055120048120036120048
50204120047899000000000012002611949310946125601004010210000100003010010000100001078999573545561344611120026120050120047113145031136675010030200100001000060200200001000012005012004711502011009910040100100001000001001000001000000100001103210210722119646400029981000040100120051120051120051120036120036
50204120050899000000100012003511950910946325601034010210001100003010010000100001078999573545561338181120026120050120050113143031136675010030200100001000060200200001000012005012004711502011009910040100100001000001001000001000003100000103210210722119657400020681000040100120104120081120050120048120051
50204120035899000000000012003511949310946325601004010210000100003010010000100001078862573545561344610120026120050120047113141031136675010030200100001000060200200001000012003512004711502011009910040100100001000001001000011000000100001003210310723119657400029001000040100120048120048120051120048120051
50204120050899000000610012003511949310946325601034010210001100003010010000100001078862573559961338691120026120035120035113141031136675010030200100001000060200200001000012004712003511502011009910040100100001000001001000011000000100001103210210132119646400009681000040100120051120036120051120051120036
50204120050899000000110012003511949310946325601004010010001100003010010000100001078999573618861361660120026120050120050113145031136675010030200100001000060200200001000012003512004711502011009910040100100001000001001000011000000100001103210310723119657400020601000040100120054120048120048120051120036
5020412005189900000018910012003511950910946625601034010210001100003010010000100001078999573618861344610120011120050120050113145031136675010030200100001000060200200001000012004712004711502011009910040100100001000001001000011000000100001103210310732119646400009081000040100120036120056120051120036120051
50204120035899000000600012002011950910946325601034010210001100003010010000100001079200573545561338181120026120122120047113145031136675010030200100001000060200200001000012004712003511502011009910040100100001000011001000001000000100001103210310132119646400020981000040100120051120051120051120036120051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0053

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | cf | d0 | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5002512005389911110002101120026119511109474256001640014100021000030010100001000010795755735750613321600120029012004112005311317131136925001030020100001000060020200001000012005312005311500211091040010100001000001010002211000202110000111110314000210724119668400046651000040010120054120054120054120103120042
50024120053899110100021011200381196051094882446001340014100021000030158100001000010795845736332613376415120029012005312005311315931136925001030020100001000060020200001000012005312005311500211091040010100001000001010001111000241110000110120314000410725119668400040001000040010120054120054120054120091120054
5002412004189911120002001120038119572109466256001640014100021000030010100001000010795755736332613376400120029012004112004111317131136925001030020100001000060020200001000012005312005311500211091040010100001000001010001311000301110000111110314000310722119656400046651000040010120043120054120171120059120054
5002412005389910110001001120038119547109469256001640014100021000030010100001000010794635736332613376400120017012005312005311317131136925001030020100001000060020200001000012005312004111500211091040010100001000001010002201000301410000111110314000210733119668400046651000040010120054120054120054120133120054
5002412005389911110002101120038119499109513256001640014100011000230010100001000010794635736332613376400120029012005712005311317131136925001030020100001000060020200001000012004112005311500211091040010100001000001010002211000212110000111110314000210724119841400046651000040010120054120054120054120118120054
5002412004189911100002101120043119511109467256001340014100021000030010100001000010795755736332613376400120029012005312005311317131136805001030020100001000060020200001000012004112005311500211091040010100001000001010002201000211110000111100314000310724119668400046601000040010120042120054120160120125120054
5002412004189911100002101120038119511109468256001640014100021000030010100001000010795755736332613391700120029012005312005311317131136925001030020100001000060020200001000012010212005321500211091040010100001000001010001211000300110000111120314000310742119668400026051000040010120042120042120054120106120054
5002412005390011100001101120066119511109525256001640014100021000030010100001000010795755736332613376400120029012005312005311317131136925001030020100001000060398200001000012005312005311500211091040010100001000001010002211000101110000111110314000410744119668400046051000040010120054120054120054120054120057
5002412005389911110002101120038119511109510256001640014100021000030010100001000010794635736332613376400120029012005312005311317131136925001030020100001000060020200001000012005312005811500211091040010100001000001010002211000210110000011110314000210752119668400046651000040010120054120054120054120119120054
500241200539001011000110011200261195111094932560016400141000210000300101000010000107957557357506133764001200290120053120053113171221136805001030020100001000060020200001000012005912005311500211091040010100001000001010003211000201410000111110314000410752119668400046601000040010120054120587120054120079120054

Test 3: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldr q0, [x6, x7]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 07 | 08 | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | dcache load miss (bf) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
502051200518990010100101000120036119510109464256010040102100011000030100100001000010790085736236613446101200290120051120051113146311366850100302001000010000602002000010000120055120079115020110099100401001000010000010010000011000003100001103210510711119658400001010121000040100120052120052120052120055120052
50204120051899000000010100012002011949310946725601034010210000100003010010000100001079035573638061363170120027012003512005111314131136585010030200100001000060200200001000012007712005111502011009910040100100001000001001000001100000010000010321011071111959140000131001000040100120036120052120052120052120052
5020412003689900000001000001200201195131094492560103401001000110000301001000010000107900857362366134461012001101200511200351131463113668501003020010000100006020020000100001200731200531150201100991004010010000100000100100000110000001000011032101101111196464000201091000040100120052120052120052120052120052
50204120054899000000010100012003611951010944925601034010010001100003010010000100001079008573638061344611120011012005112003511314631136685010030200100001000060200200001000012006112005111502011009910040100100001000001001000001100000010000011321021071111965840002101001000040100120036120055120036120055120052
5020412005189900000000010001200361195101094642560103401021000010000301001000010000107900857363806136317012002701200541200511131413113668501003020010000100006020020000100001200551200511150201100991004010010000100000100100000110000001000000032101107211196464000210091000040100120563120052120052120036120052
502041200518990000100001000120036119513109464256010040102100011000030100100001000010788625736236613631701200270120035120051113146311366850100302001000010000602002000010000120072120055115020110099100401001000010000010010000011000015410000000321011011111965840024131001000040100120052120052120052120052120052
502041200358990000100101000120036119510109464256010340102100011000030100100001000010790355735455613647011200300120054120035113141311366850100302001000010000602002000010000120083120051115020110099100401001000010000010010000011000000100001103210210111119646400021313121000040100120036120052120052120052122262
502041222899150100017133181211200001224511205381103227396044940245100491005233666113681118111378415799015619719801200270120054120052113141311367150100321431073710490602002000010000120070120056151502011009910040100100001000001001004640100000010008110321011171111964640002131091000040100120041120036120036120052120036
5020412005193101011303442252288008012002011951010944925601034010010001100003010010000100001079008573545561363171120027012003512005111314931136685010030200100001000060200200001000012004212005111502011009910040100100001000001001000001100001010000110321011161111966140002131001000040100120052120052120036120036120036
50204120035899000110000010012002011951310946425601034010210001100003010010000100001079008573623661363171120011012005112005111314631136715010030200100001000060200200001000012006312005111502011009910040100100001000001001000001100000310000010321011731311972840000101001000040100120052120052120052120039120036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c2 | branch mispredict (cb) | cd | cf | d5 | d6 | d9 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5002512004989900000010101200351195051094612560013400121000110000300101000010000107951757361886133499112002301200501200971131533113689500103002010000100006002020000100001200501200471150021109104001010000100000101000001100000000100001100031404140155119662400020681000040010120036120051120051120036120051
5002412005089900000010001200321195081094634660013400121000110000300101006010000107954857360446133652012001101200501200501131683113689500103002010000100006002020000100001200351200501150021109104001010000100000101000001100000000100001100031405107044119662400000681000040010120036120051120051120048120051
5002412005089900000010001200351194921094632560013400231000010000300101000010000107954857354556133499012002601200351200471131533113689500103002010000100006002020000100001200501200501150021109104001010000100001101000001100000000100000100031404107055119662400029081000040010120036120036120056120051120036
5002412004789900000010101200201195111094632560013400121000110000300101000010000107954857354556132757012002301200501200501131683113689500103002010000100006002020212101601210331200351150021109104001010000100000101000000100000000100000100031405107042119665400000981000040010120051120048120051120051120048
5002412004789900000060001200351195131094492560010400101000010000300101000010000107954857361886133652012001101200351200501131683113689500103002010000100006002020000100001200501200471150021109104001010000100000101000001100000000100001100031404107145119650400029081000040010120051120051120051120048120051
5002412004789900010000101200351195081094632560013400121000110000300101006010000107954857360446132757012002601200501200351131683113689500103002010000100006002020000100001200351200351150021109104001010000100000101000000100000000100001100031403107165119665400029681000040010120036120051120048120036120051
5002412005089900000000001200351195081094492560010400121000010000300101000010000107954857361886133499012002601200471200351131533113689500103002010000100006002020000100001200501200471150021109104001010000100000101000001100000000100001100031405107023119662400029681000040010120036120036120051120036120051
5002412005089900000010101200201195081095062560013400121000110000300101000010000107954857360446134569012002601200351200471131683113689500103002010000100006002020000100001200501200351150021109104001010000100000101000000100000003100001100031404107155119665400106681000040010120036120048120048120051120051
5002412004789900000010001200201194921094632560013400121000110000300101000010000107952357360446133499112002601200351200351131683113674500103002010000100006002020000100001200501200471150021109104001010000100000101000000100000000100001100031403107155119665400006901000040010120051120051120051120036120051
5002412005089900000010101200351195081094632560010400101000110000300101000010000107951757361886133499012001101200501200471131683113689500103002010000100006002020000100001200351200471150021109104001010000100000101000000100000000100000100031404107055119650400020081000040010120051120051120051120051120036

Test 4: throughput

Count: 8

Code:

  ldr q0, [x6, x7]
  ldr q0, [x6, x7]
  ldr q0, [x6, x7]
  ldr q0, [x6, x7]
  ldr q0, [x6, x7]
  ldr q0, [x6, x7]
  ldr q0, [x6, x7]
  ldr q0, [x6, x7]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80205267322011000000210102266990180025801001008000010080016500116616002668926714267321666461668880114202800242001600482672026718118020110099100100800008000001008002019420800570012180000611942191111511801600267290992800001002673326715267152673326733
8020426732200110200065000326717018181525801001008000010080016511116990202670726748267321666061666680115200800242001600482688426880118020110099100100800008000001008002020420800571032180038615742190111511801600267110992800001002671526733267152673326888
8020426715200110101086010226724218012580100100800001008001550011677670266902673626736166676166848011320080024200160048268362674311802011009910010080000800000100800181900800180005980038615742190111511801600267360902800001002671526733267332671526733
802042673220011010006601032671720019258010010080000100800155001169563026689267322673316660616666801162008002420016004826724268691180201100991001008000080000010080020210080057110628003701570192111511801600267290002800001002673326715267332673326733
8020426732200100100421000426699218181525801001008000010080016500116721502670726714267321664361666780116200800242001600482673926838118020110099100100800008000001008001919420800190102180038605639190111511801600267110992800001002673426734267332673326733
802042673220010000006501022669901818162580100100800001008001650011665900266892673226732166606166848011520080024200160048267402685111802011009910010080000800000100800202000800190016380038611942191111511801600267290992800001002671526733267332673326733
80204267322001011000210100267180018162580100100800001008001750011697570266892671526732166606166668011620080024202160048267142671411802011009910010080000800000100800201942080057000608003861570191111511801600267300902800001002673426715267332673326719
8020426733204111000065000326717218181425801001008000010080014500116659002668926732267331666061668480114200800242001600482673726831118020110099100100800008000001008002020420800570005980038615742190111511801600267290992800001002672926733267152673426715
802042673220010010001040000267172018325801001008000010080379500116829402670726732267141666061668980112204800242001600482673626818118020110099100100800008000001008054220420800570005980038015742190111511701600267290992800001002671526734267342673426733
802042673320011011102100022669921801725801001008000010080015500116756112670826732267141664261666680116200800242001600482672326813118020110099100100800008000011008001920420800571005980038605742190111511801600267110992800001002673326715267332673326734

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cd | cf | d5 | d6 | db | dd | inst fetch restart (de) | df | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002526728200000100166000226721277192580010108000010800005011718120267132673626715166813167178001020800002016000026749267461180021109010108000080000010800201943080059100618004061590191050432320110267341313580000102673726715267152671626738
8002426737200101010066100226721377192580010108000010800005011682861267112673626736166603167178001020800002016000026736267361180021109010108000080000010800192000800590006480000611943190050201160110267331313580000102673726738267392673726715
8002426736200101110067100326700377202580010108000010800005011672191266902671426736166813167168001020800002016000026741267371180021109010108000080000010800212043080059001218004061580191050201160110267331313080000102673826716267382673826737
800242673620010011006710032672137719258001010800001080000501168286126711267362673616682316717800102080000201600002674426737118002110901010800008000001080019200080059100618004061594519005022516051026734013580000102673726737267372673726738
8002426737200101110067100326721170192580010108000010800005011739751267112673726737166623166958001020800002016000026746267361180021109010108000080000010800191943080060101648003961584519105020116011026712013580000102671526748267372673726737
80024267362001010100210003267002772125800101080000108000050116779102671126714267371668231671780010208000020160000267422673711800211090101080000800000108002120430800580006080040015843190050201160110267121313580000102673726738267392671626738
80024267372001111100671003267212771925800101080000108000050117062202669026736267371668131669480010208000020160000267432673611800211090101080000800000108002119430800591026180041606045190050201160110267331313580000102673826716267382673826738
8002426736200101010021100326722277025800101080000108000050116721902671126737267361668131671680010208000020160000267552673711800211090101080000800000108002019430800591116380040015943191050201160110267331313580000102673726716267382673826715
80024267362011011100691002267222771925800101080000108000050116710602671126737267141668131671680010208000020160000267482673611800211090101080000800000108002121430800191016180040615843191050201160110267331313580000102673726737267372673726737
8002426737200101110067100326723307202580010108000010800005011672190267112673726714166593166958001020800002016000026756267361180021109010108000080000010800201943080060100618004001584419105020116011026734130580000102673826738267152673826737