Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (register, lsl, S)

Test 1: uops

Code:

  ldr s0, [x6, x7, lsl #2]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100538931141101374218012251000100010001483813493913892123232100010002000389374111001100010000100039103500103661353973116113710601000375375375390390
10043892004110135920012251000100010001406003643913742123247100010002000389389111001100010000100039100003510356103973116113716621000390375390375375
100437430045102359201216251000100010001483813643743741973232100010002000394389111001100010000100001000039100001394373116113710601000395375395390390
10043943004500035901212162510001000100014060036939439421732521000100020003743891110011000100001000010350010390139073116113910641000395375395395395
1004374300450023742121202510001000100014060136937439421732521000100020003743891110011000100001000010390010396135073116113910001000390375395395395
100439430045002379212121625100010001000147740369374394217323210001000200038937411100110001000010000103903910396035073116113866601000395395395395395
100439430045002374212016251000100010001498913493743941973252100010002000394374111001100010000100039103904210000035437311611371101041000395395395375395
1004394300461003592012025100010001000140601349394374217324710001000200037437411100110001000010003910390391039013543731161139110041000395395395395395
1004394301450003790121216251000100010001498913643743942123232100010002000389374111001100010000100039100003510350035397311611391101021000395395395395395
1004394200450003590120162510001000100014838136939437419732521000100020003943741110011000100001000010390010396039073116113916621000395395375395395

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6, x7, lsl #2]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5020512004789901000001688800001200201194931095142560103401021000110000301111000810155107801457364226134359120026120035120050113233711393950122302281001010010602562045210010120050120047115020110099100401001000010000110010000011000000010000110111325101600119730400026681000040100120051120051120051120051120036
5020412003589900000001010001203941194931095282560100401021000110000301081000810006107903557460526134928120026120050120035113218711374850122302281001010010602562002010010120035120047115020110099100401001000010000010010008011000030010000010111322101600119736400009051000040100120051120051120036120051120051
5020412005089900000000000001200851194931095272560103401001000110000301081000810202107911157356956134359120026120053120421113233711373450122302281001010010602562002010010120035120035115020110099100401001000010000010010000001000000010000110111322101610119727400026901000040100120051120423120052120051120036
5020412005089910001003000001200321194931095142560100401021000110000301081021810006107908457356956134928120011120050120047113233711375350128302281001010010602562002010010120050120047115020110099100401001000010000010010000001000000910000110111322131600119736400020901000040100120051120051120048120051120051
502041200359000011140120000012012811951910946325601004010210001100003010010000100001078889573628461361661200261200501200351131453113667501003020010000100006020020000100001200501200351150202100991004010010000100000100100000110000000100000100003210110111119646400006981000040100120139120052120051120051120150
50204120346902000001410100012003511951210946325601034010210000100003010010000100001079017573618861338181200231200351200351131453113636501003020010000100006020020000100001200541200481150201100991004010010000100000100100000110000300100051000003326110711119660400020051000040100120051120051120048120048120051
502041204078990000040100000120032119509109449133601034010010000100003010010000100001078999573545561339771203121200501200501131413113667510893020010000100006020020000100001200471200921150201100991004010010000100001100100000110008000100000100003325113511119657400546681000040100120051120048120051120048120051
502041200508990000000000000120032119509109449256010340102100001000030100100001000010884275736332613616612003012003512005011314331136585010030200100001000060574200001000012005112023311502011009910040100100001000011001000001100000001000010000032101101221196574000261081000040100120048120036120051120051120036
50204120035899000000000000012005311950910946325601034010210001100003010010000100001078999573545561338181200261200351200501131413113667501003020010000100006020020000100001200501200471150201100991004010010000100000100100000110000003100001100003210110711119657400029901000040100120036120051120051120051120036
50204120050899000000010000012003211951910944925601034010010000100003010010000100001078999573623661451441200261204321200501131453113658501003020010000100006149420000100001200501200351150201100991004010010000100000100100000010000000100001100003210113513119657400029651000040100120051120051120051120048120051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | dcache load miss (bf) | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5002512005189910006641001200361195181094492560013400121000110000300101000010000107981057368606134699012001112005112005111316931136905001030020100001000060020200001000012005112005111500211091040010100001000001010000011000000100001103140910710911966640002111091000040010120052120052120052120052120052
50024120051900000011001200361195091094642560013400211000110000300101000010000107995357368126133866012002712005112005111316931136905001030020100001000060020200001000012005112005111500211091040010100001000001010000011000000100001103140910791111966640002101091000040010120052120036120052120052120052
500241200518990000110012003611950910946425600134001210001100003001010000100001080133573767661337640120027120051120051113169311369050010300201000010000600202000010000120051120057115002110910400101000010000010100000110000001000011031401010711811966640002101091000040010120052120052120052120052120052
500241200518990000110012002011950910946425600134001210001100003001010000100001080115573777261339310120027120051120051113169311369050010300201000010000600202000010000120051120051115002110910400101000010000010100000110000031000011031401010710911966640000101091000040010120052120052120052120052120052
5002412003590000001100120036119509109464256001340012100011000030010100001000010801245736380613366201200271200511200511131693113690500103002010000100006002020000100001200511200511150021109104001010000100000101000001100000010000110314011107181211966640000101091000040010120052120052120036120052120052
50024120051899000011001200651195091094642560013400121000110000300101000010000108038557363806133662012002712005112005111316931136905001030020100001000060020200001000012003512005111500211091040010100001000001010000011000000100001103140910791111966640002101091000040010120052120052120052120052120052
500241200518990000110012003611950910944925600104001210001100003001010000100001080034573729261339680120027120035120051113169311369050010300201000010000600202000010000120051120051115002110910400101000010000010100000110000101000000031401110711911966640002101091000040010120052120036120052120036120052
500241200519000000100012003911950910946425600104001010000100003001010000100001079647573623661336620120027120051120051113169311367450010300201000010000600202000010000120051120035115002110910400101000010000010100000110000001000001031401010791011966740002101091000040010120052120052120036120052120052
500241200518990000010012003611950910946425600134001210001100003001010000100001079584573623661336620120027120051120051113169311369050010300201000010000600202000010000120051120051115002110910400101000010000010100000110000001000001031401010711911966640002101091000040010120052120052120036120052120052
50024120051899000011001200371195091094642560013400121000110000300101000010000107962057362366132757012002712005112005111316931136905001030020100001000060020200001000012005112005111500211091040010100001000001010000011000000100001003140910791111966640002101091000040010120036120052120052120052120036

Test 3: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6, x7, lsl #2]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50205120047899000100000000120032119519109461256010340102100011000030100100001000010792005735455613533801200230120047120047113143311363650100302001000010000602002000010000120047120035115020110099100401001000010000110010000010000000100001000032100313522119660400026651000040100120048120051120048120048120048
50204120047900000000010100120032119519109461256010340102100001000030100100001000010792005735455613446111200230120050120047113145311363650100302001000010000602002000010000120047120047115020110099100401001000010000010010000010000100100001010032100213522119660400026651000040100120048120048120048120048120048
50204120047899000000010100120032119493109461256010040102100011000030100100621000010792005736044613381801200230120047120035113145311363650100302001000010000602002000010000120035120047115020110099100401001000010000010010000110000000100001010032100213522119704400006051000040100120048120048120048120036120036
50204120047900000000000100120035119493109449256010040102100011000030100100001000010792005735455613381801200230120035120047113143311363650100302001000010000602002000010000120035120047115020110099100401001000010000010010000010000100100001010032100213522119660400026051000040100120051120048120048120036120048
50204120035899000000000100120032119519109461256010340102100001000030100100001000010792005735455613381811200230120035120047113141311363650100302001000010000602002000010065120047120047115020110099100401001000010000010010000010000000100001010032100213522119660400029651000040100120048120048120048120036120048
50204120047899000000010100120032119509109461256010040102100011000030100100001000010792005735455613392011200230120047120047113143311365850100303911000010000602002000010000120035120057115020110099100401001000010000010010000010000003100001010032100213522119660400029601000040100120048120036120048120048120048
50204120047899000000010100120032119519109447256010340102100001000030100100001000010792005736044613381801200230120047120047113141311365850100302001000010000602002000010000120035120047115020110099100401001000010000010010000110000000100001010032100210722119646400026601000040100120048120048120048120036120048
50204120152899000000010000120032119519109449256010340102100001000030100100001000010792005736044613381801200110120047120047113143311363650100302001000010000602002000010000120035120047115020110099100401001000010000010010000010000000100001010032100213522119725400026051000040100120036120036120048120048120048
50204120035899000000010100120032119519109461256010340102100011000030100100001000010792005736044613381811200230120035120076113143311363650100302001000010000602002000010000120050120035115020110099100401001000010000010010000110000000100001010032100330552121538402110651000040100122340122312122319122336122424
50204122423917010012526344522001001200351194931095032560100401821002410026319701042311086113268357919276174220112207901225941220391138512681150945500833442113891145466950228921149612256312230328150201100991004010010000100000100100330100380291340100321010233783252899412231540380101001000040100120052120052120052121694123690

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01) | cycle (02) | 03 | 08 | 0b | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | dcache load miss (bf) | c2 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
500251200478990010010101200321195051094612560010400101000010000300101000010000107952357360446133499012002301200471200471131533113686500103002010000100006002020000100001200351200471150021109104001010000100000101000111000000100001100031400610754119662400026651000040010120048120048120138120048120048
500241200478990000010101200321195081094612560013400121000110000300101000010000107952357360446133499012002301200471200471131653113686500103002010000100006002020000100001200471200471150021109104001010000100000101000011000000100001100031400410754119662400026651000040010120048120048120048120048120048
500241200478990000010101200321195051094612560013400121000110000300101000010000107952357360446133499012006201200471200471131653113686500103002010000100006002020000100001200471200471150021109104001010000100000101000011000000100001100031400410745119662400026651000040010120098120081120048120048120048
500241200358990000010101200321195051094492560013400121000110000300101000010000107952357360446133499012002301200471200351131653113686500103002010000100006002020000100001200471200471150021109104001010000100000101000011000000100001100031400610744119662400026051000040010120048120048120048120048120048
500241200478990000010101200321195051094612560010400121000110000300101000010000107952357360446133499012001101200351200471131653113686500103002010000100006002020000100001200471200471150021109104001010000100000101000011000000100000100031400410745119662400026651000040010120049120048120048120048120048
500241200478990000010101200321195051094612560021400121000110000300101000010000107952357360446133499012002301200471200471131653113686500103002010000100006002020000100001200471200471150021109104001010000100000101000011000010100001100031400410744119662400026651000040010120048120053120048120048120048
500241200478990010010101200321195051094492560013400121000010000300101000010000107952357360446133499012002301200471200471131653113686500103002010000100006002020000100001200471200471150021109104001010000100000101000011000000100001100031405410745119650400026651000040010120048120048120048120048120048
500241200478990000010101200321195051094614660013400121000110000300101000010000107952357360446133499012001101200971200351131653113686500103002010000100006002020106100001200471200481150021109104001010000100000101000011000000100001100031400410745119662400026601000040010120048120048120036120048120048
5002412004789900000460001200321195051094612560013400121000110000300101000010000107952357360446133499012002301200471200351131653113686500103002010000100006002020000100001200471200471150021109104001010000100000101000011000000100001100031400610755119662400026051000040010120048120048120100120057120048
5002412004789900010370101200321195051094622560013400121000110000300101000010000107952357360446133499012002301200351200351131653113686500103002010000100006002020000100001200471200471150021109104001010000100000101000011000000100001100031400510754119662400026651000040010120048120048120036120048120048

Test 4: throughput

Count: 8

Code:

  ldr s0, [x6, x7, lsl #2]
  ldr s0, [x6, x7, lsl #2]
  ldr s0, [x6, x7, lsl #2]
  ldr s0, [x6, x7, lsl #2]
  ldr s0, [x6, x7, lsl #2]
  ldr s0, [x6, x7, lsl #2]
  ldr s0, [x6, x7, lsl #2]
  ldr s0, [x6, x7, lsl #2]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a5 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
802052672920000100450012673020020258010010080000100800145001165668126703268232670716656616684801152008002420016004826732267281180201100991001008000080000010080000430800000418004201420111511801600267071507800001002673426733267332673326708
802042673220100000440002669220120258010010080000100800135001167246126682267322673216635616659801152008002420016004826728267281180201100991001008000080000010080000430800420080041014244111511801600267371507800001002673326733267332670826733
80204267322000110044001267172012125801001008000010080015500117753412670726707267331663561668480114200800242001600482673426711118020110099100100800008000001008000003180000038004131410111511801600267471507800001002673326729267292670826733
8020426728200000004400126717012117258010010080000100800155001168428026707267072673216660616684801152008002420016004826707267321180201100991001008000080000010080000430800420418000000044111511801600267360150800001002673326733267082670826729
802042673220000011451012669221117258010010080000100800155001166596126703267322670716660616684801162008002420016004826732267321180201100991001008000080000010080000430800420418004100420111511801600267110117800001002673326708267332673326729
80204267322000000044001267172012172580100100800001008001550011684281267032670726732166606166598011320080024200160048267072673211802011009910010080000800000100800004308004104180000304244111511801600267440157800001002673526733267332673326733
802042670720000000440002669221020258010010080000100800155001166596126703267322670716656616684801152008002420016004826732267321180201100991001008000080000010080000430800000080000014244111511801600268140150800001002673326733267292673326733
8020426728200000004700026692001202580100100800001008001350011672461267032673226732166606166598011420080024200160048267322673211802011009910010080000800000100800004408004104280000314143111511801600268240114800001002673326729267292673326708
80204267322000000045101267170112025801001008000010080015500116817412668226732267321666061665980113200800242001600482673226707118020110099100100800008000001008000000800410418004101044111511801600267291504800001002670826708267332673326708
802042673220000000441012671721102580100100800001008001450011665961267032670726732166566166848011320080024200160048267322672811802011009910010080000800001100800004308004104180041314244111511801600267370117800001002673326733267332673326733

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | db | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80025267282000010004510026693212017258001010800001080000501166934026683267282670816673316688800102080000201600002672426724118002110910108000080000010800000390800390038800003104350202165232673400117480000102673026725267252670926709
8002426728200000000450022671422101725800101080000108000050116889102670326728267231665231670880010208000020160000267282672411800211091010800008000011080000039080042000800383038050204164342679500117080000102673026730267292670926730
8002426729200000000450002671320121325800101080000108000050116675002668326728267291666831670980010208000020160000267282672411800211091010800008000011080000000800000038800423138050262164332670500117080000102672926712267292670926729
8002426724200000000000226725012017498040010801301080000501166934026703267282670816652316708800102080000201600002670826724118002110910108000080000010800000390800420042800423042435020216523268220077480000102670926725267302673026729
80024267282000000004500026714212121325800101080000108000050116716302670326729267081667331668880010208000020160000267282672411800211091010800008000001080000000801720008000001424350202164222673500117080000102670926729267292672926727
80024267242000000000002266932000258001010800001080000501166944026699267232670816673316709800102080000201600002672926708118002110910108000080000010800000390800000042800003038435020216422267240277280000102673026730267272672926709
8002426708200000000000026709212121725800101080000108000050116675002670426729267241667331668880010208000020160000267292672411800211091010800008000001080000039080000000800423042435020316424267300000480000102672926729267092672926709
8002426708200000000451002669300120258001010800001080000501166750126699267292673016673316708800102080000201600002672926708118002110910108000080000010800000390800000008000031384350203164452725900117280000102673026730267092670926709
80024267282000000004100226714212121325800101080000108000050116693412669926729267241665231670980010208000020160000267282670811800211091010800008000011080000000800420042800423004350202164232674600110080000102670926709267252673026725
80024267082000000004100226711200172580010108000010800005011667500267032672926728166763166888001020800002016000026729267231180021109101080000800000108000003908000010428000030005020216434267260007480000102672926737267302673426712