Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDUR (S)

Test 1: uops

Code:

  ldur s0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
10053893000010411135920180251000100010001484436438937421232471000100010003913891110011000100001000039100000001035600390073216113766621000390390395392390
10043892000000410237921818162510001000100015018369389389197325210001000100038140311100110001000010192001057111621038615742190731161137110641000375392395375375
100438920000000123592018025100010001000149893693893942123232100010001000381402111001100010000101820421019111211000615742191731161137110081000375395375395395
100437430000000003592121216251000100010001483834939439421232521000100010003893891110011000100001000001039000351039600000731161137110041000375375375395395
1004394300000045023842181816251000100010001537537439939922132391000100010003744091110011000100001000039100000035103960043007311611371101021000390390375375395
100439420000004502384201816251000100010001536237339939822132571000100010004093891110011000100001000039103900039100061043007311611391101041000395395375375375
100439430000000003760012162510001000100014838364389389212325210001000100037438911100110001000010000391035000010396135430073116113710001000395375395375395
100438930000004502384018181525100010001000144563563823992213240100010001000398399111001100010000101921421057000211000015742190731161139110641000395395395395395
10043943000000002379212121625100010001000148383493743742173232100010001000399398111001100010000101919421019102211000615742190731161137110641000375395395392395
1004374300000045013792121212251000100010001498936937438921732471000100010003943741110011000100001000039103500001039610430073116113799921000399400400400382

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldur s0, [x6, #1]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5020512004789900001100931000112003811951510946625601034010410001100003010010000100001079097573633261364191120023012004712004711314331136705010030200100001000060200100001000012004112005311502011009910040100100001000001001000220100010011100001101103210310711119660400026001000040100120042120054120054120054120042
502041200568991001000067010012002011950910946125601034010210001100003010010000100001078862573545561338180120026012003512009511314531136675010030200100001000060200100001000012003512003511502011009910040100100001000001001000001100000000100000000003210113511119660400000651000040100120036120048120036120048120036
502041200478990000000080635210012003511950910946125601004010210001100003010010000100001078999573604461338180120063012005012005011314331136365010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100000000100001010003210110111119657400026651000040100120048120036120036120037120036
5020412003589900000000521010012003211949310944925601034010210001100003010010000100001078999573604461344610120023012004712005011314331136395010030200100001000060200100001000012005012004711502011009910040100100001000001001000001100000000100001000003210113511119660400026601000040100120051120051120048120048120051
502041200508990000000084501001200321194931094492560103401021000110000301001000010000107886257354556133818112002301200471200351131433113639501003020010000100006020010000100001200471200351150201100991004010010000100000100100000010000000010000101000321011712119660400000601000040100120036120036120048120048120048
50204120050899000000007010012003211949310946125601034010010001100003010010000100001079200573618861338180120011012004712004711314331136365010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100000000100001000003210110111119660400026681000040100120051120048120036120051120036
5020412005089900000000865010012003211949310946325601034010010001100003010010061100001078862573618861361660120023012004712004711314331136365010030200100001000060200100001000012004712003511502011009910040100100001000001001000001100000003100001010003210110111119657400000651000040100120051120038120036120036120048
502041200479000000000084835201012003211951910946125601034013410001100003010010000100001078862573604461342830120023012004712004711314131136365010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100000100100000010003210110121119660400020051000040100120048120048120048120048120048
5020412004789900000000722001012002011951910946125601034010210000100003010010000100001079200573618861338180120023012004712004711314131136365010030200100001000060200100001000012004712004711502011009910040100100001000001001000000100000000100010000003210110711119660400026601000040100120051120048120048120048120048
5020412003589900001100532000012002011951910946125601034010210001100003010010000100001078862573642861338180120023012004712004711314331136365010030200100001000060200100641000012005012003511502011009910040100100001000001001000000100000000100001000003210113511119660400009081000040100120036120051120048120048120048

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5002512005189900000000701001200361195091094642560013400121000110000300101000010000107951757362366133662012001112005112005111316931136905001030020100001000060020100001000012008912006811500211091040010100001000001010000011000000010000110314012107641196504000201091000040010120036120052120052120052120052
50024120051899000000006010012003611964210946425600134001210001100003001010000100001079557573633261336620120027120051120035113169241136035027530020100001000060020100001000012006312005411500211091040010100001000001010000011000000010000110314031093411966840002101091000040010120052120052120052120052120052
5002412005189900000000101001200361195091094642560010400121000110000300101000010000107955757362366133662012002712003512005111324131136905001030020100001000060020100001000012008612007011500211091040010100001000001010000011000000010000110314031073411966640002101001000040010120052120052120052120052120052
5002412005189900000000101001200361195091094642560013400121000110000300101000010000107955757362366133662012002712005112005111316931136745001030020100001000060020100001000012005012005411500211091040010100001000001010000001000010010000110314031074411966640002101091000040010120036120052120052120052120052
5002512005193100000000601001200361195091094642560013400121000110000300101000010000107955757362366133662012002712006312005111316931136905001030020100001000060020100001000012009112005811500211091040010100001000011010000011000000310000110314041076711966640010101091000040010120036120052120036120052120052
5002412005189900001100295010012003611957510952813760013400281000110002300101005210098107951757362366133764212002912005112005111316931136905001030179100001011060020101601000012011212018521500211091040010100001000001010000011000000610000100314031074411966640002101091000040010120052120052120052120052120052
5002412005290000000000157881011202351195091094642560025400121000110000300101000010000107955757362366134783012002712015412005311316931136905001030020100001000060020100531000012006112006711500211091040010100001000001010000211000000310000110314061077611966640002101091000040010120243120053120053120147120156
5002412005189900000000001001200361195691094642560013400121000110000302931000010000107955757363326133662112002712005112005111316931136905001030020100001000060020100001005412008612008611500211091040010100001000001010000011000000010000110314031073411966640002101091000040010120052120052120052120052120052
500241200519000000000010100120020119509109483256001340012100011000030010100001000010795575736236613366201200111200511200511131703113690500103002010000100006002010000100001200511200511150021109104001010000100000101000001100000031000011031404107761196664000201091000040010120052120052120036120052120052
5002412005189900000001101001200361195691094642560013400121000110000300101000010000107955757362366133662012002712005112005111316917113776500103002010000100006002010000100001200511200521150021109104001010000100001101000001100000001000011031404107341196664000213091000040010120052120052120052120052120037

Test 3: throughput

Count: 8

Code:

  ldur s0, [x6, #1]
  ldur s0, [x6, #1]
  ldur s0, [x6, #1]
  ldur s0, [x6, #1]
  ldur s0, [x6, #1]
  ldur s0, [x6, #1]
  ldur s0, [x6, #1]
  ldur s0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 09 | 0a | 0e | 0f | 19 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a5 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80205267362000000044001267120012162580100100800001008000050011728690267062670726727166503166658010020080000200800002672726727118020110099100100800008000001008000043080039000800396039435110116112672814107800001002670826708267282673226728
802042672720100010441002669231002580100100800001008000050011683170266822679727001166573166718010020080000200800002673126727118020110099100100800008000011008000043080039003980000013943511011611267280147800001002672926728267322673226728
802042673120000010450012669221102580100100800001008000050011728410267062670726731166303166658010020080000200800002673126727118020110099100100800008000011008000043080038003880038600435110116112672814147800001002673226732267322673226708
80204267272000000044101267122001925801001008000010080000500117323212668226727267071665031666580100200800002008000026707267271180201100991001008000080000010080000430800380038800006139445110116112670414144800001002670826708267282670826708
80204267072001100001002671600019258010010080000100800005001166525126702267072672716630316665801002008019320080000267072672711802011009910010080000800000100800000080038000800386139051101161126724007800001002670826732267282672826708
8020426707201000004500026712212002580100100800001008000050011733701267022670726707166303166898010020080000200800002673126707118020110099100100800008000001008000043080038103880039603905110116112672814140800001002672826728267082672826728
8020426727200000004400126692211162580100100800001008000050011702391267062673126707166303166658010020080000200800002670726727118020110099100100800008000001008000000800380010480000600445110116112670414147800001002670826732267082673226732
802042670720000100450012671221212162580100100800001008000050011699141267062671026730166543166658010020080000200800002670726727118020110099100100800008000001008000043080000003880038610435110116112670414147800001002672826732267282670826728
80204267072000000000012671521212162580100100800001008000050011678251267022672726727166303166858010020080000200800002670726707118020110099100100800008000001008000000800000008003961043511011611267280144800001002672826732267322672826728
80204267072001011045001267162011725801001008000010080000500116831212670626731267071665431668980100200800002008000026707267071180201100991001008000080000010080000430800000038800380139445110116112672810147800001002673226728267322673226732

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire (01) | cycle (02) | 03 | 05 | 08 | 0a | 0b | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
800252672220000011141010326718218180258001010800001080000501172370126697267222672216667316712800102080000208000026732267331180021109101080000800000108000003980035010358003561544219150201116262671906280000102672526723267232672326723
80024267222000000006500032671721818025800101080000108000050117007802670826732267151666031671280010208000020800002673226732118002110910108000080000010800192042800191016280039613539005020216222671966280000102672326723267232672326723
8002426722200000000410100266932180112580010108000010800005011662911266832671426722166543166888001020800002080000267222672211800211091010800008000011080000039800350003580035613539005020216222671960280000102672326723267232672326726
80024267222010000005001012670720181225800101080000108000050116760502670826732267331667731671280010208000020800002673226733118002110910108000080000010800000398003500035800356157421905020216222672999280000102672326812268862673426723
800242672320001000041000126707218011258001010800001080000501174614126697267222672216667316702800102080000208000026708267221180021109101080000800000108000003980035000358003561350005020216222671966280000102672326723267232672326723
800242672220000000000101267072181812258001010800001080000501173655026697267082670816652316702800102080000208000026722267081180021109101080000800000108000003980035000080000013539005020216222671966280000102672326723267092670926723
800242672220000000000101267070181812258001010800001080000501172941026697267222670816729316702800102080000208000026722267221180021109101080000800000108000000800350003580035613539005020216222671966280000102672326723267092672326723
800242672220000010065000126707018181225800101080000108000050117269402669726722267221665231668880010208000020800002672226722118002110910108000080000010800000080035000080035003539005020216262670566280000102672326709267232672326723
8002426708200000000410001266932181812258001010800001080000501166750026697267222670816667316833800102080000208000026722267221180021109101080000800000108000000800000003580035613539005020216622671966280000102672326723267232672326723
80024267222000001102101032671721818142580010108000010800005011730320267082672226708166673167028001020800002080000267222672211800211091010800008000001080000039800350003580035603539005020216622670566280000102672326723267092672326709