Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (register, uxtw, D)

Test 1: uops

Code:

  ldr d0, [x6, w7, uxtw]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | 92 | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
10054032101106602388377192510001000100015508378403404225326110001000200040340211100111000100011020214310590006110406159431907321611400131351000404403404416404
10044033111106603388277182510001000100015621378403404225326110001000200040340311100101000100001020204310590006010406158431917311611399131351000404404403405404
10044033111106613387377192510001000100015555378404403225326110001000200040340211100111000100001020194310591012110406158431907311611400131351000407404404404403
10044053110006703388277202510001000100015480379404403225326010001000200040340311100101000100001019194310591016110406158431907311611399131351000404404403404404
10044043110006703388277192510001000100015524378403403225326110001000200040240311100101000100001021194310581016110406159451917311611400131351000404404404403404
10044033101006712388377192510001000100015526378402403225326110001000200040340311100101000100001019204310591016010406158431917311611400131351000403404404404404
10044022111106702388377202510001000100015480378405404225326110001000200040340211100111000100011020214310591026110396159431927311611400131351000403404404404404
10044023110106702388377202510001000100015480378403403225326010001000200040340311100101000100011019194310581016110406159431917311611399131351000404404405404404
10044032110106703388377202510001000100015480378403403226326010001000200040340311100101000100001019204310590016010406158431917311611399131351000404403403404404
10044033110106712388377192510001000100015506378403403225326410001000200040340311100101000100001020214310591006110426159431927311611400131351000404404404404404

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, w7, uxtw]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
502051200538990000037010012003511951910960025601004010210001100003010010000100001078862573618861338181120026120050120050113145311365850100302001000010000602002000010000120035120035115020110099100401001000010000110010000011001000010000101003210110711119657400009081000040100120036120036120051120051120054
50204120050899000001010012002311950910946325601154013610001100003010010000100001078999573618861361660120026120050120035113145311363650100302001000010000602002000010000120047120076115020110099100401001000010000010010000011000000010000101003210110121119660400020681000040100120051120036120036120051120036
50204120050899000101010012003511950910946325601034010210000100003010010000100001078999573604461344610120026120050120050113145311363650100302001000010163605842000010000120047120047115020110099100401001000010000010010000011000000010000101003210110111119657400020601000040100120051120036120051120036120036
502041200508990000010000120035119509109463256010340102100011000030100100001000010789995736188613616611200261200511200531131423113667501003039710000100006020020000100001200501200471150201100991004010010000100000100100000110000001414710000101003210110714119660400029681000040100120056120051120051120051120056
50204120050899000001000012002011950910944925601034010210000100003010010000100001078862573618861338181120026120050120050113143311366750384302001000010000602002000010000120050120035115020110099100401001000010000010010000011000010010000101003210110111119660400029681000040100120036120036120051120051120036
50204120047899000010000012003511951910946325601034010010001100003010010000100001079200573618861361661120011120050120050113145311366750100302001000010000602002000010000120050120035115020110099100401001000010000010010000011000000010000001003210113411119665400339081000040100120051120051120051120051120052
50204120050899000001000012004011950910946325601034010010001100003010010000100001078999573618861361661120026120035120050113244311366750100302001000010000602002000010000120047120035115020110099100401001000010000010010000011000000010000101003210115711119657400020951000040100120052120425120051120036120048
50204120035899000001000012003511950910946125601034010210000100003010010000100001078862573618861361661120023120050120035113145311365850100302001000010000602002000010000120050120055115020110099100401001000010000010010000001000000010000101003210110711119661400029681000040100120051120036120051120036120051
50204120050899000000010012003511950910946825601034010210000100003010010000100001078999573618861338181120068120050120050113141311366750100302001000010000602002000010000120050120035115020110099100401001000010000010010000011000010010000101003210110711119657400009601000040100120036120051120048120036120036
50204120035899000001010012003211950910944925601034010010001100003010010000100001078999573618861361660120026120050120050113145311366750100302001000010000602002000010000120035120035115020110099100401001000010000110010000011000000610000001003210110731119660400026651000040100120051120036120051120048120048

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0057

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50025120057899110001220010112004211962010951225600164001410002100003001010052100981079611573652461339681200331200571200571131753113700500103002010000100006002020000100001200571200571150021109104001010000100000101000131100018011000011110031402107221196564000404591000040010120058120042120058120042120059
500241200579001020122179176001120042119515109470256001640014100021000030010100001000010796115736524613396812003612005712005711315931136965001030020100001000060020200001000012005912005711500211091040010100001000001010003111000212110000111110314031072211967240004102891000040010120059120063120059120058120058
5002412004190011100002000012004211951510947025600164001410002100003001010000100001079611573652461332161200331200571200571131753113680500103002010000100006002020000100001200571200571150021109104001010000100000101000221100010011000011111031403107321196724000401091000040010120058120058120058120058120058
5002412004189911100001000112002611951510947025600164001210002100003001010000100001079463573575061332161200331200571200411131593113696500103002010000100006002020000100001200411200411150021109104001010000100001101000220100010011000001111031402107221196724000201091000040010120058120058120058120058120058
50024120057899100010020101120026119515109470256001340014100021000030010100001000010796115736524613396812001712004112004111317531136805001030020100001000060020200001000012005712004111500211091040010100001000011010002111000202110000111110314021073311965640004101091000040010120058120058120058120058120042
50024120057899101000010001120026119515109470256001640014100021000030010100001000010818485735750613396812003312005712005711315931136965001030020100001000060020200001000012005712004121500211091040010100001000001010001211000100110000111100314021072211967940004101091000040010120058120042120058120507120058
5002412005789911000002010112004211951510945525600164001410001100003001010000100001079611573652461332161200331200411200411131753113696500103002010000100006002020128100001200571200411150021109104001010000100001101000111100026011000011110031402107241196724000401091000040010120058120042120058120042120042
50024120057899110000020101120042119499109455256001640012100021000030010100001000010794635736524613506512003312005712005711315931136805028130020100001000060020200001000012005712005711500211091040010100001000001010002101000200110000110110314021072211967240004101001000040010120058120058120058120058120058
50024120057899110000020001120026119499109470256001640014100021000030010100001000010794635736524613396812003312005712005711315931136965001030020100001000060020200001000012005712005711500211091040010100001000011010003111000101110000010110314021072211965640004101091000040010120058120058120058120058120105
50024120057899101110020101120042119515109470256001640012100011000030010100001000010827255735750613396812003612005712005711317531136965001030020100001000060020200001000012005712005711500211091040010100001000011010001311000211110000111110314021072211967240004102291000040010120058120058120058120058120058

Test 3: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, w7, uxtw]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire (01) | cycle (02) | 03 | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
502051200478990000131001120041119513109466256010640104100011000030100100001000010790535736476613345811200360120056120117113143031136735010030200100001000060200200001000012005612005311502011009910040100100001000001001000110100031041000011003210210311119661400399681000040100120039120051120036120051120051
502041200478990000100001200341195091094612560103401001000110000301001000010000107899957354556133818112002601200501200351131580311365850100302001000010000602002000010000120047120047115020110099100401001000010000110010002011000014033861000001003210110111119647400006981000040100120051120036120036120051120051
50204120035899000010100120035119509109463256010040102100001000030100100001000010788625736188613446111200110120050120047113144031136675010030200100001000060200200001000012003512004711502011009910040100100001000001001000001100000001000011003210110711119657400029681000040100120051120036120036120051120036
5020412005089900005500011200381195191094612560103401001000110000301001000010000107920057361886133818112002301200501201051131460311366750100302001000010000602002000010000120050120035115020110099100401001000010000010010000011000015001000010003210110711119646400020001000040100120051120051120051120036120051
50204120035899110010000120035119493109463256010340102100011000030100100001000010788625735455613446111200110120047120092113143031136675010030200100001000060200200001000012003512003511502011009910040100100001000001001000000100000001000011003210110111119657400029081000040100120051120052120048120036120051
502041200508990000730000120035119493109463256010340100100011000030100100001000010788625735455613616611200260120035120047113165031136675010030200100001000060200200001000012005012003511502011009910040100100001000001001000001100000001000010003210110711119657400009001000040100120036120036120051120051120051
50204120050899000010000120020119509109449256010040102100011000030100100001000010792005736188613381811200550120035120035113182031136585010030200100001000060200201341000012005012004811502011009910040100100001000001001000001100009101000010003247110111119657400020081000040100120052120410120037120083120036
502051201388990000000001200201195091094632560103401001000310000301001000010000107920057354556158294112025001200361200501131870521136685010030200100001000060200206401021612003512004711502011009910040100100001000001001000001100020001000210203266110751119657400009601000040100120051120051120051120051120157
50204120050900004060100120020119519109463256010340102100011000030100100001000010789995736188614349311200260120050120176113184031136675010030200100541000060200200001000012004712009111502011009910040100100001000001001000001100000031000011003210110111119657400000681000040100120051120036120051120051120051
50204120050899000018010012003511951010946125601004010210001100003010010000100001079200573545561338181120026012004812003511314103113658501003020010000100666020020106100001200621200471150201100991004010010000100001100100000110000000100001000321011352211966140002131091000040100120058120055120105120055120052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 05 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cd | cf | icache miss (d3) | d5 | d6 | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50025120057899111021001200451196121094672560010400121000010000300101000010000107955757362366132757012002712003512005411317231136905001030020100001000060020200001000012005112005711500211090104001010000100001101000001100000001000011111000314002010705171196754000213091000040010120061120042120042120058120042
500241200578991010100012002611949910945525600164001410001100003001010000100001079638573666861339680120036120060120057113178311369650010300201000010000600202000010000120041120041115002110901040010100001000001010000011000000010000101000003140071070171711966640010010121000040010120036120105120091120036120055
500241200548990000110012003611951210965925600134001210001100003001010000100551079584573545561336620120027120035120051113169311367450010300201000010000600202000010000120035120119115002110901040010100001000001010000011000000010000101000003140017107017611966940002101091000040010120055120055120055120055120055
500241200548990000000012003911951210946725600104002010000100003016610000100001079517573638061343070120030120035120035113153311369050010300201000010000600202000010000120054120057115002110901040010100001000001010000011000000010000001000003140017107017171196664000201001000040010120036120055120055120052120055
5002412003589900100100120039119512109472256001340012100001000030010100001000010795845735455613275701200111200591200541131723113693500103002010000100006002020000100001200541200961150021109010400101000010000010100000110000000100001010000031400171070717119669400021313121000040010120060120036120060120055120055
5002412003589900000100120039119492109467256001040010100011000030010100001000010795845735455613366201200301200591200541131693113674500103002010000100006002020000100001200541200571150021109010400101000010000010100000110000000100001010000031400171070717119650400021310121000040010120055120060120055120036120055
500241200358990000000012003911951210946725600104001010001100003001010000100001079557573545561338150120027120054120035113172311369050010300201000010000600202012810000120054120164115002110901040010100001000001010000011000000010000000000003140017107081711966940002010121000040010120055120055120036120055120055
500241200549000000110012003911951710946425600134001210000100003001010000100001079517573623661336621120027120035120035113172311369050010300201000010000600202000010000120035120057115002110901040010100001000001010000011000010010000001000003140017107081711966640002131001000040010120036120036120055120052120052
5002412005189900001000120020119512109467256001040012100011000030010100001000010795845736380613366201200311200541200541131723113690500103002010000100006002020000100001200541201301150021109010400101000010000010100000110000000100000010000031400171070171711966940002131091000040010120052120052120052120052120055
500241200518990001010012003911951210944925600134001010001100003001010000100001079517573638061338150120011120054120054113153311369350010300201000010000600202000010000120054120057115002110901040010100001000001010000011000000010000100000003140017107017711965040002131001000040010120055120055120036120052120055

Test 4: throughput

Count: 8

Code:

  ldr d0, [x6, w7, uxtw]
  ldr d0, [x6, w7, uxtw]
  ldr d0, [x6, w7, uxtw]
  ldr d0, [x6, w7, uxtw]
  ldr d0, [x6, w7, uxtw]
  ldr d0, [x6, w7, uxtw]
  ldr d0, [x6, w7, uxtw]
  ldr d0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire (01) | cycle (02) | 03 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020526723200100100690101266922018122580100100800001008001550011678751026701267312672216650616659801142008002420016004826707267221180201100991001008000080000110080000039800000080035610391115118001600267190660800001002670826723267232672326723
80204267222000011000000126707001802580100100800001008001550011678751026687267232674016650616659801152008002420016004826707267221180201100991001008000080000010080000039800350080000013601115118001600267190660800001002672326708267232672326728
802042672220000010000101266920000258010010080000100800155001166596002670426722267221663561667480114200800242001600482672226707118020110099100100800008000001008000003980000035800356035391115118001600267190660800001002672326708267082672326723
8020426722200000100410000266922180025801001008000010080014500116659600267102670726722166506166748011520080024200160048267072670711802011009910010080000800000100800000398003503580035013501115118001600267040062800001002672326708267082672326723
8020426722200000100000112670720012258010010080000100800145001167875002671326707267221663561665980115200800242001600482670726722118020110099100100800008000001008000003980000035800356035391115118001600267240662800001002672326723267082670826723
802042672220000010041001126709018180258010010080000100800155001166596002671426729267241665061667480115200800242001600482672226707118020110099100100800008000001008000003980000035800356135391115118001600267190060800001002670826708267082670826723
80204267222000001004100112671001818025801001008000010080015500116787500266972672926837166586166598011420080024200160048267222672211802011009910010080000800001100800000398003500800350135391115118001600267040660800001002672326723267232672326723
802042670720000010041000026692018181225801001008000010080015500116659610267232670926707166356166748011520080024200160048267222670711802011009910010080000800000100800000080035035800356035391115118001600267040060800001002670826708267082672326723
802042672220000010041001026712200325801001008000010080014500116659600266882670726722166506166598011520080024200160048267222670711802011009910010080000800000100800000398003503580000603501115118001600267040002800001002672326723267232672326708
802042672520000010041001126707201812258010010080000100800145001167875002670326722267221663561667480115200800242001600482672226722118020110099100100800008000001008000000800000080000610391115118001600267040660800001002670826723267082672326708

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 19 | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a5 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002526722200100007110266932181811258001010800001080000501167605126847267302672216671316702800102080000201600002672226722118002110910108000080000010800003908003503580035610395020016161572671966280000102672326723267232672326723
8002426722200000004111267072181811258001010800001080000501167605026811267382672816667316702800102080000201600002672226708118002110910108000080000010800003908000000800350135395020017161782670566280000102672326723267232670926723
800242672220100000410126707201812258001010800001080000501167605026835267372672716667316688800102080000201600002672226722118002110910108000080000010800003908003500800000103950200171617172671906280000102672326723267092672326709
8002426722200000000112670721818025800101080000108000050116760502675926728267291668131670280010208000020160000267222670811800211091010800008000001080000390800350080035013639502047168172672766280000102670926723267232672326723
8002426722200000004111267072181812258001010800001080000501167605026860267732688216654316702800102080000201600002672226722118002110910108000080000010800003908003500800356135395020017161762671906080000102672626712267232672326723
8002426726200000006511267072181812258001010800001080000501167605026847267162672716667316688800102080000201600002672226722118002110910108000080000010800000080035035800350135395020017161782671906280000102670926709267232672326723
80024267222000000086112669321818122580010108000010800005011667500268462673226714166523167028001020800002016000026722267221180021109101080000800000108000039080000038800356135395020017166172671906280000102672326723267092672326723
800242672220001100610266932181802580010108000010800005011676051268402673226737166523167028001020800002016000026722267221180021109101080000800000108000039080035035800356135395020017161482671966280000102670926723267232672326723
800242672220000000411126707218181225800101080000108000050116675012681226734267271666731670280010208000020160000267222672211800211091010800008000001080000390800350368003561353950200181617172671960280000102670926723267232672326723
800242672220000040501026707218181225800101080000108000050116760502669726790268821680831670280010208000020160000267222672211800211091010800008000001080000430800000080035603539505108161762671960280000102670926723267232670926709