Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (register, D)

Test 1: uops

Code:

  ldr d0, [x6, x7]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01)cycle (02)03090e0f1e22243a3f4346494f51inst issue (52)~issue ld/st (55)~dispatch ld/st (58)huge thing ld/st (5a)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op ld/st (7d)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst fp/simd load (98)inst ldst (9b)9da0a3a6acafb5b6bbdcache load miss (bf)cfd5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)f5f6f7f8fd
100539421100001379312121625100010001000150371369394394220325210001000200039439411100110001000110004310393910396139437331622391101041000395395395395396
1004394300045001379212121625100010001000150370369394394217325210001000200039439411100110001000010004310393810386139437331633395101071000399399399399399
1004394301044101383212121925100010001000152740373394398221325210001000200039439411100110001000010004310383810386139447331633391101471000399399399399399
10043983000440013832111925100010001000152740373398398221325610001000200039839411100110001000010004310383810386139447331633395141441000399399399399399
10043983000440013790111925100010001000152670373398398221325610001000200039939711100110001000010004310383810386138437331633391141071000399399399399399
1004394300044001383212122225100010001000152670373394395221325210001000200039439411100110001000010004310383810386139447331633395101071000399399399399399
10043982000440013832111925100010001000149890373398398221325610001000200039839411100110001000010004310393810396139447331622395141071000399399399399399
10043983000441013842111925100010001000150860369398398221325610001000200039439411100110001000010004310383810006139447331633395141471000399399399399399
100439830004400138321211925100010001000152670369398394221325610001000200039439411100110001000010004310393810386139447331623395141471000399399399395399
10043943000440013832111925100010001000152741373398398221325610001000200039839511100110001000010004310383810406139447331633395141071000399399399399399

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, x7]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire (01)cycle (02)030508090b0e0f18191e1f2223243a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)696d6edispatch stall (70)71scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8a9acafb5bbdcache load miss (bf)dtlb miss (c1)c2c5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
50205120047900000000002960100012002011951410946825601034010610000100003010010000100001078925573618861338181200261200501200351131370311363650100302001000010000602002000010000120050120035115020110099100401001000010000110010000001000000010000001000003210113511119660400029651000040100120051120048120048120051120051
50204120050899000000002050000012003511949310946125601034010010001100003010010000100001078862573618861338181200111200351200501132180311365850100302001000010000608422000010000120035120047115020210099100401001000010000010010000001000000010000100000003210110101119657400369081000040100120036120036120051120048120051
50204120050954001211001480000112004511951910945825601064010410002100003010010000100001079062573652461334581200171200601200621132410311368050100302001000010000602002000010000120057120058115020110099100401001000010000010010001101000220161000011110000321011071111966440004010121000040100120061120064120064120049120056
502041200359640000010010001012004111951510953325601064010410002100003011110010100061077902573671061339991200261200501200531132360311366750100302001000010000602002000010000120130120050115020110099100401001000010000010010000011000000010000001000003210110711119646400029681000040100120051120051120051120051120051
5020412005089900000000100000120035119519109461256010340102100011000030100100001000010792005735455613381812001112005012005611314403113667501003020010000100006020020000100001200351200471150201100991004010010000100001100100000110000000100001010000132210160011973040004100121000040100120061120061120042120061120042
502041200608991000000060001012003911951410951425601004010210001100003010810008100061079120573661461367831200301200571200571132370711373250122302281001010010602562002010010120054120051115020110099100401001000010000010010000011000000010000101001113220016011197344000200121000040100120052120036120055120055120055
502041200548990000000000001012002011949310946725601004010210000100003010010000100001079035573638061363171200271201751200471131610311366850100302001000010000602002000010053120097120051115020110099100401001000010000110010000011000010186100001010000032101107111196614000001301000040100120055120055120052120055120041
5020412005489900001111178880010120236119493109468256010340102100011000030100100001000010790625736380613631712001112004112010211315003113671501003020010000100006020020000100001200351200511150201100991004010010000100000100100000110000000100000010000032103107111198034005613091000040100120036120052120058120055120052
5020412003689900001000120001012005011951310946725601034010210001100003010010000100001079008573652461363171200301200571201071131460311367250100302001000010000602002000010000120051120036115020110099100401001000010000010010000001000010610000101000003210110111119661400021313121000040100120052120095120063120085120048
502041200498990000000060001012002011950910946125601034010010001100003010010000100001078999573545561338181200261200541200501131410311365850100302001000010000602002000010000120050120047115020110099100401001000010000010010000011000000010000111120003210110111119660400140651000040100120048120036120048120036120036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire (01)cycle (02)0308090e0f1e22243f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a3a6a8a9acafb5dcache load miss (bf)c5cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
50025120051899010090012002011949210944925600354002110004100003001010000100001079557573623661336620120030012005412003511317231136935001030020100001000060020200001000012003512005111500211091040010100001000011010000110000000100001100314031072211966940002101091000040010120036120036120055120036120055
50024120051899010000012003911949210946725600104001210000100003001010000100001082126573844461337150120030012003512003511316931136935001030020100001000060020200001000012003512005111500211091040010100001000001010000110000002410000100031402107331196694000010001000040010120055120036120052120052120055
50024120054899000010012003911950910946725600104001210001100003001010000100001079584573545561336620120011012005412003511315331136935001030020100001000060020200001000012005412003511500211091040010100001000001010000110000000100001100314021072211966940002013121000040010120055120055120055120055120055
5002412005489900001101200201195121094642560013400121000110000300101000010000107958457363806132757112001101200351200351131533113693500103002010064100006002020130100001200351200521150021109104001010000100000101000011000000010000110031402107221196504000010091000040010120055120052120052120055120052
5002412005189901001001200201195121094672560013400101000010000300101000010000107951757363806133662012002701200541200541131533113693500103002010000100006002020000100001200541200511150021109104001010000100000101000011000000010000110031402107331196664000213091000040010120055120055120055120052120036
500241200548990000110120039119492109467256001340012100011000030010100001000010795845736380613366201200110120054120054113153311367450010300201000010000600202000010000120035120051115002110910400101000010000010100000100000001000011003140210732119666400021310121000040010120055120055120055120055120055
50024120054899000010012003911951210946725600104001210001100003001010000100001079517573638061336621120030012003512003511317231136745001030020100001000060020200001000012005412005111500211091040010100001000001010000010000000100000100314021073211980740002100121000040010120036120052120052120052120055
50024120035899000011012003911950910946725600134001210001100003001010000100001079557573623661338150120030012009912003511316931136935001030020100001000060020200001000012005412003511500211091040010100001000001010000110000000100001000314021073311966640000131301000040010120055120055120055120036120036
500241200358990000100120039119492109467256001340010100011000030010100001000010795575736380613381511201143120035120058113172311369050010300201000010000600202000010000120035120051115002110910400101000010000010100001100003000100001100314021072211966640002130121000040010120055120055120055120056120052
50024120035899000011012003611950910944925600134001210001100003001010000100001079584573638061327571120027012006112003511316931136745001030020100001000060020200001000012005112003511500211091040010100001000001010000110000000100001100314021072211971840002101001000040010120055120055120052120052120055

Test 3: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, x7]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0056

retire (01)cycle (02)03050708090b0e0f181e1f2223243a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8a9acafb5bbdcache load miss (bf)dtlb miss (c1)c2c5branch mispredict (cb)cfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
5020512005389910111110100001120038119727109469256010640102100011000030100100001000010790535736332613641911200321200411200411131372211367550100302001000010000602002000010000120056120053115020110099100401001000010000010010001301000231110000111130032100310722119654400049081000040100120059120057120057120042120042
502041200568991011100020000112004111951510947425601064010410002100003010010000100001079053573575061364191120017120056120056113137311363050100302001000010000602002021410000120056120053115020110099100401001000010000010010001311000201110000010110032100213522119660400020001000040100120057120057120057120414120042
502041200568991100011010000012003511950910946325601004010210002100003067110000100001079097573647661365721120032120041120053113148311363050100302001000010000602002000010000120041120053115020110099100401001000010000010010003111000151110000010110032100210722119654400566681000040100120057120042120042120059120057
502041200568991000111020000012004111951310946925601034010210002100003010010000100001079053573652461367251120029120056120041113138311363050100302001000010000602002000010000120053120041115020110099100401001000010000010010003301000600110000010130032100210722119660400046951000040100120054120042120057120057120042
5020412005689910111000201000120041119515109455256010640102100021000030100100001000010790975736332614685011200181200531200411131513113670501003020010000100006149220000100001200531200531150201100991004010010000100000100100011110001001310000110110033270210723119663400049981000040100120042120042120057120057120062
502041200418991010200050100012003111951610945525601034010210001100003010010000100001079035574601261335611120032120056120060113137311367350100302001000010000602002000010000120053120053115020110099100401001000010000010010002201000210110000010110032100210722119661400046081000040100120042120054120057120057120042
502041200568991011000310100012004111951510946925601034010410002100003010010000100001079026573647661364191120032120060120056113137311363050100302001000010000602002000010000120044120418115020110099100401001000010000010010001211000202110000111110032100213522119654400029681000040100120042120054120054120060120057
50204120056899101111101000011222681206641101796596042340321100521005033677113651122611362425792533619298311218881208051223271138793111149715620934451113401134768578227681133612239412239226150201100991004010010000100000100100021110002112510000111130039610237622121592400040951000040100120057120042120042120055120612
50204120056105310100110140100012004111952610990011306058740467100441008436527124761000011219775807519621950411200361200601200601131553113630501003020010000100006020020000100001200601204391150201100991004010010000100000100100013110001201100001101100321002107221196544000201091000040100120061120046120437120061120058
5020412006089910111100100001120042119519109455256010640102100021000030100100001000010790895735750613345811200331200411200601131553113677501003020010000100006020020000100001200411200571150201100991004010010000100000100100021110001001100000111000321002107221196644000213001000040100120042120061120042120058120058

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01)cycle (02)030508090b0e0f18191e22233f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8acafb5bbdcache load miss (bf)dtlb miss (c1)c2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
500251200578991111110011012003911949210946725600104001010001100003001010000100001079584573638061336620120027120054120035113153311369850010300201000010000600202000010000120054120051115002110910400101000010000110100000110000001000010100314041072211966940002131001000040010120055120036120055120052120055
500241200559000000100010012003911949210946725600134001010001100003001010000100001079517573638061336620120011120035120054113172311369050010300201000010000600202000010000120035120051115002110910400101000010000010100000110000001000000100314021072211965040000010121000040010120055120055120055120036120036
500241200358990000100013200120095119509109505256001340012100011000030010100001000010795845735455613366211200351200541200541131533113690500103002010000100006002020000100001200541200511150021109104001010000100000101000001100000010000101003140110722119669400001310121000040010120052120055120055120055120055
5002412005490000001030000120039119512109467256001340010100001000030010100001000010795355736380613381511200301200541200541131544111369050010300201000010064600202000010000120052120051115002110910400101000010000010100000110000001000000102314021072211966640000130121000040010120039120036120055120055120055
500241200548990000110000012003911968110947225600104001010001100003001010000100001079517573545561336621120011120077120051113172311369050010300201000010000600202000010000120054120035115002110910400101000010000010100060010000001000000100314011071111966640002010121000040010120055120036120036120055120036
500241200548990000100011012003911951210946725600104001210001100003001010063100001079584573638061327570120011120054120035113169311367450010300201000010000600202000010000120054120051115002110910400101000010000010100000010000001000010000314021072211966940002131091000040010120055120036120052120052120036
50024120054899000010001001200391195121094672560013400101000010000300101000010000107958457363806133662012003012005412003511317231136745001030020100001000060020200001000012005412005111500211091040010100001000001010000011000000100001010031402107221196504000013091000040010120055120036120055120036120055
50024120035899000010001101200391194921094672560013400101000010000300101000010000107958457363806133662112003012005412005411317231136935001030020100001000060020200001000012005412003511500211091040010100001000001010000001000000100001010031402107221196664000001391000040010120055120052120052120055120055
5002412005489900001100410120020119492109467256001040012100011000030010100001000010795575736380613275711200301200351200351131723113674500103002010000100006002020000100001200351200511150021109104001010000100000101000001100000010000101003140310723119650400021010121000040010120054120052120055120036120052
500241200519000000100012001200391195171094672560010400121000110000300101000010000107958457354556133662112003012005412005111317231136935001030020100001000060020200001000012005412005111500211091040010100001000011010000001000000100001000031401107221196664000001301000040010120036120055120055120055120036

Test 4: throughput

Count: 8

Code:

  ldr d0, [x6, x7]
  ldr d0, [x6, x7]
  ldr d0, [x6, x7]
  ldr d0, [x6, x7]
  ldr d0, [x6, x7]
  ldr d0, [x6, x7]
  ldr d0, [x6, x7]
  ldr d0, [x6, x7]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire (01)cycle (02)0307090a0e0f1e22233a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a3a5a6a8acafb5b6bbdcache load miss (bf)c5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
8020526724200111110100267072181881258010010080000100800155001167263126697267222670716650616674801142008002420016004826722267221180201100991001008000080000010080000390800350358003561353911151181161126719662800001002670826723267082679826732
802042672220010100411012670720186325801001008000010080014500115981402669726722267221665061667480114200800242001600482672226707118020110099100100800008000001008000039080036138003501353911151181161126704662800001002672326708267232677926723
8020426722200101104110126707218181225801001008000010080014500116836912668226722267221665061667480114200800242001600482672226707118020110099100100800008000001008000039288003551358003561363911151181161126724662800001002672326723267232672326887
80204268482001010041101267072180732580100100800001008001450011706220266972672226722166356166748011520080024200160048267222670711802011009910010080000800000100800000080035138800000103911151182161126719060800001002672326723267232678526718
8020426722200101004110026707218181325801001008000010080015500117005012668226722267221665561667480114200800242001600482672226722118020110099100100800008000001008000039080035038800356135011151181161126719660800001002672326723267232688026730
8020426727201101004110126707218184325801001008000010080015500116787502669726707267221665061667480114200800242001600482672226722118020110099100100800008000001008000039080035035800356103911151181161126704062800001002672326723268012677126733
80204267222001010041101266920181878258010010080000100800145001166888026697267222672216650616674801142008002420016004826722267221180201100991001008000080000010080000390800351358003500353911151181161126719662800001002670826723267082684226731
80204267222001010041011267072181875258010010080000100800155001167875026697267222672216650616674801152008002420016004826722267221180201100991001008000080000010080000390800001358003560353911151181161126719062800001002672326724267232681226733
8020426722200101000001267072181814258010010080000100800145001167875026697267222672216650616674801142008002420016004826722267221180201100991001008000080000010080000390800351388003561353911151181161126719662800001002672326723267232678926723
80204267222001010041001267072007525801001008000010080015500116659612669726722267221665061667480115200800242001600482672226722118020110099100100800008000001008000039080035108003500353911151181161126704662800001002672326708267232680827318

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire (01)cycle (02)0305090b0e0f1e22243a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a3a6a8acafb5b6bbdcache load miss (bf)cfd0d5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
8002526728200010004510126716212019258001010800001080000501167124126683267312670816676316688800102080000201600002672726727118002110910108000080000010800000800000388000060044502004165326728140780000102673226709267212673226732
8002426731200010004400126716211219258001010800001080000501167201026706267312673116652316711800102080000201600002672826728118002110910108000080000010800004380000038800386139445020051635267051410780000102670926728267092672826709
800242673120001000010126693201219258001010800001080000501166750026683267312673216652316688800102080000201600002673126708118002110910108000080000010800000800001428000000043502005165726728014080000102673226732267092672826709
80024267082000100044001267162101925800101080000108000050116712402670626731267271665231668880010208000020160000267082672711800211091010800008000011080000080038038800006139445020041655267051414480000102673226709267282673226728
8002426731200010004510126693211162580010108000010800005011671240267152670826731166723166888001020800002016038426894267301180021109101080000800000108000043800380388003861390502006165526728014780000102673226732267092672826732
80024267312010100044100267122110258001010800001080000501168843026683267312670816652316708800102080000201600002673126727118002110910108000080000010800004380039008003801044502005163526705140780000102673226732267322670926709
800242670820001000000126716210025800101080000108000050116675002670226731267311667631670880010208000020160000267312670811800211091010800008000001080000080039038800380139435020051676267241410480000102672926729267292670926728
80024267312000101000012671621201925800101080000108000050116720102668326708267311665231671180010208000020160000267312670811800211091010800008000001080000438003904180000600445020051674267051414780000102673226732267322673226732
8002426731200010010000266932111925800101080000108000050116675012668326708267311667631671180010208000020160000267312670811800211091010800008000001080000080039008003800384350200516542670500780000102670926732267322673226732
8002426708200010104410126716210025800101080000108000050116712402671026731267311667231670880010208000020160000267082672711800211091010800008000001080000448003803880038613943502005165326728010780000102673226709267282673226728