Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (unsigned offset, S)

Test 1: uops

Code:

  ldr s0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | 92 | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
100540331100004510138801212162510001000100014989036937439421732521000100010003943741110010100010000101919431019100010006139000732162237101451000399401375399375
100437430000004410139001211625100010001000152670374398398224325610001000100039437411100101000100001019190107300021104060594519173216223950051000404404383409404
10043813100000670013880012025100010001000140601374398394221323210001000100039437411100161000100011000043100000038103800044007321622395101101000399375405399399
1004374300000100013880001925100010001000152670349398394221323210001000100039439411100101000100001019200106000161104061580191732162240013001000404404382403404
100440531100004510139030120251000100010001527403733983982213232100010001000396394111001010001000010000431039000381039613900073216223910041000400375400399399
10043773000000531013882001925100010001000160880373398374217325610001000100039839411100101000100001020204310581002110376104300732162239101351000404404404383383
100438131101110001398077125100010001000152740349374374197325610001000100039839411100101000100011000043103800038100001384400732162239501351000375376399375375
10043742000000461003872771925100010001000155080356403402225326010001000100039839411100101000100001000043103800001000613900073216223710071000399399375399399
100439830000004410038727719251000100010001531503493743742213232100010001000398394111001010001000010201901059101611041603900073216223710041000375375375375399
1004398300000044101388011162510001000100014989034937437422132561000100010003983941110010100010000102019441059101611040601943191732162240013051000404403404382404

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6, #8]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0074

retire (01) | cycle (02) | 03 | 05 | 08 | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50205120057899000100610001200201194931094672560103401021000010000301001000010000107886257363806136317112001112007712007111314631136685010030200100001000060200100001000012005112005411502011009910040100100001000011001000000100000001000010100003210110711119658400021310121000040100120055120055120055120055120055
5020412005189900010010000120039119513109467256010340102100011000030100100001000010790355735455613631701200111200941200531131493113671501003020010000100006020010000100001200351200351150201100991004010010000100000100100000110000003100001010000321011071111964640000131091000040100120036120036120055120052120055
502041200548990000006000012002011951010944925601034010010001100003010010000100001078862573623661363171120030120090120096113141311367150100302001000010000602001000010000120054120054115020110099100401001000010000010010000001000000010000001000032101107111196614000001301000040100120055120055120036120052120055
5020412005189900000010000120020119513109467256010040100100011000030100100001000010790355736380613446101200301201211200841131413113671501003020010000100006020010000100001200511200351150201100991004010010000100000100100000110000000100001010000321011071111964640000013121000040100120055120060120055120055120036
5020412005489900000010000012003911951310946725601034010010000100003010010000100001079035573638061363171120030120109120053113146311365850100302001000010000602001000010000120054120051115020110099100401001000010000010010000001000000010000101000032101107111196584000001391000040100120052120036120052120052120052
502041200548990000006000012003611951010946425601034010010001100003010010000100001079008573710161383920120030120108120069113149311375650100302001000010000602001000010000120035120035115020110099100401001000010000010010000001000000010000101040032101107111196624000201001000040100120036120052120036120052120055
5020412005189900000010000120022119513109449256010340102100011000030100100001000010790355735455613446101200271200811200591131493113668501003020010000100006020010000100001200541200351150201100991004010010000100000100100000110000000100000000000321021071111965840002101001000040100120055120052120036120036120055
50204120102899000000100001200201194931094672560103401001000110000301001006210000107902657363806134461012003012007912006411314931136685010030200100001000060200100001000012005412005111502011009910040100100001000001001000001100000001000010100003210110711119658400021310121000040100120055120055120036120036120055
502041200359000000000100012003911951310944925601034010210000100003010010000100001079035573545561363170120030120097120066113141311365850100302001000010000602001000010000120035120051115020110099100401001000010000010010000011000000010000101000032101251111966140002130121000040100120055120055120055120055120060
502041200518990000001100012003911951310946425601034010210001100003010010000100001079182573916461367300120011120035120054113149311367150100302001000010000602001000010000120051120035115020110099100401001000010000010010000001000000010000101000032101101111196644000210091000040100120055120036120060120036120055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0053

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50025120053899101101002720000120041119499109469256001640014100021000030010100001000010796025736620613376419120057120056120100113216031136805001030020100001000060020100001000012004112005311500211091040010100001000010100022110002011100001101131409910744119671400049681000040010120057120042120057120042120042
5002412004189910000000440000120041119514109466256001340014100021000030010100001000010794635736476613376419120032120056120067113171031136925001030020100001000060020100001000012004112004111500211091040010100001000010100022110002011100000111131409310734119668400049951000040010120042120057120057120042120054
5002412005389911100000740000120041119499109455256001640014100021000030010100001000010794635736476613376419120031120056120053113218031136955001030020100001000060020100001000012005612004111500211091040010100001000010100011110003011100000111131409310734119656400046981000040010120057120042120057120057120042
5002412004189910010000590100120026119499109466256001640014100021000030010100001000010794635736332613376419120029120056120056113171031136955001030020100001000060020100001000012005612004111500211091040010100001000010100012010002021100000101031409410743119668400029981000040010120054120054120057120054120054
500241200538991111000010100120026119514109469256001340014100021000030010100001005510795895735750613321619120032120053120056113171031136955001030020100001000060020100001000012004112005311500211091040010100001000010100032010001111100001111031409610743119671400049981000040010120057120042120042120042120054
500251200418991110000020100120038119556109469256002440012100021000030010100001000010794635736476613376419120032120041120041113171031136805001030020100001000060020100001000012004612004111500211091040010100001000010100031110002014100001111231409410743119671400049001000040010120057120042120057120042120057
5002412004190011100000140101120038119499109469256001640012100021000030010100001000010794635735750613376419120029120056120041113171031136925001030020100001000060020100001000012005612004111500211091040010100001000010100033010003017100001111131409610774119656400040901000040010120042120057120057120042120057
500241200568991000000070100120026119514109455256001640014100011000030010100001000010795755736476613321619120032120056120056113234031136955001030020100001000060020100001000012005312005311500211091040010100001000010100012110002001100001111031409410746119671400040981000040010120057120042120057120057120054
5002412005389910010000620000120038119511109455256001640014100021000030010100001000010796025735750613376419120017120061120053113171031136805001030020100001000060020100001000012005312005311500211091040010100001000010100012010003021100001111131409410744119671400029681000040010120042120057120042120057120057
5002412005689911101100500101120026119499109455256001640012100021000030010100001000010795935736332613376419120032120041120041113171031136925001030020100001000060020100001000012004112005311500211091040010100001000010100012110001001100001111031409310734119668400029981000040010120057120042120054120042120042

Test 3: throughput

Count: 8

Code:

  ldr s0, [x6, #8]
  ldr s0, [x6, #8]
  ldr s0, [x6, #8]
  ldr s0, [x6, #8]
  ldr s0, [x6, #8]
  ldr s0, [x6, #8]
  ldr s0, [x6, #8]
  ldr s0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 04 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
802052672820000000000004501012671221212162580100100800001008000050011657890267022672726727166303166858010020080000200800002673126727118020110099100100800008000011008000004308000000038800386139430051102162226733014104800001002673226732267282672826728
80204267272000000000000000002671621120258010010080000100800005001167231126702267272672716650316689801002008000020080000267312672711802011009910010080000800000100800000008003900039800000139440051102162226724010100800001002670826728267282672826732
8020426731200000000000044010026712212121925801001008000010080000500116652512670226727267271665031668580100200800002008000026707267271180201100991001008000080000010080000043080038000080000613900051102162226724010104800001002672826708267082672826728
80204267272000000000100440001267120101925801001008000010080000500116652502670226727267271665031668580100200800002008000026727267271180201100991001008000080000010080000000800390104280000603943005110216222672400104800001002672826728267322672826728
80204267072000000000000450001267142111925801001008000010080000500116723102670626727267311665031666580100200800002008000026731267271180201100991001008000080000010080000043080038000080038613900051102162227135014104800001002672826728267282670826708
802042673120000000011004500012672821120258010010080000100800005001177038126702267272672716654316685801002008000020080000267272672711802011009910010080000800000100800000430800390003980039613944005110216222673200104800001002670826732267322670826728
8020426727200000000000044000126716212016258010010080000100800005001167127026706267312673116654316689801002008000020080000267072672711802011009910010080000800000100800000430800390003980039000430051102162226727014107800001002670826728267082672826708
80204267272000000000100450101267122121162580100100800001008000050011672311267022673126707166543166858010020080000200800002672726727118020110099100100800008000001008000004308000000039800006139430051102162226724010107800001002673226708267282673226708
80204267312000000000000440000267122120162580100100800001008000050011657890266822670726727166503166858010020080000200800002672726727118020110099100100800008000001008000004308003801039800006039430051102162226724010107800001002672826708267282670826732
8020426727200000000110045010126902212102580100100800001008000050011671270267022672726731166543166858010020080000200800002672726727118020110099100100800008000011008000004308000000062980039013943005110216222679801407800001002670826732267322672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | d9 | da | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80025267272000045102669321212162580010108000010800005011667501267020267082672716659316707800102080000208000026708267271180021109101080000800000108000043800390388003961043502011169079267281410480000102670926728267292672826709
800242672720000000266930120272580010108000010800005011668861267030267082672716672316688800102080000208000026708267271180021109101080000800000108000043800390080039613943502011168010826776010080000102670926709267092672926728
800242672720000450026693201202580010108000010800005011688430267030267272672716652316708800102080000208000026728267281180021109101080000800000108000043800381398003961390502081680710267051010480000102670926728267312670926729
8002426727201000012671620016258001010800001080000501166750026702326727267081667631668880010208000020800002672826727118002110910108000080000010800004380039008003861394350209168071127213100480000102673226729267282672826729
8002426728200004500267122012162580140108000010800005011667501267020267272672716652316708800102080000208000026708267081180021109101080000800000108000008003903980039013943502011168061026728100480000102670926728267292670926729
80024267082000045112671321201625800101080000108000050116675012670302672726727166723167088001020800002080000267272672711800211091010800008000001080000080000039800396139435020101680108267241010480000102672826729267092672826709
800242672719900451026712012016258001010800001080000501166886026683026728267281667231668880010208000020800002672726728118002110910108000080000010800000800390080039613943502010168079267321010480000102672826728267292670926732
800242673120011451126716212016258001010800001080000501166896026702026727267081667231668880010208000020800002672726727118002110910108000080000010800000800380398004061043502071680101026709010080000102672826709267092686426728
800242670820000450126712201216258001010800001080000501166750126683026728267081667231670880010208000020800002672726728118002110910108000080000010800004380000039800000104350207161001092672800480000102670926728267092673226728
8002426729200004511267122121216258001010800001080000501166886126703026727267271665231670880010208000020800002672726727118002110910108000080000010800004380039008003901394350206161008726725140480000102670926709267282670926709