Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD1 (multiple, three registers, 2S)

Test 1: uops

Code:

  ld1 { v0.2s, v1.2s, v2.2s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire (01)cycle (02)0304090e1e22243a3f43464951inst issue (52)~issue ld/st (55)~dispatch ld/st (58)huge thing ld/st (5a)5f696d6escheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op ld/st (7d)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst fp/simd load (98)inst ldst (9b)9da0a3a6a8acafb5b6bbdcache load miss (bf)cfd0d2icache miss (d3)itlb miss (d4)d5d6d9dadbddinst fetch restart (de)e0eaebecld/st retires (ed)? fp/simd (ee)f5f6f7f8fd
63005293402197105104524288230002416320002000200010000716177285912921231020003000200029134291321161001100010000200042000002000600412982931368913081374206563109381416545628349160621358915339200010002929729357293022939929342
630042937422050010104662288620002425720002000200010000516175286112924231020003000200029163290661161001100010000200042000032000400412866954668983139059206233448381515545628509164241303615570200010002926829411294112925729385
630042935321940041045172878602024202200020002000100001016164287892930131020003000200029182291191161001100010000200042000002000400413098917769203087358206613092381612576828338162061356115877200010002938129186292972925829301
63004292462205004104526290470012419020002000200010000716185286102930631020003000200029080292311161002100010000200042000002000400613111926469153146157205843048380913555828356162741375215386200010002924229252292882925629313
6300429331219300210470828841000240912000200020001000081616828730293013102000300020002921929215116100110001000020004200000200040041312690706924308705920596302738127545428406164351372715755200010002924929239292782928329330
630042936622020054104530288500202424320002000200010001516179286742926331020003000200029151291281161001100010000200042000002000400412942899968873116157205933070381513615828376161641389415697200010002934529256292852928029293
63004293342217005104538288250012427020002000200010000016160289352921031020003000200029086291491161001100010000200042000002000600412980903568143144058205713106381113626528334164691381115673200010002938329256292952931129335
63004293832206000104612289110002420520002000200010000116148286722934031020003000200029247291761161001100010000200042000012000400413060914568843074250206453055381610595228537161441353215523200010002935529274293622924229299
63004293862206000104636288700022419320002000200010000316182288762931931020003000200029172291871161001100010000200042000002000400412894917268193149060206363088381613575328315161861362415428200010002932329114293052933129179
63004293392193004104882288130002411820002000200010000716159286432930431020003000200029144290901161001100010000200042000002000400613100918569323142064207383100381110615428477163361365015671200010002936929318293112932629429

Test 2: throughput

Count: 8

Code:

  ld1 { v0.2s, v1.2s, v2.2s }, [x6]
  ld1 { v0.2s, v1.2s, v2.2s }, [x6]
  ld1 { v0.2s, v1.2s, v2.2s }, [x6]
  ld1 { v0.2s, v1.2s, v2.2s }, [x6]
  ld1 { v0.2s, v1.2s, v2.2s }, [x6]
  ld1 { v0.2s, v1.2s, v2.2s }, [x6]
  ld1 { v0.2s, v1.2s, v2.2s }, [x6]
  ld1 { v0.2s, v1.2s, v2.2s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6675

retire (01)cycle (02)0305080b0e0f1e22233a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5b6bbdcache load miss (bf)dtlb miss (c1)c2c5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
2402055340040011100651005336621818025160100100160000100160018500233616805337453399533812332572360716011720024004020016002153381533811180201100991001008000080000110016002020421600570005916003861574219111151171161153378002160000800001005339953382534005340053400
24020453399400101006510253366018181625160100100160000100160017500233426115335653398533812332582359016011520024003220016002153398533981180201100991001008000080000010016002120421600571005916000061574219111151171161153378900160000800001005339953382534005338253400
240204533814001000065003533660181816251601001001600001001600155002334114153374533815338123324823350160117200240040200160027533985339911802011009910010080000800000100160020190160057101581600386057019011151171161153396990160000800001005340053400533825340053399
2402045338139910100650025338421801625160100100160000100160016500235831105337453414533852332482337816011720024003220016002153381533991180201100991001008000080000010016001921411600581115916003861194219011151171161153396992160000800001005340053400534005338253399
24020453398400110006500253384001802516010010016000010016001350023581631533745339853398233258236251601132002400322001600215338153398118020110099100100800008000001001600201901600571005916003801574219211151171161153378992160000800001005338253399533825340053382
2402045338139911100651025338301818162516010010016013010016001650023397570533565339953398233257236271601172002400322001600275339953381118020110099100100800008000001001600192001600190015916003861574219011151171161153395002160000800001005340053382533825340053583
240204533984001110021102533852181812516010010016000010016001650023583070533745339953399233258235821601142002400402001600215338353399118020110099100100800008000001001600212042160019000211600386157019111151171161153396900160000800001005338253382534055340053399
24020453399400111006500053383200162516010010016000010016001650023341140533745338153398233248236011601172002400402001600275339953398118020110099100100800008000001001600192101600571115916000000574219111151171161153378992160000800001005338253399533825339953399
24020453401400111006400253384218181625160100100160000100160018500233616805337453399533812332472338116011520024003220016002753381533981180201100991001008000080000010016001920421600191005916003861574219011151181161153378090160000800001005340053382534005338253382
24020453399400110007400253384101802516010010016000010016001450023403700534655338153399233258236071601132002400322001600215339953399118020110099100100800008000011001600201901600570015916003860674219111151171161153396992160000800001005340053400533825339953382

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6675

retire (01)cycle (02)03050708090a0b0e0f18191e1f22233a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)6061696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a5a6a7a8a9acafb5b6bbdcache load miss (bf)dtlb miss (c1)c2cdcfd0d2d5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
24002553400400101000000010300025336621801251600101016000010160000502353389105335653399533812332632336116001020240000201600005339953398118002110901010800008000001016002021420160019000211600386157019005020002216222253395099016000080000105340053399534005339953400
24002453399400100100000010401035338320181525160010101600001016000050233403600533745338153381233433233801600102024000020160000533995339811800211090101080000800000101600202000160057101591600386119019105020001116231453378009216000080000105340053382533825338253382
2400245339940010110100003770103533832018152516001010160000101600005023580911053356533985338123343323394160010202400002016000053399533811180021109010108000080000010160019194201600190005916003861574219205020002616242553396099216000080000105340053399533995340053382
240024533994001001020000423010253383018181525160010101600001016000050235809110533745339953381233433234291600102024000020160000549425524413180021109010108000080000010161845194240616187921610769161690605742193052550045134202555162099216000080000105558255397555455552755558
2400245447841412020000131419131144104535380181812516001010160000101600005023449300055131555305540724456150248741625042024375722162697554005566614180021109010108000080000010161715194239716187900010717161858615741193052530030142252955289209216000080000105555155559554945539055538
240024555364161020000023213058193610356905218181818696163001141629901216320455244700400559315629256177249302162547316232620243758201625015478453399118002110901010800008000011016001920423801601872126416172861194219405020001316262853540099216000080000105600855705552425552955848
2400245658743912023200001190002560042180162516001010160130111633827324233170055516558475585324735180248191630372024519020163078544345339911800211090101080000800001101600201903461601891005916003861574019105020002416262553691009216000080000105341853386534075340753400
240024533994131000011100113000253390077025160010101600001016000050233403600533785338153381233483233831600102024000020160000534035340211800211090101080000800000101600192000160059002611600406159431900502000241613235339901313516000080000105340453382534045338253404
240024533814001010000000940002533870001925160010101600001016000050233124600533785338153381233263233831600102024000020160000534035340211800211090101080000800001101600202000160019000641600006159431900502000261626255340001313516000080000105340453404534045338253404
2400245338139910000111004830003533883772025160010101600001016000050235912400533775340353403233473233821600102024000020160000536575345811800211090101080000800001101600202043016005800222160039005901920502000201624125337801313516000080000105340353382534035338253404