Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (single, S)

Test 1: uops

Code:

  st4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
630072888022341211100001048872874312156513000200010002000100012809179081235612852328628310300010002000200050002863528714116100110001000100110310010111000101013415932969653135164199583163380327616328135155771317913685100020002877128553284632861928740
6300428693223012101100020470628661111571830002000100020001000128081790518235732857628722310300010002002200050002894129073216100110001000100113010020316661000141013328967369733291065200683167380614636028228157761312914030100020002896829070288652899528992
630042901622301101000021205220282190115202300020001000200010001281317903182353028400283063103000100020002000500028250282861161001100010001002231100101110001311139411044071683453159197163276381914615927978146011239912332100020002845528047283942840128366
63004283312120111021000115095283820015187300020001000200010001280917902172355028322282393103000100020002000500028734288781161001100010001001200100000251000131113235954669143224061198893334380324555627853140801221013109100020002827328505283922880228879
63004282252160110011000105183281480015127300020001000200010001280617902172354828197281543103000100020002000500028313284581161001100010001002332100101110001311134451012272573385062194253426380912616128026144031238412629100020002843928229284162831128089
6300428215213011100100020515728370001522330002000100020001000128101790410232652824428182310300010002000200050002828928000116100110001000100223110010211000121113745999272743447059197653431380618645927873149161197112128100020002836628084282662819328307
63004283542120121021002710494328025001532830002000100020001000128121790572355728285283373103000100020002000500028275284341161001100010001002131100101110001310136671014771883340062197703454380720536227969146251244712777100020002839628138283572833028486
630042825721001010210001051472820800151543000200010002000100012809179077236142820428410310300010002000200050002838128452116100110001000100113110010111000121113761977673393487064198943355380812556327893150411238613041100020002842728436284172844428221
6300428375212011101100010522928249001548230002000100020001000128131790482356728155284143103000100020002000500028525284101161001100010001002222100101110001311137191033272763334162197053473381612586327944145121228112762100020002836528390283052835128280
630042841321201010110002050952838900154863000200010002000100012810179042235482824228289310300010002000200050002844328327116100110001000100123010010211000121113484103777175342515819538331238119646227948145891237712999100020002826828286281372814928303

Test 2: throughput

Count: 8

Code:

  st4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  st4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  st4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  st4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  st4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  st4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  st4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  st4 { v0.s, v1.s, v2.s, v3.s }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5639

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 23 | 37 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24020745151361122201091050131961449831616141118252513861001730408006010016002780002500207937132928624538744726452671525861469824013120080013160032200160026400667457184545821802011009910010080000800001008000004222730800002011888000224400511011611450900800001600001004520045093451544503245052
24020444923359000001009101141504496416168776252525711001727938000010016000080000500207192933255424438745260453021501314150422401002008000016000020016000040000044744450651180201100991001008000080000100800000422254080002001348000424200511011611452410800001600001004468244479451874507244915
2402044498736201000000301163804529416162589555252256100173015800001001602368000050020969403340454451694513845117150123149982401002008000016000020016000040000045149453551180201100991001008000080000100800000427095080002001708000224200511011611451180800001600001004517745258452384544044803
2402044510736200000000301103304512716161383525250923100172096800001001600008043250020727043270331450724546048273148303148842401002008000016000020016000040000045057446301180201100991001008000080000100800000422325080002001708000424200511011611449540800001600001004482944622446324535245586
2402044514036000000000301313804530316162192225252343100171920800001001600008000050020882883297543455114539945357151603149392401002008000016000020016000040000045198448551180201100991001008000080000100800000422284080000001408000224200511011611451820800001600001004519845053453634508745121
240204449913600000000030123410454571616158442525209010017118080000100160000800005002070721330512545241453014462915035315363240100200800001600002001600004000004499244825118020110099100100800008000010080000042250908000200988000224200511011611453900800001600001004505445229452984498044842
240204451513490000000030115450452511616219462525248610017167580000100160000800005002075866333702944751451064505115080315025240100200800001600002001600004000004500445100118020110099100100800008000010080000002290080002001198000224200511011611450690800001600001004464544932446334489144832
2402044533035100000000301191904451416164288725250834100172640800001001600008000050020982273345391453794517245126149913153382401002008000016000020016000040000045042450911180201100991001008000080000100800000432286080002001828000224200511011611451480800001600001004500045077454324501644575
240204452503480000000000121230453951616219822525252410017240780000100160000803245002068991330614245349451984516214599314876240100200800001600002001600004000004515745090118020110099100100800008000010080000042232108000200140800042000511011611447610800001600001004533645281449914530844868
24020444685349000000003013338044881002391925251620100171901800001001600008000050021036253338786455214530845238151143152372401002008000016000020016000040000045234449101180201100991001008000080000100800000422260080002001618000224200511011611452220800001600001004495145130453414510945013

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5644

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | ce | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24002745266352000000008101311844856161612941252522461017211880000101600008000050207976733062130455314485644934153350315161240010208012916000020160000400000450834525211800211090101080000800001080000034237508000220118000223400050203163345171080000160000104500845533450954530045388
2400244531035200000000200120774531816163014442525247710172545800001016023680000502095269326982104472445189456021505903152402400102080000160000201600004006004560245128118002110901010800008000010800001436234108000210148000223400050203163345138080000160000104503545194451004512345253
240024452683490000000020012024453331616810272525337410172289800001016000080000502093454334249604469444801450741481403151372400102080000160000201600004000004507144783118002110901010800008000010800000024870800020028000223400050203163345050080000160000104511844968452594534545410
2400244499835100000006200125224514416166835252518981017289180000101600008000050209079133037640451154500045055152370315033240010208000016000020160000400000454424513211800211090101080000800001080000153625230800022028000223400050203163344731080000160000104524045107449664521845611
24002444971348000000007001193245204161627808252521211017146580000101600008000050208405533360791452784538645083150280314946240010208000016000020160000400000448734517811800211090101080000800001080000034199708000232028000223400050203163345173080000160000104501045136451544544645207
2400244488135000000000210115594482416161873425252814101714348000010160000800005021027903355623045141450444513414966031479824001020800001600002016000040000045062450711180021109010108000080000108000014026740800020088000023400050203162345085080000160000104523445157451454556644987
240024451353510000000020011791446350161998025251653101714288000010160000800005020929813323058045110453154560915289031482124001020800001600002016000040000045419450771180021109010108000080000108000003422140800020058000203400050203163345285080000160000104488044873449044523745317
2400244489534900000006600125474532016162988025251643101726118000010160000800005021009953311796044679454024524315332031503224001020800001600002016000040000045321449891180021109010108000080000108000014023610800020098000223400050203163345339080000160000104508544986453604532245201
240024450773510000000050012761449031616187902525255010172858800001016000080000502100687332493604512145032451371479703147182400102080000160000201600004000004521445453118002110901010800008000010800000023200800020008000223400050202163245003080000160000104530145273450834543745132
2400244464434800000000200120294526601622872252508431017242180000101600008000050207482532767990451134514445381149840315232240010208000016000020160000400000449944517911800211090101080000800001080000143623520800022088000223600050202163345029080000160000104473445502450194529145378