Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, 4 regs, 8B)

Test 1: uops

Code:

  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
64006286862234120011000300477428501221748340042000200020002000218111600016219452841828660910400020002000400040002865428665216100110001000200326120022222000262113253955169633206040196343287380810415328131151111262414120200020002853928741285892858328644
64004286692230101111000300483228491221750140002000200020002000218121600015219062847328561310400020002000400040002857228657116100110001000200336120021022000262113302950468913120039197283243380116463928129154041252214165200020002870528608287362871828565
64004287362220111001000300493428527001739240002000200020002000218231600011219062843928537310400020002000400040002858428613116100110001000200346020021011200000201364198397049323913919551322538078424028309149501264213583200020002873728673287272873428655
6400428490221010000000031048732844800174244000200020002000200021814160006219032839828623310400020002000400040002843628679116100110001000200324020020222000262013085942969683195042196363138380412454528188151431250713632200020002853328695286652854428533
64004286732210110011000300474428561021746240002000200020002000218101600018219162843528593310400020002000400040002859828505116100110001000200236020020152000262013418967869833249144197773228380016424428091155741250014070200020002867528663286582856528643
64004286462200111111000300478828557221759340002000200020002000218121600015218732840228669310400020002000400040002843528617116100110001000200246120020222000262113413974570013147145195503186380112414028111153041251814094200020002860728556285842852228654
64004285652210101110000300484628407221740740002000200020002000218091600016219232824428566310400020002000400040002853528693116100110001000200236120022222000262113157971669773254033197043255380415454728229153641249714088200020002850728586285202851228576
640042861922201111110003004715284832217427400020002000200020002180416000162188328397286333104000200020004000400028531284731161001100010002003361200212122000262113195975269923234054196093227380013425128144152871237013921200020002859228580287232841228542
64004286202220111101000300487228505221743640002000200020002000218091600017219102844628503310400020002000400040002868128497116100110001000200336120020052000262113441947769713206044196333204380718414028206150951240913887200020002855228576286132872328521
64004286222230112111000300478828560221733740002000200020002000218111600011218662825628751310400020002000400040002872628532116100110001000200226120020022000262113169973069703173244195513151380511464428199150841248213691200020002862528553286312873228538

Test 2: throughput

Count: 8

Code:

  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202068007162110100001407439080038160025325616100166858160000100160000160000500215881712946140800258004880047034132010020016000016000020032000032000080050800581180201100991001008000080000100160014034011600020020160002000000051090117118004201600001600001008004680046800538004680046
320204800456210000006306118080033161602532524710016588316000010016000016000050021588491299726080024800478004603273201002001600001600002003200003200008004580045118020110099100100800008000010016001414360016001610181600022340000051090117118004501600001600001008004680049800458004680046
320204800466200000000315004080522000253275641001652581600001001601181602165002231087129482808002480052800520327320100200160000160000200320000320000800458004611802011009910010080000800001001600140340016000200171600022340000051090117118004101600001600001008004680046800498004680051
320204800456200000002114054431800300160253256451001649071600001001600001600005003679175129540308003380045800460332320100200160000160000200320000320000800508005011802011009910010080000800001001600151500016001410171600022340000051090117118004201600001600001008004680046800458004680046
3202048004562000000000052960800441616025325971100165724160000100160000160000500223399212966510800268005080051033232010020016000016000020032000032000080059800581180201100991001008000080000100160016153800160016001416000214340000051090117118004801600001600001008004680046800508004680045
3202048004562000000012305169080030161602532500910016387016000010016000016000050020788621299158080024800458004503313201002001600001600002003200003200008005280045118020110099100100800008000010016000000001600020121600022340000051090117118004201600001600001008004680046800468004680046
3202048004562000000006052600800301600253250771001665971600001001600001600005002236086129550008002380045800450330320100200160000160000200320000320000800488004911802011009910010080000800001001600140340016000200171600020340000051090117118005001600001600001008004680046800468004680049
32020480048620000000030261108003316161253251421001644891600001001600001600005002399920130069908002680049800480333320100200160000160000200320000320000800508005011802011009910010080000800001001600141500016001602201600021600000051090117118004701600001600001008004680045800468004580046
320204800456200000000305566080030016025326096100166738160000100160000160000500239798013008370800248004580045032732010020016000016000020032000032000080045800451180201100991001008000080000100160014143602160014011616000014340000051090117118004701600001600001008006080061800518005180046
3202048005862010000003051810800301616025325332100164449160000100160000160000500215181712948000800248004480045033432010020016000016000020032000032000080049800461180201100991001008000080000100160000036001600020021600022340000051090117118004201600001600001008004680046800508004680046

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320026800586211000001321800487318003016160253247051016701816000010160000160000502479520130308710580025800528005203343200102016000016000020320240320000800518004511800211091010800008000010160014144400160002002116000216441405019001917101980217160000160000108005280051800468005380052
3200248005362010010012190054301800371516025325563101648011600001016000016010850255974813060790158003780052800508433432001020160000160000203200003200008005180051118002110910108000080000101600001444001600160121600022421405019001717181380042160000160000108005280053800528004680053
32002480052620000101017005951180035161632532493110165237160000101600001600005023999271297206000800248004580050033432001020160000160000203200003200008006280063118002110910108000080000101600140440016001610181600021644005019001926161980049160000160000108004680052800548006380046
320024800456211101000180057810800361616325325291101654931600001016000016000050221061612945632058002480045800450327320010201600001600002032000032000080045800501180021109101080000800001016001604800160016102116000216441405019001817191680048160000160000108005280046802218005180052
3200248005162110110001400439108003016160253256121016588916000010160000160108502278705129369140580024800628006303333200102016000016000020320000320000800528004511800211091010800008000010160014144401160002011616000216441405019001717101880047160000160000108005280053800468005280221
32002480052620101100019004090180037161502532524910165799160000101600001600005036794781294059305800378005280052034432023620160000160000203200003200008005180051118002110910108000080000101600001444001600161021600022421405019501717181080043160000160000108005280052800538004580053
32002480051622000100917004682180038161602532363010162596160000101600001600005024797761317621200800258004480052033432001020160000160000203200003200008006380062118002110910108000080000101600140440116007610271600021644015019001817181080049160000160000108004680052800538006480046
32002480046620111100019014237080045161632532455010164677160000101600001600005022118121297847200800248004580045032832001020160000160000203200003200008004580051118002110910108000080000101600140420016001602191600021644005019001217141880049160000160000108004680046800538006380046
320024800456201020001240069410800301616882532403210164616160000101600001600005021581981297289305800278005180045032732001020160000160000203200003200008004580045118002110910108000080000101600151642301600020016160002042005019001217171780043160000160000108004580046800468004680048
32002480045621000000600112940800290160253249231016622116000010160000160000502223932130436930080024802118004507263200102016000016012020320000320000803788004521800211091010800008000010160060042124016006000925160184242005062001935181880197160000160000108005480215802118004680215