Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, 3 regs, 2S)

Test 1: uops

Code:

  st1 { v0.2s, v1.2s, v2.2s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
63006294792364401300061046182922218428300010002000100020001590780000102177729229295673103000200010004000200029356292991161001100010002000040200000020004010513325911869473217073208693217381919727328774161011347814882200010002935229452294562954229456
630042950823702001000089047062930118390300010002000100020001590480000221766291652947131030002000100040002000294512938911610011000100020000402000004382000429313342928869123161075209873302381922747428705162401340514772200010002942729575294462937529519
6300429517237030020001620046472926118502300010002000100020001590880000221758290972951031030002000100040002000293942941211610011000100020000402000000200040013263937970043116066208203265381625716528691163921335115052200010002947629622296672958329350
6300429382238030020000104670293171848830001000200010002000159138000012181629146294843103000200010004000200029354293621161001100010002000040200000020004001327394287018311016920801331838228706328748161741316315049200010002944729518293482950029461
63004294112360200000001047312933418370300010002000100020001590280000221740291742950631030002000100040002000294672935911610011000100020000402000000200040013308938569513151170207873149381816637528695161901343315147200010002936029579294322947329473
63004295062370200300001045872935218519300010002000100020001590580000121741294532971831030002000100040002000294812942211610011000100020000402000000200040013315932368823153072207953335382417697228847161221325915047200010002944629561294832940329504
63004294872360200100001047222928018585300010002000100020001597880000321786291532943931030002000100040002000294692951811610011000100020000402000003200040013320945769143146069207793196381514697228794161951324614980200010002955729529295632942129532
63004294992370200200061047042932118277300010002000100020001590380000221720292452956931030002000100040002000294012938411610011000100020000402000000200040013190941669253122066208173282381812736728830162101333315275200010002944729394295012959229429
6300429409237020010001321047332929218484300010002000100020001590580000021796291132946931030002000100040002000294662941911610011000100020000402000000200040013197957269203136072208053308381623736628806162701341815159200010002952029509294992955529454
630042949823803003000121046562919818503300010002000100020001590580000121819292382938731030002000100040002000293322944811610011000100020000402000020200040013184924169653133072208913238381418746628765160801341415060200010002952529479295082955029488

Test 2: throughput

Count: 8

Code:

  st1 { v0.2s, v1.2s, v2.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s }, [x6]
  st1 { v0.2s, v1.2s, v2.2s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24020680047643110003181042180038160425240123100800461600001008000616000650036800226401718002780058800614997665000424011220016001680008200320032160016800588005811802011009910010080000800001001600141538101600141019160002163614111151160160080039160000800001008005580043800438004180043
2402048004264300000330051080027161602524015110081046160000100800061600075003679491642333800178004280042499606500032401132001600168000820032003216001680040800421180201100991001008000080000100160000034001600020021600022340011151160160080039160000800001008004380043800438005180043
2402048004264200000114310370800270002524010810080517160000100800001600005003679388643529800178004280040499553499982401002001600008000020032000016000080040800421180201100991001008000080000100160000034001600020051600022340000051511161180047160000800001008004380050800438004380051
24020480049643000002400052808002716002524083610081092160000100800001600005003679388643339800178004280042499553500002401002001600008000020032000016000080042800421180202100991001008000080000100160000034001600022051600022340000051101161180037160000800001008004380043800438004380043
2402048004264300000630072508002716160252408361008068416000010080000160000500367986864013880025800428004249953349998240100200160000800002003200001600008004080049118020110099100100800008000010016000000001600000001600022340000051101161180039160000800001008004180043800518004380043
24020480042642000003300872080027161602524049810080000160000100800001600005003679388640109800178004280042499533500072401002001600008000020032000016000080042800421180201100991001008000080000100160000034001600020021600022340000051101161180047160000800001008004380041800438004380043
2402048004064200000390069308002701602524063810080777160000100800001600005003679460642635800178004980042499553500092401002001600008000020032000016000080042800401180201100991001008000080000100160000034001600000021600022360000051101161180046160000800001008004380043800518004380043
240204800426420000000001433080025016025240966100805361600001008000016000050036794606426218001780042800424996235000024010020016000080000200320000160000800428004211802011009910010080000800001001600000340016000220141600022340000051101161180202160000800001008004380052800418004380050
2402048005064300000019009751800381606252401261008055416000010080000160000500368023164249680025800428004249955350000240100200160000800002003200001600008004980050118020110099100100800008000010016000003400160002008160002163614100051101161180049160000800001008004380043800438005180043
240204800406430000031410521800351609252401221008003216000010080000160000500368025564013480017800428004249963350000240100200160000800002003200001600008004280040118020110099100100800008000010016000003400160002208160002163614100051101161180056160000800001008005180048800498005380059

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2400268004962000000006088908003516160252408801080979160000108000016000050368646864002018001780040800424997535003224001020160000800002032000016000080042800421180021109101080000800001016000003400160002102160002234005020132681280047016000080000108004380043800608004380043
24002480042620000000132301002080035002252410351080872160000108000016000050367946064340018001780040800504997735002224001020160000800002032000016000080051800501180021109101080000800001016000003400160002008160002234005020141661480048016000080000108004480043800438005380043
24002480042620000000000836080025016025240754108052216000010800001600005036794606400121800178005080050499753500222400102016000080000203200001600008005080050118002110910108000080000101600000340016000210516000223600502071618880039016000080000108004380052800518004380043
240024800426200000000309950800361600252410551080615160000108000016000050367946064002318001780042800424997735002224001020160000800002032000016000080042800421180021109101080000800001016000003400160002208160002234005020716131180047016000080000108004180043800418004480051
240024800506210000000007110800271616025240677108099916000010800001600005036794606400231800178004280042499773500312400102016000080122203207321603688054880544518002110910108000080000101601802342573160242123982160242234005063643131480626016000080000108004480050800438005280043
24002480042620000100030105508002716160252410271080533160000108000016000050367946064102618001780049800494997735002224001020160000800002032000016000080042802141180021109101080000800001016000003400160002002160002234015020131671680046016000080000108004380043800438005280043
240024800426200000000906150800271616025240709108073616000010800001600005036794606425191800258004280042500983500222400102016000080000203200001600008004280042118002110910108000080000101600000340016000200216000223400502071614880039016000080000108005080043800438004380052
2400248005162110000009106180800271616106252410221080963160000108000016000050367946064258018001580042800424997735002224001020160000800622032000016000080042800421180021109101080000800001016000003400160002108160000200050201416131380047016000080000108021680043800438005080043
24002480042621000000030926080027161602524101210805891600001080000160000503679460642640180017800408005049975350022240010201600008000020320000160000800508005021800211091010800008000010160000002550160002102160002234005020111671580048016000080000108004380043800438004380052
24002480051620000000030882080034161622524070610800461600001080000160000553679460642515180017800518005049977350022240010201600008000020320000160000800508005011800211091010800008000010160062036301600020021600022340050201316111380039016000080000108004380052800618004380043