Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, 4 regs, 4H)

Test 1: uops

Code:

  st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss instruction (0a)l2 tlb miss data (0b)18191e1f2223243a3f464951schedule uop (52)schedule simd uop (54)schedule ldst uop (55)dispatch simd uop (57)dispatch ldst uop (58)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f6061696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd store (99)inst ldst (9b)9dl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbcl1d cache miss st nonspec (c0)c2c9cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? simd retires (ee)f5f6f7f8fd
6400628765223210080000010048092860822174094000200020002000200021802160001210218722846228607310400020002000400040002861228561116100110001000020000602002100112002260013401968869283262242195403120380319414528041154401238014031200020002856828586285162865328554
64004286352211901000030004733284762017399400020002000200020002181016000260021928283812858331040002000200040004000285012855211610011000100002000060200225052002000013205980170013162244196093206380917384728221152981247613732200020002850228446283862863028595
64004284682210808000030004776285362217544400020002000200020002181216000200821914283592863231040002000200040004000285092866211610011000100002002000200212092002060013367937269813247442197043154380913414828111150431266213665200020002855628606286022854828701
6400428574223012070001201004758284342217601400020002000200020002181616000200021896283272857331040002000200040004000286772856111610011000100002000000200248022002000013460965870313208144195903219381420454928160153471267613852200020002866028630285202854728646
64004286742210805000030004729285962217408400020002000200020002181216000230021837284092866231040002000200040004000285572853711610011000100002000000200259022002260013249957169223215349195563133381115434628162155621246113882200020002858628453286552857928561
640042861522105060021210004778285192217408400020002000200020002180116000140021855284052863631040002000200040004000285152859211610011000100002000000200045022002264013358941069873248346196963226380517535028139153241247214045200020002837328529286342852028592
64004285572220706000031004747285810017285400020002000200020002181116000140021851284752865131040002000200040004000285852861211610011000100002000060200240002002260013164971769423243343195903215381116494528175151611256313949200020002852928556286602875428595
640042849122201104000610004863284642217208400020002000200020002180516000141821911283962859031040002000200040004000285862859211610011000100002000000200248022002260013248948669843171241195773165380915394228183153081256614153200020002869128591286622864228599
64004285362230603000031004841285652217393400020002000200020002180016000121821895284372869631040002000200040004000284332865311610011000100002000060200246022002260013416947969473187343196363187380518464728194150261239013936200020002854028663286482853428538
64004286252210809000030004862285042217483400020002000200020002180316000181021863283322867931040002000200040004000285632854811610011000100002000060200248022002200013247937869793228445194543140381920434328186151931245313898200020002860428529286082857428602

Test 2: throughput

Count: 8

Code:

  st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)18191e1f23373f46494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd store (99)inst ldst (9b)9fl1d tlb access (a0)l1d cache miss st (a2)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbcl1d cache miss st nonspec (c0)c2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
320206800846201100001260409780029160025326204100166026160000100160000160000500307267712968908002408004580045032732032620016000016000020032000032000080045800491180201100991001008000080000100160000340016000200185516006223400510911721800421600001600001008004680050800468004680050
320204800496200000000305728800301616025326097100166563160060100160000160000500223966912954738002408020980045032732010020016000016000020032000032000080045800441180201100991001008000080000100160000340016000200216000023400512511711800421600001600001008004680046800468004680050
32020480049620000000030607880030161602532634510016571916000010016000016000050021983321294670800240800458004903273203262001600001600002003200003200008004580044118020110099100100800008000010016000000016000210216000223400510911711800421600001600001008004680046800508004680045
320204800456200001400303737800341616025326151100166403160000100160000160000500213888012958728002408004580045032732010020016000016000020032000032000080045800441180201100991001008000080000100160000340016000200216000223400510911712800421600001600001008004680050800468004680046
3202048021262000000009045138002916168925325900100164258160000100160000160000500222930313024318002408004580045032732010020016000016000020032000032000080219800491180201100991001008000080000100160000340016000210216000223400510911711800421600001600001008004680046800468004680050
320204800486200000000306020800301616025325561100164272160000100160000160000500222865513010988002408004980217032732010020016000016000020032000032000080045800451180201100991001008000080000100160000340016006210216000223400510911711800411600001600001008004680046800498004680050
320204800446200000000306067800301616025323777100162052160000100160000160000500207945113021198002508004580045016279632484820016000016000020032000032000080049800481180201100991001008000080000100160000340016000200216006003400510911711800421600001600001008004680046800498004680050
3202048004964300000090055978003016160253261471001650891600001001600001600005002138880129852580025080048800450312332010020016000016000020032000032000080045800481180201100991001008000080000100160000340016000210216000223400510911711800431600001600001008004680050800468004680046
3202048004562100000006044868003316160253245631001684671600001001600001600005001919834129851080024080045800450312432010020016012016000020032000032000080049800481180201100991001008000080000100160000340016000010216000223400510911721800461600001600001008021380046800468004680050
320204800506210000000307025800301616025325543100164833160000100160118160000500223343413113768002338021180044032732010020016000016000020032000032000080214800452180201100991001008000080000100160000340016000210216000223400510911711800441600001600001008004580046800458004680050

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)18191e1f373a3f46494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)5e696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd store (99)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbcl1d cache miss st nonspec (c0)c2cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
3200268005864300000123328118003016160253265461016433516000010160000160000502156842130227408002480045800450327320010201600001600002032000032000080045800451180021109101080000800001016000004200160002003801600022420050214173580042160000160000108004680046800468004680046
320024800466210000003449918003016160253254731016705516000010160000160000502158244130386218002480045800450327320010201600001600002032000032000080045800451180021109101080000800001016000004200160002204221600002420050213174580042160000160000108004680046800478004680046
3200248004562000000004602180030161602532431610166574160000101600001600005021563121290543080023800458004503273202362016000016000020320000320000800458004511800211091010800008000010160000042001600020051600022420050214174580044160000160000108004680046800468004680046
3200248004762600000004470180030161602532487210166078160000101600001600005021427611298834080024800458004507273200102016000016000020320000320000800458004511800211091010800008000010160000042021600021051600022420050215175480042160000160000108004680046800468004680045
320024800456200000093664818003016160253237071016691816000010160000160000502229502130243408002480045800450326320010201600001600002032000032000080045802131180021109101080000800001016000004200160002002821600022420050214173580042160000160000108004680045800458004680045
3200248004562100000933462180031161602532524010164796160000101600001601085022351631298338080024800458004503273200102016000016000020320000320000800458004511800211091010800008000010160000042001600020021600022420050215174580042160000160000108004680047800468004680046
32002480045620100000334681800301616025325661101665441617401016306816010850223617413056210800248004580045853273200102016000016000020320000320000800458004511800211091010800008000010160000042001600020021600022480050355174580042160000160000108004680046800468004580046
320024800446200000003748118002916160253245061016371916000010160000160000502144990129702608002480045800450327320010201600001600002032000032000080045800451180021109101080000800001016000004200160002002211600022420050215173580042160000160000108004680046800468004680046
32002480044621000001234996180196161602532563910163527160000101600001601085021589971302543080024800458004584327320010201600001600002032000032000080047800451180021109101080000800001016000004200160002101851600022420050353174580042160000160000108004580046800468004680046
3200248004562100000046085180030161602532582910164155160000101600001600005022384421296060080024800458004503273200102016000016000020320000320000800458004511800211091010800008000010160000042001600020021600022420050215175580042160000160000108004680046800468004680046