Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 4 regs, 1D)

Test 1: uops

Code:

  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
640062891323114200063000464928696221763450001000200020001000200020005000218071600001921893285852893931050002000200050004000288122885911610011000100020000002002902200206001340693836920309416819772317838041966602831710001573912753141542000200010002884128762288032875928780
640042880323103200000000472228657221760750001000200020001000200020005000218071600002321941285762880031050002000200050004000287872891011610011000100020000002000000200006001302593286927316316119935320038132165722838810001572712789136732000200010002891028918288912878728923
64004289292320300006000047212864802176655000100020002000100020002000500021813160000821982286832892531050002000200050004000288122879611610011000100020000602000000200226001300993976896312906619932321538142062642828510001577812561141262000200010002883128866288442886728802
64004289492330340000000045932876500177505000100020002000100020002000500021804160001721964286652889231050002000200050004000288252877611610011000100020000602002000200006001306794696911320416119819314738242564622831010001568012754138992000200010002884928959289522877528870
64004288692320130000310046872880302176745000100020002000100020002000500021809160000821906285922889731050002000200050004000289262899311610011000100020000402000002200202001319993976990318426619826320538192264592835910001558412841139432000200010002886228788289262888129006
64004287632310240000300047542884620177305000100020002000100020002000500021805160000521877286992885231050002000200050004000288382883511610011000100020000002000003200004001325893786966316626519823331738292662682835510001579312778139272000200010002895829085289242892128947
64004288502330320000000047192874620176495000100020002000100020002000500021800160000521881287042890431050002000200050004000288452883111610011000100020000402000000200004001306792476989310306419953321838162861622839410001553112775139702000200010002887328832289022897029006
64004288742340340000100048282874200176635000100020002000100020002000500021816160000521949286412886831050002000200050004000289082890211610011000100020000402000000200004001321692916868314406619974316338162063672835310001570612838140292000200010002874928782289232886528842
64004288992330220000000048222867102176275000100020002000100020002000500021809160000521898285712897931050052000200050004000288052872011610011000100020000402000000200004001309895206888312205619938319838181761652835110001552112613136582000200010002888628921289482885228915
6400428905232024000010004714287382017624500010002000200010012000200050002181416000052189328627288173105000200020005000400028864288491161001100010002000000200000020000401221333194256976320816319996320638172055602826410001557512752138232000200010002890928877288002895428787

Test 2: throughput

Count: 8

Code:

  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202068007164300010001600542018003116169002540438980159164644160000801001601181600004804992719449130026218002480045800450313640010020016000016012020040000032000080214800461180201100991001008000080000100160122040633160124002160002240005125217218004280000160000160000801008004680214800468004680046
3202048004562000000163005152180030161689025404702801601651351600008010016011816000048049921533981298687180024800458021508284001002001601201600002004000003202408004680045118020110099100100800008000010016006224000160062105160122040005109217218004280059160000160000801008004680046800468021380046
32020480045620000100091004481180030161600484040328010016598816006080100160000160000480851219618013106591800238004580044863274001002001600001600002004000003200008004580214118020110099100100800008000010016000004000160002022160002240005124217228004380000160000160000801008004680213800468004680046
320204800446220000000300449618003016160025406790801001657971600008010016000016000048049922348901293722180024800458004503264001002001600001600002004000003200008005480045118020110099100100800008000010016000004000160002002160002240005111217228004280000160000160000801008004680136800468004580046
320204800456210000000310496908003116160025405561801001656971600008010016000016000048049922331211299990180024800548004403274001002001600001600002004000003200008004580045118020110099100100800008000010016000004000160002002160002040005109217218004280000160000160000801008004680046800458004580046
320204800456200000001830082891800381616347048405235801001648041600008010016000016000048049921390981299208180024800458004503274001002001600001600002004000003200008004580045118020110099100100800008000010016000004000160002305160002240005109225218004280000160000160000801008004680055800468004780045
3202048004565200001012301557708003016160025404756801001662331600008010016000016000048049925899071297495180026800538005209334003852001600001600002004003003200008005380052218020110099100100800008000010016001213466201600143117160062240005111217218004280000160000160000801008004680383800468004680046
320204800456461200002851500474638003716161000254050808010016543616000080220160000160000480499271980912939791800248004580046031589403522200160000160120200400000320000800458004511802011009910010080000800001001600002401016000210516006214461205109217228020580000160000160000801008005380054800538005180212
32020480215649000000123005639180035161600254053868015916429116000080100160000160000480499222048113014401800258021280211033240064020016000016000020040000032000080045803781180201100991001008000080000100160000032001600022011160062232005109217128004780059160000160000801008005080046800458022080045
3202048004564300010012901435108003516161010254066098010016571616000080100160000160108480499207975812962930800248004580045031284001002001600001600002004003003202408004580045118020110099100100800008000010016000004000160002125160002240005126217228004281890160000160000801008486080893802258004680046

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32002680045621000000100005579800300002540533080010165258160000800101600001600004800492224450130203880023800448004503364000102016000016000020400000320000800458004511800211091010800008000010160000040001600020051600022400501961761080042800000160000160000800108004680046802148004680046
320024800466200000000030051728019816160254051138001016490916000080010160000160000480049207885213104208002380044800450327400010201600001600002040000032000080045800451180021109101080000800001016000004000160002102160002240050196177980042800000160000160000800108004680046800468004680046
3200248004562100000000300638880030161602540589580010165359160000800101600001600004800492283283130165280024800458004503274000102016000016000020400000320000800458004511800211091010800008000010160000040001600000021600002400501910179680042800000160000160000800108004480046800458004680046
320024800456210000100120006744800301616025407091800101647211600008001016000016000048004921557141301420800238004680045032740001020160000160000204000003200008004580213118002110910108000080000101600000400016000200216000224405019101791080042800000160000160000800108004680047800468078980046
320024800446200000000026700480480030161602540645480010164255160000800101600001600004800492222553129990480024800448004503274000102016000016000020400000320000800458004511800211091010800008000010160242040001600021021600022400501910177980042800000160000160000800108004680046800468004780046
32002480045620000000012310555380030161602540520680010165018160000800101600001600004800492236695129745580023800458004503284000102016000016000020400000320000800458021211800211091010800008000010160000000016000200016000224005019717101080043800590160000160000800108004680048800468004680055
32002480044621000000038282555004661800301616025408528800101647561600608001016000016000048004921589251294782800238004580045032740001020160000160000204000003200008004580045118002110910108000080000101600000406301600020051600020400501910176980042800000160000160000800108004780046800468004880047
320024800456200000000030062048002916160254070968001016660116000080010160000160000480049215934612956668002480045800450312640001020160000160000204000003200008004480045118002110910108000080000101600000400016000000016000024405019917101080042800000160000160000800108004680046800468004680046
32002480045620000000000005539800301600254052118001016668816000080010160000160000480049223248312965368002480045800440312740001020160240160000204000003200008004580046218002110910108000080000101600000400016000210111600022400501910256980042800000160000160000800108004680053807118004680046
32002480045620000100013230066638003016160254062658001016529616000080010160000160000480049215707613003888002980045800450327400010201600001601202040000032000080045800451180021109101080000800001016000004000160002109201600022005019101710780042800000160000160000800108021480046800468004680046