Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST2 (multiple, 8B)

Test 1: uops

Code:

  st2 { v0.8b, v1.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
62006287892240125013210001005145286021117342200010001000100010001090380001221756286432849131020001000100020002000286842857911610011000100010022011002011100014110134299472697631681263201163279381212585728235152461264814021100010002859928304282352844128651
6200428729229112601221001230051832828701177162000100010001000100010905800092178128591285703102000100010002000200028035281131161001100010001002101100102710000200105136961026769393218859200733180381016646428149143561217414091100010002871328664287522870628772
620042877922800280028100182004867285760017219200010001000100010001090580001121751285462880131020001000100020002000286322868211610011000100010021011001021100022100133759675696431541565202033114381321585727981153381268013993100010002879128724288402869828338
6200428419230012811190000200480028569001777120001000100010001000109088000921712285952878331020001000100020002000281322847011610011000100010022011001001100012110133929621693031961562201273165381414626228311154041270514186100010002869228856286782871128742
62004286702210134113110002004932285230017674200010001000100010001091980001021713284302864331020001000100020002000286982862011610011000100010023201001011100012110131029712700731981261201503230381225605628282157591320814772100010002877828742286542878328723
62004288272220126002600001004786286220017857200010001000100010001090880001321700284302870931020001000100020002000287312875011610011000100010000211000000100012110132199754697131671268201433198381318646028276153201258814148100010002869228631287682878928816
620042871522301271026100020047052867600178032000100010001000100010910800082167828468286663102000100010002000200028575286821161001100010001001220100000010000200013430951269423194970201443190381922636628315153871276314079100010002878128777287242879228873
62004287362230124002400002004796285720017669200010001000100010001090980001021694285332865931020001000100020002000286752869111610011000100010022051001001100002000131609629689431781266201663163380715626428203153541279313782100010002873428799287692872428692
6200428797223003300271000200474428550001779820001000100010001000109068000102172628397286533102000100010002000200028707286841161001100010001002301100101110001211013364955969743144960199963221381414624928219155721286714043100010002875528702287352865428643
6200428844223012700270000000481128571001774720001000100010001000109028000112169628566287313102000100010002000200028601285701161001100010001002200100102410011210013102958068963211863201313138381218566028139153481275514121100010002855228668285922862528652

Test 2: throughput

Count: 8

Code:

  st2 { v0.8b, v1.8b }, [x6]
  st2 { v0.8b, v1.8b }, [x6]
  st2 { v0.8b, v1.8b }, [x6]
  st2 { v0.8b, v1.8b }, [x6]
  st2 { v0.8b, v1.8b }, [x6]
  st2 { v0.8b, v1.8b }, [x6]
  st2 { v0.8b, v1.8b }, [x6]
  st2 { v0.8b, v1.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 1f | 23 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160206400423100002574400271616025162304100825448000010080000800005001840072646079400214004340043199593200011601002008000080000200160000160000400424004311802011009910010080000800001008000042008000210280002042000051101161140040080000800001004004440043400444004340044
1602044004331012303982400281616025161825100810318000010080000800005001840432650601400214004340043199593200001601002008000080000200160000160000400434004211802011009910010080000800001008000042008000200280002242000051101161140039080000800001004004440044400444004440043
16020440042310000451400281616025160484100838058000010080000800005001839712645364400214004340043199593200011601002008000080000200160000160000400434004311802011009910010080000800001008000042008000200280002242000051101161140039080000800001004004440043400444004340043
16020440043310000467400281616025160652100815738000010080000800005001839712641060400214004340043199593200001601002008024080000200160000160000400434004211802011009910010080000800001008000042008006022380002242000051101161140040080000800001004004540043400444004340044
16020440042311040160440028161602516056510081162800001008000080000500183971264165840021400434004319959320000160100200800008000020016000016000040043400451180201100991001008000080000100800000008000200280002242000051101161140039080000800001004004440043400444004440044
1602044004231012303440400281616025160950100806438000010080000800005001839712644566400214004340042199593200011601002008000080000200160000160000400434004211802011009910010080000800001008000042008000200280002242000051101161140039080000800001004004340044400434004440044
16020440043310030150340027016025161868100805288000010080000800005001839712650094400214004340042199593200011601002008000080000200160000160000400424004311802011009910010080000800001008000042008000200580002242000051101161140040080000800001004004440043400454004340043
160204400423100302332400281616025161125100810528000010080000800005001839712642695400214004340052199593200011601002008000080000200160000160000400424004311802011009910010080000800001008000042008000200280002042000051101161140039080000800001004004440043400444004440043
160204400433111230276640027161602516059810084802800001008000080000500183971264197940021400434004319966320001160100200800008000020016000016000040043400421180201100991001008000080000100800000008000200280002242000051101161140039080000800001004004440043400444004340043
1602044004431000034440028161602516340110080392800001008000080000500183971264477540021400434004319959320001160100200800008000020016000016000040043400421180201100991001008000080000100800004200800001088000220000051101161140039080000800001004004540044400434004440044

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600264005231700030602400331616025160999108449080000108000080000501839712653472400274004340049199843200231600102080000800002016000016000040049400421180021109101080000800001080000340800620880002234503831623402298000080000104004340044400434004440044
1600244004230000090277940027016025160497108072880000108000080000501839712644096400234004240043199853200231600102080000800002016000016000040043400421180021109101080000800001080000340800000280002234502031633400468000080000104004340044400434004440044
160024402463000006036464003316160251606051083136800001080000800005018397126483104002340043400421998410200221600102080000800002016000016000040042400421180021109101080000800001080067340800020580002234502031643400398000080000104004340044400434004440044
1600244004330010030219840027161602516441310853948000010800008000050183971264329340027400434004319982320029160010208000080000201600001600004004240049118002110910108000080000108000000800020080002234502033443400408000080000104004340049402604067840044
16002440048300311719104836400281616025161381108479780000108057980216501839712643989400244004240042199843200281600102080000800002016000016000040043400421180021109101080000800001080000340800020280002234502031633400398000080000104005040050400504005040044
160024400423000003039400281616025161100108218180000108000080000501839832641443400244004240042199843200231600102080000800002016000016000040043400421180021109101080000800001080000340800021280002234502031633400408000080000104004340044400434004440044
16002440049313000603318400331616025165404108073480000108000080000501839712641471400214004940042199823200281600102080000800002016000016000040043400491180021109101080000800001080000340800020880002234502031633400408000080000104005040050400504005040043
16002440049300000604496400331616025160606108004080000108000080000501839712650950400214004240043199823200231600102080000800002016000016000040049400421180021109101080000800001080000340800020280002234502031633400458000080000104004440043400444004340043
16002440042300000901642400341616025160551108449680000108000080000501839712650946400214004940043199823200231600102080000800002016000016000040042400491180021109101080000800001080000340800020280002034502031633400458000080000104004340044400434004440044
1600244004330000030364740027016025160068108440280000108000080000501839712640123400214004240043199823200231600102080000800002016000016000040042400421180021109101080000800001080000340800020280002234502021633400398000080000104004340044400434004440044