Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST2 (single, S)

Test 1: uops

Code:

  st2 { v0.s, v1.s }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)18191e1f22243a3f464951schedule uop (52)schedule simd uop (54)schedule ldst uop (55)dispatch simd uop (57)dispatch ldst uop (58)simd uops in schedulers (5a)ldst uops in schedulers (5b)5f696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map simd uop (7e)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst simd store (99)inst ldst (9b)l1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafl1d cache miss st nonspec (c0)c2c9cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0eaebec? ldst retires (ed)? simd retires (ee)f5f6f7f8fd
6200629414228000010000000467329068001830520001000100010001000109048000121741289582933031020001000100020002000293402939811610011000100010000201000000100020013082919369293119057206493204381614625828582162911335715137100010002942129294293912930929315
62004293092271000100012000465929042001841520001000100010001000109068000121771289782942531020001000100020002000293152927511610011000100010000201000000100030013122914169023125058208013152381816575728562161731346915072100010002935529457293112930429343
6200429442228110010000000473429126001843120001000100010001000109008000221675290372922131020001000100020002000292832929511610011000100010000201000000100020013088916969123126053208313211381522586128755160171358615106100010002929829337293772933129421
62004294002280100000012010456229113001832920001000100010001000109078000021707289582931831020001000100020002000293212929111610011000100010000201000003100020012981926969653152061207273168381519516528613160371341615075100010002937929423293702934629318
6200429380229110010000010468029116001835120001000100010001000109108000121755289492933931020001000100020002000292432923411610011000100010000001000000100000012997924569883130157206383197381810485028566161251349014991100010002930229239293022927529413
6200429358227100010000000465029119001828920001000100010001000109058000221713289982929331020001000100020002000292552928011610011000100010000201000003100000013150914069603114056207753253381217545628548160091321514998100010002939429315293202941629344
6200429312227110000000000470729125001826920001000100010001000109108000621704291092938831020001000100020002000291262923611610011000100010000301000000100030013235912768783056156206503192381510535028532163071324414732100010002936029293294042938429218
62004293912281000100012010461029215001827220001000100010001000109088000521772290122929431020001000100020002000293142921011610011000100010000201000000100020013037931169353214150205923235381316564828541163291320514891100010002926029272293982937129198
6200429377227110010000100474829149001825320001000100010001000109088000621692290772929431020001000100020002000291892919411610011000100010000001000000100000012945918168633046055208103166381819525928654161731346115050100010002926929401292922930629341
6200429373227100000000000459229015001844020001000100010001000109018000721778289012927831020001000100020002000292512929411610011000100010000201000000100020013000924869373143154206693171381022535128569161551329515038100010002927329294292992936329292

Test 2: throughput

Count: 8

Code:

  st2 { v0.s, v1.s }[1], [x6]
  st2 { v0.s, v1.s }[1], [x6]
  st2 { v0.s, v1.s }[1], [x6]
  st2 { v0.s, v1.s }[1], [x6]
  st2 { v0.s, v1.s }[1], [x6]
  st2 { v0.s, v1.s }[1], [x6]
  st2 { v0.s, v1.s }[1], [x6]
  st2 { v0.s, v1.s }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5006

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)18191e1f2224373a3f46494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)696d6emap stall dispatch (70)rob full (74)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd store (99)inst simd alu (9a)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbcl1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)daddfetch restart (de)e0? int output thing (e9)? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
160206400623111100001141900244214003616160251630281008106180000100800008000050018397126456364003740052400551996703200101601002008000080000200160000160000401954004311802011009910010080000080000100800141544018001601280002244140511011601140047080000800001004005040053400534004840055
16020440051310010100378171034681400391616025162228100835888000010080000800005001840096650438400384004340055199660320012160100200800008000020016000016000040052400541180201100991001008000008000010080014140008001600080000244140511021602140048080000800001004004340053400544005440043
1602044005431001000051190035611400381616025163870100828208000010080000800005001839760650078400284004240043199680320001160100200800008000020016000016000040052400531180201100991001008000008000010080015144400800160120800001644140511011601140047080000800001004004340044400544005140044
1602044005231011110036330012311400270002516304510082774800001008000080000500184007264796340029400624004319964032000016010020080000800002001600001600004005440053118020110099100100800000800001008001515440080002011480000152141511011601240049080000800001004004840043400434005240044
160204400523101000003873001758140027016025161229100812798000010080000800005001840096648543400284005140054199670320012160100200800008000020016000016000040062400541180201100991001008000008000010080000154400800020017800021642140511021601240039080000800001004005440054400434004440053
1602044005031010010054300283714003516161251613951008088980000100800008000050018400726488394002140053400501996503200111601002008000080000200160000160000400544005211802011009910010080000080000100800000440080002001880002164201511011601240040080000800001004005440054400544004440053
16020440042310100100255310206804003916161251635281008122280000100800008000050018397126429694002140054400531996803200091601002008000080000200160000160000400434006211802011009910010080000080000100800160420180016001880000164400511011602140051080000800001004005340053400534004940055
1602044004331110100046522001413040039161612516115510082977800001008000080000500183971264439540027400544005219967032000916010020080000800002001600001600004004240042118020110099100100800000800001008001415420080016011480002160140511021601140047080000800001004005240053400534005340051
16020440054310101000306190027501400391616125163598100822908000010080000800005001840048643528400254005440053199750320010160100200800008000020016000016000040054400421180201100991001008000008000010080014150018001460380000244141511011602140051080000800001004005440064400634006440055
16020440051310000000447180026371400390160251627551008191180000100800008000050018399526464714002240043400521996703200201601002008000080000200160000160000400514005611802011009910010080000080000100800151544008001610280002244140511021602140051080000800001004004440054400644006340043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss instruction (0a)l2 tlb miss data (0b)18191e1f22373f46494f51schedule uop (52)schedule int uop (53)schedule simd uop (54)schedule ldst uop (55)dispatch int uop (56)dispatch simd uop (57)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)ldst uops in schedulers (5b)60696d6emap stall dispatch (70)rob full (74)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map simd uop (7e)map int uop inputs (7f)map ldst uop inputs (80)map simd uop inputs (81)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd store (99)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbcl1d cache miss st nonspec (c0)c2cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)? simd retires (ee)? int retires (ef)f5f6f7f8fd
1600264004331000000601109140028160025160855108092280000108000080000501839712640129140021400434004319982032003416001020800008000020160000160000400434004311800211091010800008000010800000420080002005800022420050201316127400398000080000104004540044400454004440044
16002440043311000000303579400271616025164500108539280000108000080000501839712643053140021400424004319982032002316001020800008000020160000160000400434004211800211091010800008000010800000420080002102800022420150201316512400408000080000104004340043400434004440044
1600244004331010001123065040027161602516322110836118000010800008000050183971265617914002140043400431998203200231600102080000800002016000016000040043400421180021109101080000800001080000042008000210280002200050207161211400408000080000104004340044400454004440044
1600244004231100000030103340028016025164335108123580000108000080000501848076644393140021400434004219982032002316001020800008012020160000160000400434004221800211091010800008000010800000420080002002800620420050201025125400408000080000104004340044400434004440044
160024400433101000003013234002800025163650108366380000108000080000501839712644137140021400424004219982032002216001020800008000020160240160000400434004211800211091010800008000010800000420080002105800022420050201316126400398000080000104004440044400444004440044
16002440043311000000311211400270160251606021083901800001080000800005018397126449360400274024540043199820320023160010208012080000201600001600004004340043118002110910108000080000108000004200800020058000224200502013161312400408000080000104004340044400444004440044
1600244004231000000123040540027160402516043210803598000010800008000050183971264205804002640043400421998203200241602342080000800002016000016000040043400431180021109101080000800001080000042960800020058000024400502012161212400408000080000104004440044400444024940246
1600244004231000000123045104002816160251614901082627800001080000801085018397126410810400274004340043199820620023160010208000080000201600001600004004240242118002110910108000080000108000004200800020028006220005020716136400398000080000104004440043400444004340045
1600244004331000000030119840028161602516369310836668000010800008000050183971264190704002140043400431998203200221600102080000800002016000016000040042400431180021109101080000800001080000000080000005800020440050201116712400408000080000104004440043400444004440043
16002440043310000000301643400281616025161685108436480000108000080000501839712642635040027400424004219982032002316001020800008000020160000160000400424004311800211091010800008000010800000420080002002800022420050207161212400408000080000104004640044400444004440044