Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (single, S)

Test 1: uops

Code:

  st3 { v0.s, v1.s, v2.s }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6200629007233110030003200464229184001821020001000100010001000109048000140021709287462903931020001000100020003000288132887111610011000100010000301000200100000013081916970163133066203533219381828707228341164831299114836100010002945929541295402957829307
620042914023203003000010047052897201180972000100010001000100010910800050021716286692886531020001000100020003000289702904611610011000100010000001000000100020013209909869633140168202363220382515807028374157191307614306100010002924129371291582929829009
620042918323402002000010047522880501179932000100010001000100010908800010021719287782893231020001000100020003000290422887911610011000100010000201000000100020013259930868903157173203223259383116767628589158171295814056100010002903528988290252895028949
620042924323302001000010047512910400181932000100010001000100010902800030021698288932915031020001000100020003000291102914711610011000100010077301000000100020013138942769213099062204743201382325736728595158891310814684100010002919029226291332914029253
62004292402360400300015000458629008001825120001000100110021000109218008500216982889729333829200210011000200030032913829053216100110001000100323010010242510002232713073920768983123265205983249382721737628668159461303214864100010002919829215292072907729238
620042921523601012011279177004588290590118097200210011000100110021091180083002173128979291109462002100110002002300329081292432161001100010001002022100104470100132013047933369003107171205003283381924727228554162151300514641100010002930329154292932899129078
6200429329235001111111472650046402900700183522000100010001000100010899800050021683286492897031020001000100020003000290212893211610011000100010000201000020100000013171925569443127087204713204382615737728419156591317414387100010002900528886290592890128962
620042903623201002000010047832884911180012000100010001000100010906800040021786287752902131020001000100020023000287462888411610011000100010000301000100100000013102954469253107163203083151383019778628388156131281914447100010002902829029289492903028934
6200428879232010030001350004631288030117977200010001000100010001091080000002172228695290273302000100010002000300028998288011161001100010001000030100010475100000013126939469393192074202943246382619787628389156981295614330100010002887628882290092913928982
620042899723201000001120004689288330018036200010001000100010001089980005002171328814290157102000100010002000300028843288151161001100010001000020100000395100030013010934669293155062202813250383125766628415157861311314563100010002889228942290942890229037

Test 2: throughput

Count: 8

Code:

  st3 { v0.s, v1.s, v2.s }[1], [x6]
  st3 { v0.s, v1.s, v2.s }[1], [x6]
  st3 { v0.s, v1.s, v2.s }[1], [x6]
  st3 { v0.s, v1.s, v2.s }[1], [x6]
  st3 { v0.s, v1.s, v2.s }[1], [x6]
  st3 { v0.s, v1.s, v2.s }[1], [x6]
  st3 { v0.s, v1.s, v2.s }[1], [x6]
  st3 { v0.s, v1.s, v2.s }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020640057310000009600396304002716160025161111100834408000010080000800005001839712642734040021400494004219959032000016010020080000800002001600002400004004340042118020110099100100800008000010080000034008000202800022340511001161140039080000800001004004340044400434004440043
16020440043310000000300102504002716160025161242100833408000010080000800005001839856642959040021400424004219959032000116010020080000800002001600002400004004240043118020110099100100800008000010080000034008000205800022340511031161140049080000800001004004440043400434004340043
16020440049311000001231026310400341600025161486100829358000010080000800005001839712647961140023400424004819959032000716010020080000800002001600002400004004240049118020110099100100800008000010080000034008000202800002340511001161140039080000800001004005040043400494005040043
16020440043311000001200028140400341600025162086100814348000010080000800005001839712644392040021400424004319959032000116010020080000800002001600002400004004340052118020110099100100800008000010080000034008000202800022340511001161140040080000800001004004940049400504004941004
1602044004231100000000027920400271616002516305710083969800001008000080000511192172064858104002140049404502011009200061601002008012080120200160240240360400494024611802011009910010080000800001008000023419608000200801220344511001162140229080000800001004169941648414504084540250
1602044024631001121017900229004023416160046162086100811168006010080116800005001839808643405040208402424004320107032048316032420080120800002021602382400004044540043218020110099100100800008000010080067734890801200880122202512701253140040080000800001004045240054402544004940044
16020440243312000010600331404023801627004616380010083032801201008011680000500184970865143704002140453400432011001120001160324200800008011920016000024036040042404501180201100991001008000080000100800000341940800020280000200511001161140039080000800001004004340043400494004440043
16020440043311000000310293304003316160025162913100811308000010080000800005001839712645758040024400434004919959032000016010020080000800002001600002400004004340048118020110099100100800008000010080000034008000203800022340511001161140046080000800001004004340044400434004440043
16020440043311000000600106904002716160025160955100814768000010080000800005001839712644174040021400424004319959032000116010020080000800002001600002400004004240049118020110099100100800008000010080000034008000202800022340511001161140039080000800001004004940050400504005040044
16020440042311000000300421720400281616002516125310082451800001008000080000500183971264498304002340043400491996203200001601002008000080000200160000240000400434004811802011009910010080000800001008000003400800020280002000511001161140040080000800001004004340044400604004440044

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002640055311100003018024201400371616025160057108183580000108000080000501839712640108040025400544005219987032003416001020800008000020160000240000400514005411800211091010800008000010800161444008001601198000214441410502011611400518000080000104005240053400534005440052
160024400543111010060220207014003816002516006810834078000010800008000050184007264978004002840054400541998203200231600102080000800002016000024000040053400541180021109101080000800001080014144401800160221800021601400502011611400498000080000104005140055400544005540051
16002440051311100006321022461400371616125160093108325980000108000080000501840048652141040029400544005419988032003416001020800008000020160000240000400554005311800211091010800008000010800141444008001601218000216441400502011611400508000080000104006340064400634005240055
16002440052311111004822028681400381616125161089108101780000108000080000501840024650032040029400544005319990032003416001020800008000020160000240000400524005411800211091010800008000010800141444018001600168000216441400502011611400518000080000104004940053401964005340063
160024400543101100054190461400471516125160041108003380000108000080000501840504646230040026400644005519990032004216001020800008000020160000240000400524006211800211091010800008000010800141550018001600178000216441400502011621400518000080000104005240052400524005340063
16002440054310101004172003796140036016125161439108005580000108000080000501840072647124040028400544005419986032003416001020800008000020160000240000400524005411800211091010800008000010800161544008001600168000216441400502011611400488000080000104005640056400564005540055
1600244005531111100181703782140039160125160642108132980000108000080000501840120650106040030400534005519990032003416001020800008000020160000240000400514005311800211091010800008000010800151444018001601178000216441400502011611400498000080000104005640055400554005540055
16002440051310110002551802117040036016125161477108302080000108000080000501840072642936040037400514005319986032004216001020800008000020160000240000400534005111800211091010800008000010800141450008001601188000216441400502011611400598000080000104005440055400554005540053
160024400533101100045317033901400381616125162902108059380000108000080000501840024649801040029400544005319988032003416001020800008000020160000240000400534005411800211091010800008000010800151444008001600188000216441400502011611400498000080000104005540055400554005540054
160024400533101110034218128161400371616225163824108360680000108000080000501839856648220040028400424005219987032003316001020800008000020160000240000400544005411800211091010800008000010800141544008001402208000214441400502011611400508000080000104005540054400644006340055