Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (single, H)

Test 1: uops

Code:

  st4 { v0.h, v1.h, v2.h, v3.h }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
630072950423632050003000048272931701164743000200010002000100012810178987235872914529365310300010002000200050002940029447116100110001000100002010001001000000013386946669193133250207803316381713474728706160751384414391100020002946829490294812944629362
630042944423604010001800004631295060016536300020001000200010001279817903623577292602954931030001000200020005000294552939511610011000100010000001000100100003001312595117006313315120796332038219515228803162331344614469100020002947429492294852941629565
63004295132370203000120000046162942500166083000200010002000100012797179105235582930629641366300010002000200050002939329510116100110001000100003510001001000030013391947669203149052208883274381214534728793163371366414441100020002957329462295162969329485
63004295332370202000111000045512940600166123000200010002000100012800178986236122938929559310300010002000200050002954429411116100110001000100002010000031000030013198949069423153248209423365381214514628830164021375714260100020002968429463296462956729578
63004296402380104000990100464729700111657430062000100020001001129031791942357529457299803103000100020002000500029562296384161001100010001000230100010395100003011413226939069913134144209523241381611444428857163121360714607100020002952729523296102953729717
6300429508236040301035410004659295980016729300320001000200010001280517921623586294282957574430001000200220025000296712956051610011000100010032031000023931002020013282937269003119144208823352381117454428804162691357914416100020002954929591295292952329582
63004296642360200000702100046302956400168483000200010002000100012817179000235352918729423310300010002000200050002928529468116100110001000100002010001001000000012952918568323081142210923157380852524529426163821373514620100020003022830231303013030330371
63004303182430302010330000466729479111635930002000100020001000128011790752355429200292922610300310002000200050002955329660116100110001000100200010001031000020013143944569603122142207413221381112464528540161041357714236100020002940529479293312927929340
6300429436227020200013800004769293350116311300020001000200010001279817897223505293852949431030001000200020005000294392940611610011000100010000201000003100003001338393346904319324420818325938068434428560159521349714289100020002947529333293112944129334
630042933522704020012410004735293631016441300020001000200010001280617899523503291672932631030001000200020005000293582937511610011000100010020201000003100003098013187947769893218149208063387381412474928745161441390614484100020002947529362295672970428780

Test 2: throughput

Count: 8

Code:

  st4 { v0.h, v1.h, v2.h, v3.h }[1], [x6]
  st4 { v0.h, v1.h, v2.h, v3.h }[1], [x6]
  st4 { v0.h, v1.h, v2.h, v3.h }[1], [x6]
  st4 { v0.h, v1.h, v2.h, v3.h }[1], [x6]
  st4 { v0.h, v1.h, v2.h, v3.h }[1], [x6]
  st4 { v0.h, v1.h, v2.h, v3.h }[1], [x6]
  st4 { v0.h, v1.h, v2.h, v3.h }[1], [x6]
  st4 { v0.h, v1.h, v2.h, v3.h }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5639

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402074509135000000123000117920454480161372925252657100171479800001001600008000050020859683320578044794451844522115244031536224010020080000160000200160000400000449264523111802011009910010008000080000100800000422437080002078800022420051100141613134531400800001600001004523145103450674522145321
2402044510935100000930001203704531316161489625251695100172389800001001600008000050020726333291957044840451584955015003031522324010020080000160000200160000400000452934496911802011009910010008000080000100800000422406080002024800022420051100141613134537800800001600001004483244910453374500145267
24020444875363000001230001241304540216162473925253214100172100800001001600008000050020935833313774045014453964537115254031484324010020080000160000200160000400000452854485211802011009910010008000080000100800000422229080002023800022420051100141613114519400800001600001004500545119449144535044887
2402044479436300000030001241804521616162080225251969100174773800001001600008000057620755743345169045004451594506714889031476024010020080000160000200160000400000453704496311802011009910010008000080000100800000422229080002017680002242005110081613134523000800001600001004518844966453564500945292
24020444811367000000300012083045143160081625251369100173050800001001600008000050020715173311290045190451214479515218031493524010020080000160000200160000400000450404510511802011009910010008000080000100800000422362080002027800000420051100131611134545800800001600001004510545070448374503945565
240204453723610000003000122940450461616359052525241810017200480000100160000800005002048428333021804506244973451081513603151702401002008000016000020016000040000045440453371180201100991001000800008000010080000042214508000212080002200051100111614154523100800001600001004512745052449524501945165
24020445140359000009300013267044937161619838252520881001724428000010016000080000500208612333408530452294522544830151320315194240100200800001600002001600004000004486244975118020110099100100080000800001008000000237808000002980002242005110013161364498500800001600001004531544592454154544945055
24020445379364000000310012137044676161618839252524381001731438000010016000080000500209769833322201452984515545161150000315281240100200800001600002001600004000004499145192118020110099100100080000800001008000000250408000201480002242005110013161264457800800001600001004550144813496014498144982
2402044590536500000123000116200453111616198922525129310017284880000100160000800005002097589329751104520845036451381480803151022401002008000016000020016000040000045014455561180201100991001000800008000010080000002593080000014380002200051100131612124497600800001600001004511744822451624527444931
240204454803630000012000012188045294161615798252531931001723558000010016000080000500207190733597070446874530144725150350314860240100200800001600002001600004000004503245303118020110099100100080000800001008000004224570800020107800020420051100121613134506600800001600001004522145308450774523645144

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5641

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 18 | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
24002745033350000002013039451971616259582525225310170984800001016000080000502062699329362945362449724514914664031484324001020800001600002016000040000044892456061180021109101080000800001080000034218908000000580002234050385164545452080000160000104497644823455244510845337
24002449644350000000011435453821616269462525236510171925800001016000080000502082154329601445113453894532814986031506424035320801201600002016000040000044695448311180021109101080000800001080000034216908000210280000234050205165545009080000160000104488545210451004516445539
2400244517934900000801225044642161613908252523031017290980000101600008000050207212933184984505545080452591504303147552400102080000160000201600004000004524845099118002110910108000080000108000000202908000200280002234050205164445327080000160000104513045483449004527845000
2400244501035000000201133645277016389442525225310172335800001016000080000502072820337178544759451074500914540031485424001020800001600002016000040000045126451231180021109101080000800001080000034249208000210280002234050205165545317080000160000104553145327451474454045223
240024454323470000080127424508000379362525308110171658800001016000080000502087547330519645128453944492815382031495624001020800001600002016000040000045502449931180021109101080000800001080000034195108000200880002234050204163445576080000160000104537845126448804497045459
240024447683470000020113684573416161912271122538771017307180240101607048054056210072232951874453145534458691535959314913240698208012016096320160240402409454224500421800211091010800008000010800602342752280182021644280002034050405255645320080000160000104571645604496374485745312
2400244577235301435289001275845306161677692525273310172848800001016000080000502092120332340745637449954506214851031516424001020801201600002016000040000045074450331180021109101080000800001080000034218508000210280002234050204165445186080000160000105000250183486525049950039
2400245007338220332700113052452171616289262525193010171856800001016000080000502094323335640645045448104915515543031444124001020800001600002016000040000045140449812180021109101080000800001080000034229908000200880000234150205164445416080000160000104512745006448684517445243
2400244502733800000201082444893161613826252523561017202980000101600008000050208302533216254509544932450551471003153272400102080000160000201600004000004504745220118002110910108000080000108000003424010800020028000020050205164345138080000160000104519444873452914522245073
24002445090338000002011694449911616208562525216210172709800001016000080000502083126331161444833452084525815203031473124001020800001600002016000040000045267450921180021109101080000800001080000034211908000000380000234050205166445231080000160000104452644601452764531744728