Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 3 regs, 2S)

Test 1: uops

Code:

  st1 { v0.2s, v1.2s, v2.2s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 4.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
63006301992413102212222641790461528889001806840001002100220001001100120045010160108008018217902874029122194840042006100050002000292632883611610011000100020043432002222000040001313095056928319405120348338238072050552847910001583912905140242000100010002899629057293602926728892
630042884023120000000001047362872100179644000100010002000100010002000500015897800050821711285932883631040002000100050002000290102907311610011000100020000402000002000040022311328795176933318205120170321638132055582835110001515612577138922000100010002884428838288982888128870
6300428869232300010000010451429403001839140441007100920201006100920245045162528064191821939291372933684266404920141009503020162954029198151610011000100020142402018257002004040001322893586986315705720263314638121654542832410001580212680136522000100010002856328555288242876329098
6300428753223100011000132004791286130017843400010011000200010001000200050001590980005102171828473287413104000200010005000200028979288111161001100010002000440200005982000040001331195206893310805120132321738102256542838010001551412690139732000100010002881228671287822872228761
63004287552232000010001321046302859100177364000100010002000100010002000500015899800040021718285622882031040002000100050002000287512874911610011000100020002402000002000040001327594246934311815320172321338151653532824310001529312687138182000100010002887028739288142885828738
630042874422311111010003047702864620177924004100010002000100010002000500015911800011021737285592880731040002000100050002000287952866611610011000100020033412002122000262101335292766966315505120147319938131257532822410001533212795137902000100010002871828811287902883128821
63004287292232101111001323046072858420177394000100010002000100010002000500015904800020021655285662883931040002000100050002000287292879611610011000100020034402002122000242101311494427015315505120266321538101453522834510001570012734140462000100010002874428849287682878428748
630042872422321111010003047912858000178404000100010002000100010012000500015909800000021777285652879431040002000100050002000287752872711610011000100020073412002352000242101314595386954325905420200323438181052502825910001582612626136032000100010002883328771287452879428856
630042878022321100010003047902861700175664000100010002000100010002000500015906800001021736284822857031040002000100050002000285092867011610011000100020032402002122000242101357496047106325805519884321638131650572822710001547712809134232000100010002879128505286782863528649
6300428851223012110100213047802847400178214000100010002000100010002000500015908800000021715280122873271040002000100050002000285152845511610011000100020000402000002000040001337994067053325224820057318038131652482826410001505412389131972000100010002858128518284742874428510

Test 2: throughput

Count: 8

Code:

  st1 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st1 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st1 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st1 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st1 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st1 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st1 { v0.2s, v1.2s, v2.2s }, [x6], x8
  st1 { v0.2s, v1.2s, v2.2s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2402068004964200000006900190800351616025320122801008003416006080105800061600071290770367988464218418001880043800434996165001032011720016001680008200400040160016800508005811802011009910010080000800001001600000340016000210216000224200051106232118004680000016000080000801008004380208800448004980043
24020480042643000000006002708002716160253201198016080582160000801008000016000012907443679805640168080018800408005049956350000320100200160000800002004000001600008005080040118020110099100100800008000010016000000001600021021600022340005110116118004080000016000080000801008005380059800518005280059
240204800586431011000018005600800431616725320800801008058516000080100800001600006510703679541642190080035800588005849962350007320100200160000800002004000001600008005880058118020110099100100800008000010016000003621160016001916000016340005110116118003980000016000080000801008004480048800598005880052
240204800496431011000314008421800330167253201288010081432160000801008000016000011906273680021640125080033800518005249974350010320100200160000800002004003571600008004380224118020110099100100800008000010016001423400160016001616000216360005110116118020880000016000080000801008005980058800528005980061
240204800576431011000132191014180034160625320151801008004816000080100800001600001370507367980564064508003380049800494996135001632010020016000080000200401217160000800588004311802011009910010080000800001001600141436001600760021600020014005110116118005680000016000080000801008005980053800508022880054
2402048005164310111000200018180047016112253206508010080143160000801008000016010810507223679805642438080178800428022149970350141320100200160000800002004000001600008005280215118020110099100100800008000010016001414365911600141094316000216014105110116318021480074016000080000801008021680053802248006180212
240204802186430010000132180010651800801616025320988801008037116000080100800591601081370507368023764300708003380058802044996535001632010020016000080000200400000160000800588022611802011009910010080000800001001600151542131600161217160002163414005110116118004680000016000080000801008004180212800628005480059
24020480049644100120013217004808003316160343206818017180753160000801008005916000071919036802616405520800388005180223499633501343201002001601228000020040000016012280043800432180201100991001008000080000100160060034001600622021600622340405110116118020480139016000080000801008022880212803948039280219
240204805576441022031132140012080360161621347320866801798009416012080216800581600001822672369927264058208017180210802075009075040232033720016012280061200400614160000803698037321802011009910010080000800001001600602366421600630018951601222340205123334118019980183016000080000801008038380210803808021380043
240204803746450010000030026080034161602532013080100800311600008010080000160000183028636795416400980800258004380043499563500003201002001600008000020040122216000080043800431180201100991001008000080000100160000034001600020081600022340005110116118004080000016000080000801008004380051800448004480050

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
2400268005259900000001214006180052161632532081680010805591600008001080000160000119015836803336400000800278005280052499953500313200102016000080000204000001600008004980053118002110910108000080000101600141400016001600181600021636141502042163618800448000016000080000800108005080050800488005980049
240024800526201001100019007121800531614025320023800108000716000080010800001600001190110367980564305208019780060800504999335002832001020160000800002040000016000080047800471180021109101080000800001016001514360016001400141600021636141502018163719800398000016000080000800108004180044800448004380043
2400248004262100000000310508003616163253200128001080727160000800108000016000018297693679541640006080018800408004249978350162320010201600008000020400000160000800428004211800211091010800008000010160000034001600621021600002000502043164121800408000016000080000800108004380041800518004480041
24002480049621000000000006080028161602532001080010807091600008001080000160000182976936795416400030800298020880043499783500313200102016000080000204000001600008004380043118002110910108000080000101600000340016000200216000223400502038163818800458000016000080000800108004380043800508004180041
24002480050620000000000001224080028160025320010800108000016000080010800001600001290239367985364158408001880043800434998435002232001020160000800002040000016000080043800401180021109101080000800001016000000001600020097216000223400502019161947818918000016000080000800108004180041800448021180041
24002480040621000000003006080027161602532001580010800001600008001080000160000182976936860546400290800188004280049499783500203200102016000080000204000001600008004280042118002110910108000080000101600000346501600023051600022000502041164121800398000016000080000800108004380041800448005080043
24002480042620000000003001024080027161602532001080010806311600008001080000160000182976936795416400000800248004380040499783500203200102016000080000204000001600008004380043118002110910108000080000101600000340016000000216000223400502015161839800408000016000080000800108004380043800518004480041
2400248005062100000006300208002801602532058380010800001600008001080000160000182976936795416400080800188005080050499783500293200102016000080000204000001600008005080049118002110910108000080000101600000340016000200016000223400502018161941800408000016000080000800108004180041800518004180050
24002480050621000100090001024080027161602532001980068800011600008001080000160000182976936795416400180800258004380043499783500313200102016000080000204000001600008004380043218002110910108000080000101600000340016000200141600022000502040161940800398000016000080000800108004380043800448004480043
240024800496200000000010001080044161602532107880010800031600008001080000160000182976936795416400000800188005080040499783500293200102016000080000204000001600008004080050118002110910108000080000101600000340016000200216000223400502018164139800398000016000080000800108004380044802108005080043