Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, 2 regs, 16B)

Test 1: uops

Code:

  st1 { v0.16b, v1.16b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03l1i tlb fill (04)mmu table walk instruction (07)mmu table walk data (08)09l2 tlb miss instruction (0a)l2 tlb miss data (0b)18191e1f22243a3f464951schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)5e5f696d6emap rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)91inst simd store (99)inst ldst (9b)l1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafl1d cache miss st nonspec (c0)c2c9cfd0d2l1i cache miss demand (d3)l1i tlb miss demand (d4)d5map dispatch bubble (d6)d9dadbddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)f5f6f7f8fd
6200528954232570050001201047012847720238882000020002000100000181603028163288423102000200040002859828690116100101000100020000602000003200060013166941069473160268202163244379922636428234015600129281504920002892128826286952878228780
620042884123002002000601046502847402237852000020002000100000151603728139288823102000200040002865728740116100101000100020000602000003200060013260911269343210169202873234380728697228287015673130611496820002888228876288212884928887
620042880023205000000610048122852522237592000020002000100000161602328114288173102000200040002870128704116100101000100020000602000000200060013196938069183116063202323250380221687128239015675130501508020002879128881288602882828802
620042883023001004000010047132857400237222000020002000100000141603528169289513102000200040002875828630116100101000100020000602000000200060013133930169173187168202793199380326747128182015927130881490920002883628893288662882728863
620042883723202003000310046592849522237082000020002000100000151602328121288293102000200040002876028587116100101000100020000402000000200060013098936269173115063202123192380619626728174015709132541489320002882328918287572881528910
62004288422330200200000004700285502023820200002000200010000012160662817928917310200020004000285632870211610010100010002000000200000020000042413298923069453152270202653195380721656428209015347133231501020002877328855287152878228937
62004287792310000401013200146512844422236052000020002002100100201605828058288433302002200040042869428646116100101000100020000622007002280200260013049936669153181266202103160380827716928148015448133211503420002889528836289412878128848
6200428893233021030110890045442858422238342000020002000100000101605528256289397102000200040002877628793116100101000100020002602002002775200400013210931069653136165202803299381828687328290015290131421478520002887828710288382884828851
62004288552320300310113200047782843900237272002020002000100102141603928172288043472000200240002881228684216100101000100020004622000000200060013154915069773081168202003247380926627028181015692128801497120002914928809288122877828819
620042897623203001100000045662844822238062000020002000100000131604628121287273102000200040002886028768116100101000100020000002000000200000013119923869193146168201353192380727736528118015677128891511620002877728923288042886828847

Test 2: throughput

Count: 8

Code:

  st1 { v0.16b, v1.16b }, [x6]
  st1 { v0.16b, v1.16b }, [x6]
  st1 { v0.16b, v1.16b }, [x6]
  st1 { v0.16b, v1.16b }, [x6]
  st1 { v0.16b, v1.16b }, [x6]
  st1 { v0.16b, v1.16b }, [x6]
  st1 { v0.16b, v1.16b }, [x6]
  st1 { v0.16b, v1.16b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f233a3f46494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd store (99)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbcl1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? ldst retires (ed)? int retires (ef)f5f6f7f8fd
16020580182642000000000300800321616225160100100160000100160000500368048718002680063800635996736000516010020016000020032000080054800541180201100991001008000080000100160015144400160016001416000216441400511011611800601600001008004880053801938005580048
16020480052643100000003200180039016225160100100160000100160000500367969208002280054800555996636000516010020016000020032000080064800631180201100991001008000080000100160014145001160016001816000216441400511011611800511600001008005580064800558005580053
1602048005564311000000020018003816162251601001001600001001600005003679956080038800548005459960360012160100200160000200320000800638005611802011009910010080000800001001600141545311160016011916000214441400511011611800511600001008004880053800558005580053
160204800486431001000031901800491616325160100100160000100160000500368046008002980053800545997636000916010020816204620032000080054800541180201100991001008000080000100160014144400160016101816000216441400511011611800511600001008005580055803298019380048
160204800546441000000061901800391616125160100100160000100160000500368007608002780054800635996036002116010020016000020032000080053800541180201100991001008000080000100160014144400160016012016000216441410511011611800611600001008005680064800548005580049
1602048006464310100000017018003816167825160100100160000100160000500367990808002780054800545996636038116031820016000020032000080430800641180201100991001008000080000100160014154300160016121416000116441401511011611800511600001008006480055800558005380055
160204801916431001000001401800401616025160100100160000100160000500367990808002980055800475996536000516010020016000020032000080055800541180201100991001008000080000100160015144600160014012216000216461400511011611800511600001008005580048800488005580055
1602048006364311110000014108002816161251601001001600001001600005003679472180135800548004259955360000160100200160000200320000800428004211802011009910010080000800001001600000500016000200016000000000511011611800391600001008004180044800438018480041
16020480042642000000003300800251616025160100100160000100160000500367944808001780040800425995636000016010020016000020032000080053800541180201100991001008000080000100160015164402160016001516000214441410512311611800531600001008005580055800538004880055
16020480047643100100103180180040016625160100100160000100160000500367988408003080047800535996836001216010020016000020032000080192800541180201100991001008000080000100160014144400160016001916000216441400511011611800441600001008004980048800488005380055

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)18191e1f22233a3f46494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)696b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int alu (97)inst simd store (99)inst ldst (9b)9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss st (a2)a4ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafbcl1d cache miss st nonspec (c0)l1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0ea? ldst retires (ed)? int retires (ef)f5f6f7f8fd
160025800596200000093310080027161602516001010160000101600005036794248001708005080040599943600221600102016000020320000800428018121800211091010800008000010160014034001600021118160002234000502031634800390160000108004380052800438004380051
160024800516200000024310080034161602516001010160000101600005036794248001708005180051599773600301600102016000020320000800428004211800211091010800008000010160014143400160002002160000234000502041646800390160000108004380041800438004380051
16002480042621000004231900080044161692516001010160000101600005036794248001708005280053599883600221600102016000020320000800528004211800211091010800008000010160015143600160016001716000214361410502051634800390160000108004380043800508004380050
1600248004262000000483201008002716002516001010160000101600005036797608001508004280042599823600291600102016000020320000800528005511800211091010800008000010160014144200160016112316000216361400502021633800450160000108005180043800438004180054
160024800526200000048030008002701602516001010160000101600005036794248001708004280042599883600221600102016144020320000800428004211800211091010800008000010160014153600160002008160002234000502041634800440160000108004380043800528004380041
1600248004062100000450310080025161602516001010160000101600005036793528013508004080042599773600311600102016000020320000801918018011800211091010800008000010160000153600160002008160002234000502041643800390160000108004180043800438004380043
160024800406210000003000800351600251600101016000010160000503679424800240800428004259993360033160118201600002032000080059800591180021109101080000800001016001414360116001600161600022361410502031644800560160000108006080060800418004380051
160024800596211000063310080027160025160010101600001016000050367942480026080042800425998436002216001020160000203200008018080058118002110910108000080000101600150340016001600316000200000502061634800440160000108006280054800598005080041
160024800526211000063910080027161602516001010160000101600005036794248002508004080042599863600221600102016000020320000800518005011800211091010800008000010160000150310160016002160002234000502041644800390160000108004380043800438004380050
1600248004262000000129910080027161602516001010160000101600005036794248001708004980042599853600201600102016000020320000800518005011800211091010800008000010160000036011600160051600002340005020316157800390160000108004880061800538018280043