Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

STP (signed offset, D)

Test 1: uops

Code:

  stp d0, d1, [x6, #0x10]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd store (99) | inst ldst (9b) | a0 | a2 | a7 | a8 | ac | af | bc | dcache store miss (c0) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
900611661001101151782520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001610000010001155122161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001510000010000155121162211631000100011671167116711671167
90041166901101151882520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167
90041166801101151882520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001510000310000155132162211631000100011671167116711671167
90041166801101151882520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001510000010000155131161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167

Test 2: throughput

Count: 8

Code:

  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 18 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16020640058299110001321910263414003516160251632361008235180000100800008000050018399526490650400254005040061199743200181601002008000080000200160000160000400584005811802011009910010080000800001008000000080002105800022340005110116114003980000800001004004340043400434004440043
160204400492990000012310285704002716002516346110081744800001008000080000500183985665098104002140043400421995932000116010020080000800002001600001600004004240252118020110099100100800008000010080000034080002000800022340005110116114004080000800001004004440043400434004340043
16020440042300000040000379504002716160251638941008268780000100800008000050018397126528840400214004240042199613200011601002008000080000200160000160000400494028111802011009910010080000800001008000003408000200280000000005110116114004080000800001004004440043400434004440043
160204400483000000000004366040027161602516315110081684800001008000080000500183971264989904002140042400421996232000116010020080000800002001600001600004004340049118020110099100100800008000010080000034080000000800020340005110116114004080000800001004005040059400534005940048
160204400583001111001400331514004316163251638391008213780000100800008000050018400006497020400254005040059199633200091601002008000080000200160000160272400424004211802011009910010080000800001008000000080002000800020340015110116114004080000800001004005340051400534005040051
1602044005930010010021003210140043161652516144610082348800001008000080000500183990465097404002740058400581996332000716010020080000800002001600001600004005940059118020110099100100800008000010080014140280014001480002163614005110116114004780000800001004005140051400584005240059
1602044005230011010021003281400351616325162989100821548000010080000800005001839928651069040025400504006119971320010160100200800008000020016000016000040058400471180201100991001008000080000100800151636180016012080002143614005110116114004780000800001004004340043400444004340044
160204400493000000003004382040027161602516376910081684800001008000080000500183971265177504002140048400431995932000016010020080000800002001600001600004004240043118020110099100100800008000010080014143628001602178000016014105110116114004780000800001004004440049400434004440043
16020440042300000000300359204002816160251632361008029480000100800008000050018397126515400400214004240042199593200001601002008000080000200160000160000400434004211802011009910010080000800001008000003408000000080002200005110116114003980000800001004004940044400504004340043
16020440042299000000000164604003416166251632241008437380000100800008000050018397126531660400214004840042199593200001601002008000080000200160000160000400424004311802011009910010080000800001008000003408000200280002000005110116114004080000800001004004340043400434004440043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 18 | 19 | 1e | 1f | 22 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd store (99) | inst ldst (9b) | 9f | a0 | a1 | a2 | a4 | a6 | a7 | a8 | a9 | ac | af | bc | dcache store miss (c0) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600264005430010100100020123711400481616125160856108434780000108000080000501840504648565400254005240054199880320030160010208000080000201600001600004005040051118002110910108000080000108001414440180016111780002140141502023162323400488000080000104005240055400524005540053
1600244005430010110100019023711400390162251622211082963800001080000800005018404806401094002840054400631998803200321600102080000800002016000016000040050400541180021109101080000800001080015144400800160021800021644140502022162323400518000080000104005140052400554004940051
16002440047299100101000140346214003501612516006310839398000010800008000050184007264712940028400504006219998032003116001020800008000020160000160000400544006311800211091010800008000010800151600180016111780002160140502020162419400598000080000104005440055400564005340054
16002440062299101000001819140931400390161251600591081690800001080000800005018400726471324002540053400521998603200281600102080000800002016000016000040050400501180021109101080000800001080015144401800140014800021444140502023162325400478000080000104005540049400554005440055
16002440048300101000000141322714003916161251600531080029800001080000800005018399046425384002540050400621998603200281600102080000800002016000016000040054400621180021109101080000800001080015144400800160114800021644140502023162123400478000080000104005440051400634005540051
160024400493001000000002212377140037000251600461084133800001080000800005018400966539734002940052400481998303200331600102080000800002016000016000040051400521180021109101080000800001080014154220800160119800021444140502023162022400478000080000104005540053400514005040055
160024400473001000000001704044140039161603616492110825008000010800008000050183990464018440028400634005219987032003016001020800008000020160000160000400504004811800211091010800008000010800161503080014102080000160140502023161422400598000080000104005340053400554005140052
1600244005030010110000018169140035016125163468108397580000108000080000501840000647140400264005440054199860320028160010208000080000201600001600004005440053118002110910108000080000108001514440180016001680002160140502024162625400608000080000104005540048400554005340055
160024400513001011010001704049140036016125163663108411780000108000080000501839928643766400264005440054199890320042160010208000080000201600001600004005340054118002110910108000080000108001416440080014011680002160140502023161823400528000080000104005140052400514005240055
16002440052300101100000191261814003716160251621681080049800001080000800005018400006485394002340050400521999003200311600102080000800002016000016027640054400541180021109101080000800001080015154401800160219800001644141502023161223400518000080000104005340055400634005140055