Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

STP (signed offset, D)

Test 1: uops

Code:

  stp d0, d1, [x6, #0x10]
  nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 7 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
900611661001101151782520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001610000010001155122161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001510000010000155121162211631000100011671167116711671167
90041166901101151882520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167
90041166801101151882520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001510000310000155132162211631000100011671167116711671167
90041166801101151882520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001510000010000155131161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000111451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167
90041166901101151882520001000100010001000108758000011451166116632420001000100020002000116611661180011000100010001510000010000155121161111631000100011671167116711671167

Test 2: throughput

Count: 8

Code:

  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  stp d0, d1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020640058299110001321910263414003516160251632361008235180000100800008000050018399526490650400254005040061199743200181601002008000080000200160000160000400584005811802011009910010080000800001008000000080002105800022340005110116114003980000800001004004340043400434004440043
160204400492990000012310285704002716002516346110081744800001008000080000500183985665098104002140043400421995932000116010020080000800002001600001600004004240252118020110099100100800008000010080000034080002000800022340005110116114004080000800001004004440043400434004340043
16020440042300000040000379504002716160251638941008268780000100800008000050018397126528840400214004240042199613200011601002008000080000200160000160000400494028111802011009910010080000800001008000003408000200280000000005110116114004080000800001004004440043400434004440043
160204400483000000000004366040027161602516315110081684800001008000080000500183971264989904002140042400421996232000116010020080000800002001600001600004004340049118020110099100100800008000010080000034080000000800020340005110116114004080000800001004005040059400534005940048
160204400583001111001400331514004316163251638391008213780000100800008000050018400006497020400254005040059199633200091601002008000080000200160000160272400424004211802011009910010080000800001008000000080002000800020340015110116114004080000800001004005340051400534005040051
1602044005930010010021003210140043161652516144610082348800001008000080000500183990465097404002740058400581996332000716010020080000800002001600001600004005940059118020110099100100800008000010080014140280014001480002163614005110116114004780000800001004005140051400584005240059
1602044005230011010021003281400351616325162989100821548000010080000800005001839928651069040025400504006119971320010160100200800008000020016000016000040058400471180201100991001008000080000100800151636180016012080002143614005110116114004780000800001004004340043400444004340044
160204400493000000003004382040027161602516376910081684800001008000080000500183971265177504002140048400431995932000016010020080000800002001600001600004004240043118020110099100100800008000010080014143628001602178000016014105110116114004780000800001004004440049400434004440043
16020440042300000000300359204002816160251632361008029480000100800008000050018397126515400400214004240042199593200001601002008000080000200160000160000400434004211802011009910010080000800001008000003408000000080002200005110116114003980000800001004004940044400504004340043
16020440042299000000000164604003416166251632241008437380000100800008000050018397126531660400214004840042199593200001601002008000080000200160000160000400424004311802011009910010080000800001008000003408000200280002000005110116114004080000800001004004340043400434004440043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600264005430010100100020123711400481616125160856108434780000108000080000501840504648565400254005240054199880320030160010208000080000201600001600004005040051118002110910108000080000108001414440180016111780002140141502023162323400488000080000104005240055400524005540053
1600244005430010110100019023711400390162251622211082963800001080000800005018404806401094002840054400631998803200321600102080000800002016000016000040050400541180021109101080000800001080015144400800160021800021644140502022162323400518000080000104005140052400554004940051
16002440047299100101000140346214003501612516006310839398000010800008000050184007264712940028400504006219998032003116001020800008000020160000160000400544006311800211091010800008000010800151600180016111780002160140502020162419400598000080000104005440055400564005340054
16002440062299101000001819140931400390161251600591081690800001080000800005018400726471324002540053400521998603200281600102080000800002016000016000040050400501180021109101080000800001080015144401800140014800021444140502023162325400478000080000104005540049400554005440055
16002440048300101000000141322714003916161251600531080029800001080000800005018399046425384002540050400621998603200281600102080000800002016000016000040054400621180021109101080000800001080015144400800160114800021644140502023162123400478000080000104005440051400634005540051
160024400493001000000002212377140037000251600461084133800001080000800005018400966539734002940052400481998303200331600102080000800002016000016000040051400521180021109101080000800001080014154220800160119800021444140502023162022400478000080000104005540053400514005040055
160024400473001000000001704044140039161603616492110825008000010800008000050183990464018440028400634005219987032003016001020800008000020160000160000400504004811800211091010800008000010800161503080014102080000160140502023161422400598000080000104005340053400554005140052
1600244005030010110000018169140035016125163468108397580000108000080000501840000647140400264005440054199860320028160010208000080000201600001600004005440053118002110910108000080000108001514440180016001680002160140502024162625400608000080000104005540048400554005340055
160024400513001011010001704049140036016125163663108411780000108000080000501839928643766400264005440054199890320042160010208000080000201600001600004005340054118002110910108000080000108001416440080014011680002160140502023161823400528000080000104005140052400514005240055
16002440052300101100000191261814003716160251621681080049800001080000800005018400006485394002340050400521999003200311600102080000800002016000016027640054400541180021109101080000800001080015154401800160219800001644141502023161223400518000080000104005340055400634005140055