Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (single, B)

Test 1: uops

Code:

  st1 { v0.b }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c3 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6200629109234126133100010149122846200177022000100010001000100010900800020002168728501288513102000100010002000100028658285831161001100010001002240100111110001311001317694057012318914612025831143816156561428220152681264714151100010002867128618286552866728789
620042886623412412910101004772286560017695200010001000100010001090680009002172828515287773102000100010002000100028767286391161001100010001002141100101110011311001310595236978314412682005331753811226163428247152181256314122100010002871728776285762875828742
6200428883231124135000010050092864710177492000100010001000100010906800012002175728437287053102000100010002000100028600285251161001100010001003131100112110012412001338296756869326410652002832533809146160328184149831270414345100010002889228780288032860028781
6200428749221123132000020048352855500177862000100010001000100010911800812002172928586286703102000100010002000100028685286111161001100010001002201100101410001010001324094576903317413612014133923802176457327937152071273914038100010002884828753287352870928495
6200428459232123139100030050302828510175852000100010001000100010912800051021710284792871431020001000100020001000286132862011610011000100010021411001012100012112871501338793416910317810602007832083815286458328446160361297614368100010002902129167291512916229125
620042900123312412700001004864285620017832200010001000100010001090780009082168228406286993102000100010002000100028737287181161001100010001003201100100410001311001329795656994312616592019131443813166460328179154711270614222100010002867128853287892867528715
620042867422211713000001004781282030017729200010001000100010001091080008002172328445283383102000100010002000100028784287791161001100010001002243100101110002411001316695627054334515651958332673815116163328306152521255513878100010002876528767286672862628675
6200428652223129135100010052522817900178382000100010001000100010905800050021720283312824331020001000100020001000283602822011610011000100010022011001011100010110013660100877102337914621975331963816196064328271152741258214214100010002819028474288092865628769
620042875323112213210002004653285720017335200010001000100010001090580001002174628438287473102000100010002000100028617286751161001100010001003120100100110001011001348096977014318516612013232033814196370328113153001282613846100010002863328739286332871628759
620042870122313412400002004761283760017969200010001000100010001090180000002174428247284073102000100010002000100028736288081161001100010001002231100101110001412001324394257084318710632000931803808135761328171152611269513925100010002872028749287292872228798

Test 2: throughput

Count: 8

Code:

  st1 { v0.b }[1], [x6]
  st1 { v0.b }[1], [x6]
  st1 { v0.b }[1], [x6]
  st1 { v0.b }[1], [x6]
  st1 { v0.b }[1], [x6]
  st1 { v0.b }[1], [x6]
  st1 { v0.b }[1], [x6]
  st1 { v0.b }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 23 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int store (96) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602064004331000000030040028161602516010010080000800001008000080000500183971264000011040021040043400421995903200011601002008000080000200160000800004004340043118020110099100010080000800001008000004200800021280002200005110116114004080000800001004004440044400444004340044
16020440042311000000300400271616025160100100800008000010080000800005001839712640000415400213400434004219959032000116010020080000800002001600008000040043400431180201100991000100800008000010080000042008000212800022420005110116114004080000800001004004440043400444004340044
160204400433110000012300400271616025160100100800008000010080000800005001839712640000115400210400434004319959032000116010020080000800002001600008000040042400421180201100991000100800008000010080000042008000202800022420005110116114003980000800001004004440044400454004440043
16020440043310000000300400271616025160100100800008000010080000800005001839712640000110400210400424004319959032000116010020080000800002001600008000040043400431180201100991000100800008000010080000042008000202800022420005110116114004080000800001004004440044400444004540043
1602044004331000000030040028161602516010010080116800601008000080000500183971264000010540021040043400441995903200011601002008000080000200160000800004004340043118020110099100010080000800001008000000008000200800002420005110116114004080000800001004004440044400444004540044
16020440042310000000300400281616025160100100800008000010080000800005001839712640000110400210400434004219959032000016010020080000800002001600008000040042400431180201100991000100800008000010080000042008000202800022420005110116114004080000800001004004440044400444004440043
16020440043310000000300400271616025160100100800008000010080000800005001839712640000215400210400434004219959032000116010020080000800002001600008000040043400431180201100991000100800008000010080000042008000212800022420005110116114004080000800001004004340044400444004440044
16020440042310000000300400281616025160100100800008000010080000800005001839712640000210400210400434004219959032000116010020080000800002001600008000040042400431180201100991000100800008000010080000042008000225800022420005110116114003980000800001004004440044400444004440044
16020440043310001009000400281616025160100100800008000010080000800005001839712640000110400210400434004319959032000016010020080000800002001600008000040043400431180201100991000100800008000010080000042008000212800002420005110116114004080000800001004004440044400434004440043
160204400433100000012300400271616025160100100800008000010080000800005001839712640000115400210400434004519959032000116010020080000800002001600008000040042400431180201100991000100800008000010080000042008000202800022420005110116114004080000800001004004440044400444004440043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002640058323000000183000400271616025160186108000080000108000080000501839712640000140221400494004219982032002316001020800008000020160000800004004940043118002110910108000080000108000003400800064403800020340502040164120400398000080000104004440043400444004340052
160024400423110000000900040028016025160010108000080000108000080000501839712640000040021400424004319982032002316001020800008000020160000800004004240043118002110910108000080000108000000008000200207800022340502017164039400398000080000104004340044400434004340049
16002440042311000001165900040028161602516001010800008000010800008000050183980864000004002140043400421998203200221600102080000800002016000080000400434004311800211091010800008000010800000340080062001175800022340502045254118400458000080000104004340044400434004440044
160024400433110000009910040027161602516001010800008000010800008000050183971264000004002140043400421998203200231600102080000800002016000080000400424004311800211091010800008000010800000340080002001131800022340502040163919400398000080000104004940050400504004340043
16002440254312100100061004044716160251600101080000800001080000800005018481006400000400214004340244201360320023160010208000080000201600008000040049400431180021109101080000800001080000034008000200177800022340502019163915400398000080000104066540044400444024640256
1600244024131100000006000402270002516001010802328000010800008010850183971264000004002140043400521998203200221600102080000800002016000080000400424004311800211091010800008000010800000000800021026800022340502038163537400408000080000104004440043400444004340043
1600244004231000000060100400281616025160010108000080000108000080000501839832640000040024400424004219982032002916001020800008000020160000800004004340049118002110910108000080000108000003400800028102800022340502041164119400408000080000104005040049400434004440044
160024400493100000000310040034161602516001010800008000010800008000050183983264000004002140042400421998203200291600102080000800002016000080000400424004311800211091010800008000010800000000800028206800002340502040164039400398000080000104004340044400494005040049
160024400493100000000000040028160025160010108000080000108000080000501839712640000040021400424004319984032002216001020800008000020160000800004004240042118002110910108000080000108000003400800028202800020340502016161943400458000080000104004340043400444004340043
1600244005031000000060001400281616025160010108000080000108000080000501839712640000040021400424004319984032002216001020800008000020160000800004004240042118002110910108000080000108000000008000258014800022340502041163918400398000080000104004440043400444004940043