Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (single, D)

Test 1: uops

Code:

  st1 { v0.d }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
620062889523302300300000100047312859811180572000100010001000100010907800062169728748289653102000100010002000100028840288891161001100010001000030100000010000000129629306692130991677203213202381217707128222157051293314367100010002894828885290232888829024
6200429043232032002700000000467128731111786320001001100010001000109008000102165628634289263102000100010002000100028951288401161001100010001000030100000010000300130419480693631081371202573256381719747728407158661286614707100010002890029004289372887228906
620042891523302700200000100048122877310180372000100010001000100010911800062170528636290153102000100010002000100028897288721161001100010001000000100000010000300128769535691930851765204503226381428707128390159611290814661100010002893428931291102906229148
620042903123202900320000010045522885211180292000100010001000100010902800011217182873128972310200010001000200010002892828823116100110001000100003010000001000030012958946469363154875203673300382125706428386157381297714489100010002894328925290002894129035
6200428977233030002500001000464428844101801420001000100010001000109058000112169128739291193102000100010002000100028897289061161001100010001000030100000010000200131249304683931601173203783272381623637028485161371291314197100010002921629146293032882228763
620042886422302700260009100047172868200177442000100010001000100010900800052175728551288833102000100010002000100028759287081161001100010001000030100000010000000132299364694831461468201343214381833727828333157391286614576100010002879128836287842884128802
62004288392230250028000000004730285830117749200010001000100010001090780001217372860628813310200010001000200010002872928825116100110001000100003010000001000000013143943569443202966200743152381318696428251156581282514192100010002867528770288172879128788
6200428799223030002800112100046782857710178512000100010001000100010909800092172028458288223102000100010002000100028809287871161001100010001000000100000010000200133199522693932021378201973170381825707128248156611293714180100010002873128906288022884928803
620042879622402700290000100047482864800179392000100010001000100010903800002174028547288313102000100010002000100028719288201161001100010001000000100000010000200131639295690631481369202013262382013767628295155171263114467100010002882428823289942887228682
620042889322302800300000100046162862701179202000100010001000100010902800052178428433287463102000100010002000100028700287301161001100010001000020100000010000200130939322695131661664202663143381521716828282154511267514308100010002890828817288632870228802

Test 2: throughput

Count: 8

Code:

  st1 { v0.d }[1], [x6]
  st1 { v0.d }[1], [x6]
  st1 { v0.d }[1], [x6]
  st1 { v0.d }[1], [x6]
  st1 { v0.d }[1], [x6]
  st1 { v0.d }[1], [x6]
  st1 { v0.d }[1], [x6]
  st1 { v0.d }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160206400483100100004022916160251601001008000080000100800008000050018406726400004002140043400421996232001616010020080000800002001600008000040251400421180201100991001008000080000100800003408006200800062345110116114004580000800001004004440043402444004340043
160204400423100000004002716161372516010010080000800001008000080000500183971264000040021400424004219959320000160100200800008000020016000080000400494004211802011009910010080000800001008000034968000002800022345129125114004580000800001004004440043400434004340043
160204400493121081910040033161602516010010080000800001008000080000500183971264000040021400494018619959720000160100202800008000020016000080000400424004311802011009910010080000800001008000036080002011800022345110116114003980000800001004005040050400494004340043
1602044004331000153004003316160251601001008000080000100800008000050018397126400004002140042400431995932000116010020080000800002001600008000040049400421180201100991001008000080000100800003408000222800022345110116114004680000800001004004340044400434004440053
1602044004331100030040028161602516010010080000800001008000080000500183971264000040023400424004319962162000816010020080000800002001600008000040042400431180201100991001008000080000100800003408000002800022345110116114003980000800001004004940043400504005040043
160204400423100003004002716160251601001008000080000100800008000050018397126400004002440043400491996232000016010020080000800002001600008000040042400421180201100991001008000080000100800003408000202800022345110116114004580000800001004004440043400434004340050
16020440042310006598004002800025160100100801168000010080000800005001860340640000400214004340049203603200071601002008000080000200160000800004004240049118020110099100100800008000010080000340800020280002205110116114003980000800001004004440043400444004340043
16020440048301006900400271616025160100100800008000010080000800005001839712640000400214004240042199593200001601002008000080000200160000800004004840043118020110099100100800008000010080000340800020880002205110116114003980000800001004004340044400434004440043
16020440042300000300400271616025160100100800008000010080000800005001839712640000400214004340042199593200001601002008000080000200160000800004004240043118020110099100100800008000010080000008000202800022345110116114004680000800001004025140043400434004440044
16020440042321000301400340160251601001008012480000100800008000050018397126400004002140048400421995932000716010020080000800002001600008000040043400481180201100991001008000080000100800003408000202800022345110116124003980000800001004004340044400434004440050

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 1e | 1f | 22 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160026400423110213040228161602516001010801168000010800008000050183971264092840021400484004319982320023160234208000080000221600008000040043400421180021109101080000800001080000340800020280002234502061666400468000080000104005940059400604024740044
1600244004331100004003416168525160010108000080000108000080000501839712640000400244004340049199823200291600102080000800002016000080120400424005311800211091010800008000010800003496800020280002234502052565400558000080000104005940060400584006040043
1600244004931000004003416002516001010800008006010800008000050183971264000040027400434004919984320028160010208000080000201602408000040042400431180021109101080000800001080000008000201180000034502041666400498000080000104006040044400434004440044
16002440049310003040027161602516001010800008000010800008000050183971264000040024400434004919982320028160010208000080000201600008000040042400481180021109101080000800001080000340800021280002234502061656400478000080000104005940060400594006040043
16002440049310006040028161602516001010800008000010800008000050183971264000040021400424004219985320023160010208000080000201600008000040043400421180021109101080000800001080000340800020580002234502041657400478000080000104005540043400444004340050
160024400423110030400281616025160010108000080000108000080000501839712640000400214004940043199823200221600102080000800002016000080000400424004211800211091010800008000010800000080002028000200502051676400498000080000104006240061400504005140043
1600244004331000304003416160251600101080000800001080000800005018397126400004002140273400432038472002316001020800008000020160000800004004340042118002110910108000080000108000034080002228000020502051676400498000080000104005940043400444004340043
16002440042300009140028161602516001010800008000010800008000050183971264000040021400484004219982320022160010208000080000201600008000040042401951180021109101080000800001080000340800020280002234502061676400558000080000104006140050400504004340043
1600244004230000304003316160251600101080000800001080000800005018397126400004002140043400421998232002216001020800008000020160000800004004240043118002110910108000080000108000000800021280002234502051656400568000080000104006040058400594004440044
160024400483000090400271416025160010108000080000108000080000501839712640000400214004940043199823200231600102080000800002016000080000400424004911800211091010800008000010800153408000221780002234502061656400558000080000104005940062400484006140043