Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (multiple, post-index, 2D)

Test 1: uops

Code:

  st3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 7.000

Integer unit issues: 1.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 3.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6600828732224003010000400473128501031643570001000300030001000300030005000330552400001218226672836628674310700030003000700090002852628576116100110001000300440230030133000393101345596487023322205119505320138231547442816910001512012127133313000300010002854128582285592872228639
66004286552211101100012400485628464331655570001000300030001000300030005000330342400001118227622829128687310700030003000700090002848628562116100110001000300449130031163000393001323897037034323714919383317938141847502814110001497612163135673000300010002876228666286222857328601
6600428557221011111000400476228604031661570001000300030001000300030005000330292400001510227412848428593310700030003000700090002868128625116100110001000300439130030263000393101334393917044322505019373324638182157522817610001558812076134133000300010002860028517287362859928632
6600428656221011110000400485228551231655670001000300030001000300030005000330342400001500227272844528654310700030003000700090002854128669116100110001000300349130030133000393201314895626939325405819411314738141754542824510001520112216134903000300010002856328662286902858828630
6600428579234010110000400475328672231684670001000300030001000300030005000330172400001400226382841928640310700030003000700090002891928763116100110001000300439130031063000493001292895146863309104319979319038121444472862510001645213155144553000300010002913529370291752910129348
660042925522730005000010046752914933172347000100030003000100030003000500033040240000506227042895429254310700030003000700090002916229232116100110001000300009030000003000090001306194526961311704820144321438291344492860910001631812846142183000300010002927729193294102932229288
660042928622720101000020046472912300171757000100030003000100030003000500033018240000600227362903329303310700030003000700090002929229312116100110001000300006030000003000060001308892847014317405520014313338281643472860110001620312590142063000300010002926429424292862931129379
660042936722820101000010046602913000172827000100030003000100030003000500033027240000100227372895329466310700030003000700090002941229293116100110001000300006030000003000060001299492946935311725220047312038271149482864310001620312812143463000300010002933428867293682930429385
660042916222710001000040047552922203172107000100030003000100030003000500033036240000110227322888829455310700030003000700090002924729335116100110001000300436230030033000363101318192926911315304620148313938301748462873010001617612818137493000300010002936729304292902937929292
660042934222701010100040045232921130172367000100030003000100030003000500033040240000100227942890829285310700030003000700090002933529222116100110001000300339030030133000393201312593386953313504520102316438231746502846510001593912766143423000300010002933129242291932931729208

Test 2: throughput

Count: 8

Code:

  st3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  st3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  st3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  st3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  st3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  st3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  st3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  st3 { v0.2d, v1.2d, v2.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.5005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480208120042931000000002110439711200351616625567242801002438452400008010024000024000048049555202401946004012002112004212004339984340041560100200240000240000200560000720000120145120043118020110099100100800008000010024000003600240002002240002163405110317541200408000024000024000080100120043120043120043120043120044
4802041200499300000000690052270120028160902556376380100247606240000801002400002400004804995519567195744601206811200491200423998434006556010020024000024000020056000072000012004312004211802011009910010080000800001002400000340024000210524000223405110317431200398000024000024000080100120049120043120044120060120050
4802041200439310000000030048030120027161602556298680100242475240000801002400002400004804995519567194436011200211200421200433998434002556010020024000024000020056000072000012004312004921802011009910010080000800001002400000340024000200824000223405110317331200468000024000024000080100120050120043120202120043120044
48020412004893000000000300646001200271616025567931801002428832400008010024000024000048049855195671941674012002112004212004839984340025560100200240000240000200560000720000120049120043118020110099100100800008000010024000003400240000001124000203405110417341200408000024000024000080100120043120044120043120050120050
4802041200429310000000090064370120028161602556287280100244283240000801002400002400004804995519567194210501200211200431200483998434002456010020024000024000020056000072000012004912004311802011009910010080000800001002400000340024000200524000223405110317331200408000024000024000080100120043120043120043120043120044
48020412004293000000002190143030120027161602556438980100245874240000801002400002400004804995519567193912001200211200431200423998734002456010020024000024000020056000072000012004212004911802011009910010080000800001002400000340024000200524000223405109217331200408000024000024000080100120043120043120043120043120044
480204120042930000000003005883012002716163652556796280100246295240000801002400002400004804985519567194174001200211200431200433998434002456010020024000024000020056000072000012004912004211802011009910010080000800001002400000340024000200224000223405110517331200398000024000024000080100120044120053120044120044120043
4802041200439310000000123007454012003416160255640948010024466824000080100240000240000480498551956719543500120021120042120049399843400255601002002400002400002005600007200001200431200431180201100991001008000080000100240000000024000200224000223405110417431200468000024000024000080100120050120044145546120060120043
48020412004993000000002130045490120028161602556676980100246905240000801002400002400004804995519567194686901200211200431200423998434003056010020024000024000020056000072000012004212004311802011009910010080000800001002400000340024000200524000223405110317531200408000024000024000080100120043120049120044120050120050
4802041200429310000000090092611120027161602556975280100245711240000801002400002401084804995519567193867001200211200431200433998434002456010020024000024000020056000072000012020412004211802011009910010080000800001002400000360024000200224000223405110317351200398000024000024000080100120044120043120358144821120044

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.5006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4800281200509301100000210007771112003316160255670768001024779424000080010240000240000480049551956719459031120025012004212004339984340040560010202400002400002056000072000012004912004211800211091010800008000010240000000124001644022400021634005020317331200408000024000024000080010120052120061120053120054120050
48002412005093100000001900050120120043160025572040801662464252400008001024000024000048004555198361950975112002101200491202023998834002456053820240000240000205600007200001200471200511180021109101080000800001024001517350024000244018240002001405020316331200398000024000024000080010120043120195120043120049120061
4800241200429311200011519000360511200341600255632728001024827324000080010240000240000480049551956719421921120034012004212004339984340029560010202401202400002056000072000012004312004211800211091010800008000010240000034012400147052400021634005019317331200408000024000024000080010120051120058120059120053120043
480024120051930001000018000656401200361616025564543800102453622400008001024000024010848004955197881943557112002101200491200503998834002456001020240000240000205600007200001200571200502180021109101080000800001024001515360024000257021240002238005020317431448088000024000024000080010120043120043120049120044120198
48002412004293110000001400023331120027016025566455800102445462400008001024000024000048004955196871939305112002101200431200493998634002456001020240000240000205600007200001200431201951180021109101080000800001024000000002400023105240002160005020225231200398000024000024000080010120060120051120204120061120044
480024120042930000000030007128012002716160255653698001024216024000080010240000240000480048551956719438181120024012004212019439984340024560010202400002401202056000072000012004212004311800211091010800008000010240000034002400023703240062234005020317231200408000024000024000080010120043120049120043120043120044
4800241201959300000000000079970120028161602556645680166243612240060800492400002400004800495519567194491611200210120207120042399843400355710982024000024000020560000720000120042120043118002110910108000080000102400000341642202400021200240002234005020317331200398000024000024000080010120043120049120043120043120043
48002412004393100000000000736701200270160335662018001024566424000080010240000240000480049551956719597221120025012004212004939990640025560010202400002400002056000072000012005012004911800211091010800008000010240015143600240062500182400022361405020317231200398000024000024000080010120044120049120043120043120054
48002412005093010020001900043351120027000257199488001024342824000080010240000240000480049551956719430061144582012004312004239984340029560010202400002400002056000072000012004212004311800211091010800008000010240000034002400165500240002234005020317331200398000024000024000080010120050120043120043120050120044
4800241200589300010000210006561012004316002556281580010245995240000800102400002401084800495519567194508011200240120042120042399843400255600102024000024000020560000720000120049120043118002110910108000080000102400000340024000655087224000220005020317331200408000024000024000080010120043120043120050120044120044