Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (single, H)

Test 1: uops

Code:

  st3 { v0.h, v1.h, v2.h }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
62006296622381900210001211000458129415011851720001000100010001000109008000600216962930229554310200010001000200030002948029416116100110001000100003010000001000030013243950469133179646210303262381420514728892159871357514845100010002952029634295982956629490
620042951923717012100001810004754293801018558200010001000100010011090280005002171229291294943102000100010002000300029519293712161001100010001000020100200010000220133269470694631721448209523332381512505328881162011334514840100010002949229558296442956329433
6200429490237260016000018013300047302923100185352000100010001000100010906800000721721291122954331020001000100020003000295092931611610011000100010000201000002201000030013139947970233137742210163309381416434528778162741306614776100010002959729591296752961229502
62004293592371300150000120100470929208101851320001000100010001000109108000500217312904229338310200010001000200030002932729458116100110001000100003010000001000030013167936269403141648208653362381414464928676164091348115110100010002947529533295592956029412
62004294252371710170000181000464329168001858520001000100010001000109078000600217582915929403310200010001000200030002943429414116100110001000100002010010001000020013225941169133146650208113302381918464728763162211338214981100010002947729540294772950029359
620042959623823002000003010004629294490018694200010001000100010001091080003002169829401297263102000100010012002300029674297601161001100010001000030100010610000202809132919058693833551248209823394381218454929350159341353115485100010003006829916297562960530023
62004294802392101300000331000469028785111800420001000100010011000108978000300216752874729165310200010001000200030002896028964116100110001000100002010000001000000013200928668993114347204173264381717554928521157001296914809100010002934929239292422948329192
62004295262342511180000000004643289660018118200010001000100010001093680002002170328974292933102000100010002000300328988292192161001100010001000020100000010000200131179331693931141050204293290381620454628543157681301914486100010002911928979291042908029112
620042916223418011400001200014682287950018182200010001000100010001090580000002174528798291843102000100010012000300029146290981161001100010001002020100000610000300131509290693430771354205683333382122564828500159091310314646100010002909529096291072916529201
6200429187235200018000000000465029084001817120001000100010001000109118000000217482895229242310200010011000200030002917429050116100110001000100002010000001000020013133917468553087847204553333381421474828610160941299414874100010002915229134290592915229206

Test 2: throughput

Count: 8

Code:

  st3 { v0.h, v1.h, v2.h }[1], [x6]
  st3 { v0.h, v1.h, v2.h }[1], [x6]
  st3 { v0.h, v1.h, v2.h }[1], [x6]
  st3 { v0.h, v1.h, v2.h }[1], [x6]
  st3 { v0.h, v1.h, v2.h }[1], [x6]
  st3 { v0.h, v1.h, v2.h }[1], [x6]
  st3 { v0.h, v1.h, v2.h }[1], [x6]
  st3 { v0.h, v1.h, v2.h }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160206400433110000000030282840028161602516276810081061800001008000080000500183971264363740021400434004219959032000016010020080000801202001600002400004004240043118020110099100100800008000010080000042008000210280002242005110116114003980000800001004004440044400434004440044
16020440043310000000003019824002801602516429510081160800001008000080000500183971264434740021400434004319959032000016010020080000800002001600002400004004340044118020110099100100800008000010080000042008000200580002046005110116114004080000800001004004440044400434004540044
16020440043310000000000012944002816160501620681008175880000100800008000050018397126503874002140043400421995903200001601002008000080119200160000240000400424004211802011009910010080000800001008000004200800020011058000220005110116114003980000800001004004440044400474004440044
160204400433100000000031234740027161602516337310083984800001008000080000500183971265077340021400434004319959032000116010020080000800002001600002400004004340043118020110099100100800008000010080000042008000200280002242005110116114004080000800001004004340044400444004440044
160204402713120000000091023254002701602516430410080974800001008000080000500183971264981640021400434004319959032000116010020080000800002001600002400004004240042118020110099100100800008000010080000042008006000080002242005110116114004080000800001004004340044400444004440044
16020440249311000000003033364002816002516335210081061800001008000080000500183971264344040021400434004219959032000116010020080000800002001600002400004004240042118020110099100100800008000010080000042008000200280002242005110116124003980000800001004004440044400434004440044
1602044004331100000000001275400270160251613791008106080000100800008000050018397126452184002140042402471995903200001601002008000080000200160000240000400434004211802011009910010080000800001008000004200800020008000220005110116114004080000800001004004540043400444144843057
1602044004231100000000301671400271600251615611008101980000100800008000050018397126502494002140042400421995903200011601002008000080000200160000240000400434004311802011009910010080000800001008000000008000200080002242005110116114003980000800001004004340043400434004340043
16020440043310000000003032494002816160251611391008097680000100800008000050018397126494674002140042400421995903200001601002008000080000200160000240000400434004311802011009910010080000800001008000000008000200280002242005110116114004080000800001004004440043400444004440044
160204400433100000000040114640028161602516399810082814800001008000080000500183971265028640021400424004319959032000116010020080000801212001600002400004004340043118020110099100100800008000010080000042008000200280000242005110116114003980000800001004004440044400434004340044

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | rob full (74) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600264004330000000003001050040027161602516105510800388000010800008000050183971264493604002140043400431998203200231600102080000800002016000024000040043400431180021109101080000800001080000042008000230580002000050201016710400408000080000104004440043400434004340044
1600244004330000000004003024040028160025161165108359680000108000080000501839712640834040021400434004219982032002316001020800008000020160000240414400434004211800211091010800008000010800000000800022028000024200502061666400408000080000104004440043400444004440044
16002440062311110000017002066140038016125163582108370080000108000080000501840048644597040026400544005319989032003516001020800008000020160000240000400544004811800211091010800008000010800151542218001600168000224200502081668400398000080000104004440044400564004440044
160024400423210000000300171104002816002516269310818438000010800008000050183971265083204002940043400431998203200231600102080000800002016000024000040043400431180021109101080000800001080014154400800160023800022000502061667400408000080000104005340053400554005440053
16002440043310000000030012030400271616025161756108148380000108046480000501839712648624040021400434004219982032002316001020801208000020160000240000400434004211800211091010800008000010800001444008001601228000224200502081699400408000080000104004340043400444004440043
16002440052310000000030029000400271616025161079108003880000108000080000501839712643560040021400424004219982032002316001020800008000020160000240000400424004311800211091010800008000010800000420080002101478000204200502061658400408000080000104004440043400444004340044
160024400423100000000310449004002816002516322110808698000010800008000050184007264520904002640054400521998803200341600102080000800002016000024000040054400511180021109101080000800001080014154400800161014880002244005020716811400398000080000104004440044400444004440043
160024400463100000000300809040028161602516509510843718000010800008000050183971265672804002140043400431998203200221600102080000800002016000024000040043400431180021109101080000800001080000042008000200208000204200502081696400408000080000104004440043400434004440043
160024400433110000000300177204002716161251633761084312800001080000800005018397126401200400214004240043199820320022160010208000080000201600002400004004340042118002110910108000080000108000004200800020058000024200502061674400418000080000104004440044400434004440044
1600244004431000000003001210040028161602516120510807868000010800008043250183971264964904002140044400421998203200231600102080000800002016000024000040043400421180021109101080000800001080000042008000200988000224201502071688400398000080000104004340044400434004540044