Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST4 (multiple, 16B)

Test 1: uops

Code:

  st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 12.000

Issues: 12.000

Integer unit issues: 0.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 8.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
7200729446237039012910001000046082938233150061200080004000800040005190171629142469229156293923101200040008000800020000293032938911610011000100040000120400610140010120126130879320689331641459194423210380520645228687162761288712970400080002933329415294942937929397
720042950723702700260000200004612293693315145120008000400080004000519087161412466829160294753101200040008000800020000293902929711610011000100040000120400000140014120013234941468783218156219368338838078575828645160861297412754400080002924729470292862936229427
72004294792361260024000020000466829384331521612000800040008000400051898716125247272915529225310120004000800080002000029405293551161001100010004000500400100163400111200132019564694430921763194273280380312636228727161741281412858400080002943529368294652938229472
72004297212370210022100182000045802929040151581200080004000800040005191071612424739293522946431012000400080008000200002937429412116100110001000400001204001001400011200130659231687530931864193733216380113595728631159801290312770400080002944129429293712939929416
72004294792360290022100020000464229418031526412000800040008000400051906715881324750292012935231012000400080008000200002934729373116100110001000400001204006000400111200129739537686331561759193763287380411636128714162141287712830400080002947429468293372940229600
72004294862360200019000010000470229438341513012000800040008000400051899716378247512929529473310120004000800080002000029429294951161001100010004000012040000004000400013098947168913122156919375330138139596228644161691304513067400080002943329455294052942529519
72004294172361250025000121000047542933544152601200080004000800040005189771618724737293112936531012000400080008000200002935429367116100110001000400051204000000400001200130809365690531341261193443332381315576528703162731276912747400080002950429438294712940729439
72004294012360210023000010001467429282441519012000800040008000400051901716277246562920829435310120004000800080002000029271293991161001100010004000012040000020440000010130879298689531861359193723294380512565928587160501294412956400080002937429482293762942929362
72004294262370220027100000100461429315031509012000800240008000400051928716161024684292912938831012000400080008000200002933629409116100110001000400001204004002400011200130459360689631581356194093336381018616328856163421283912901400080002941129439293352949029361
7200429448237024103100002000146432935244152561200080014000800040005189671599524676292532923931012000400080008000200002949529397116100110001000400001204000003400141200131719424691331821052193383367381012525828778162681287412803400080002943829365294682940529451

Test 2: throughput

Count: 8

Code:

  st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.4966

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3a | 3f | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ab | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d0 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
96020720061315581010000919008475922000891616109115919251053100100732428320000100640238320000500924526110812573019999919994720180937152338930960100200320000640000200640000160060020088920170311802011009910010080000800001003200741536248340320014010143200021436140000510900117221997200320000640000100197087201557198024200400201424
96020419972615590101000090008275712010351616146215750251043545100729815320000100640000320000500919448510811253019909619917019807638375340138960100200320000640000200640000160000020068319919911802011009910010080000800001003200000342522503200020001432006223400000510900117121996110320000640000100200913200867200919200571201679
96020419750115560101000020088814020205116016671625925104132710072585432006010064000032000050092683961075173102001751998201976294062034103296010020032000064024020064000016000001989951970801180201100991001008000080000100320000034256500320062000232000023400000511100117221991160320000640000100202464199190200830197839199729
9602042000881551010100112500872691198417161615451779825104960110072108832000010064000032000050093170271075970702009982003102001973948333988496010020032000064000020064000016000002003891997851180201100991001008000080000100320000034249950320002000232000223400000511100217122015800320000640000100199427200463200157202175200544
96020420311215600000000050088524120146416016101554025105137610072480732000010064000032000050093301881081873311999711983441998083853833930396010020032000064000020064000016006002013742005791180201100991001008000080000100320060034255250320002000232000423400000511200117311999770320000640000100200831199219199555197062200598
9602042001771567010000012000798680201759160146117038251042513100724358320000100640000320000500927230610760333020029119848820107342173339791960446200320000640000200640000160000020177120051511802011009910010080000800001003200600402567103200021001732000203400000511100217212000241320000640000100201201201693199689202123200009
960204200673156901101525344400087222019867716161417180441311043731100719593320180100640952320432500925690110758597020186419945220218440958294019496010020032000064000020064000016000002005741994771180201100991001008000080000100320000034266430320002000032000223400000510900117122015020320000640000100199591197457201227199770201898
9602042003241544010100095008842511979831601739162402510469771007265563200001006400003200005009279957107771030201101200204199688386163419489601002003200006400002006400001600000198876200599118020110099100100800008000010032000000250630320002100232000223400000510900217221987880320000640000100200937197703197653200267200751
96020420105015410101000680082952120147516013421668625104657110072271032000010064000032000050092805291081463001999821971291996704377233801496010020032000064000020064000016000002011081979521180201100991001008000080000100320000034252570320002000232000023400000511100117212001740320000640000100199017198434201703199961199656
96020420195515560000000122008689202013521616139116240251042711100716210320000100640000320000500932939510788552020060020077920143138314340078960100200320000640000200640000160000020013919845611802011009910010080000800001003200000026598032000010023200022000000510900117221996600320000640000100200586199344200423201146201068

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.4960

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 37 | 3f | 46 | 49 | 4e | 4f | 50 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
9600272014341550000000012210867922008081616148816200025104350210731295320000106400003200005093227691070280202009580200655200196395773418559600102032000064000020640000160000020080819911421800211091010800008000010320000002694403200021856320002034050196121771220028232000064000010201313201182203633200700202156
9600242009751551000000013220086989199465161615931795702510504241072806132000010640000320000509327361107336010199622020015219847540292339334960010203200006400002064000016000002007522017331180021109101080000800001032000003425038032000001100320002234050313644272720262932000064000010205973205268200571202364198656
960024201897156200010335282660088755198160016145417339010710451191072621632024010640714320324509177675107868520199509020136419696241116339414960010203200006400002064000016000002001991971581180021109101080000800001032000003424880032000202320002234050190121751119917332000064000010199187202124200922197684200497
9600242009391559000000012200864551987221616155816028025105007810727848320000106400003200005092530131073636501972770202097200392382153399019600102032000064000020640000160000020155919998111800211091010800008000010320000034246880320002053200022340501913121712720063232000064000010202081200450198648200316201396
96002420016215570000000020085249200533161614881636802510422491072833832000010640000320000509310748108786080199328319793620141339990340970960010203200006400002064000016000002008321990071180021109101080000800001032000003426256032000005320002234050196717121220123132000064000010199192200898201257199055197473
960024198236153500000000800834781999191616147217002025104382010722506320000106400003200005093020431079867211992810196522200083403593405149600102032000064000020640000160000020040420143911800211091010800008000010320000034263560320002023200002340502031217131420170732000064000010198246197119200237200638198009
96002419753115380000000080080593199028161613541665202510485371072799332000010640000320000509217306108062130199491020124420012941597341010960010203200006400002064000016000002013941999811180021109101080000800001032000003426850032000002320002034050193121751220075332000064000010198152198017199812200438200814
9600241988921550000000002109288219846216161539165044251043597107259633200001064000032000050928098510814069019876701984612011143974834170996001020320000640240206409601601800199006198045418002110910108000080000103201802342495123224620412683216822340506819155361820162832000064000010200643201137200572200491202077
96002419942415530000000020086996201662161613401645402510478251073026932006010640476320432509263984107587360198883020111019812740101341652960010203200006400002064000016000002004272001541180021109101080000800001032000003426572032000202320002234050193121751320207232000064000010200933200837199619200054201489
9600241996831544000000040817600850072009021601398175840251047922107298313200001064000032000050918869710839022020125702010412007464236233964196001020320360640480206404801600000198176198070118002110910108000080000103201220342648923200023832000223405019131217121320062032000064000010201621199997198389197954199270