Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, 2 regs, 8B)

Test 1: uops

Code:

  st1 { v0.8b, v1.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6200629287227018001500018110465029109001831020001000100110001001109118016821696291242928214102000100010002000200029240293501161001100010001001731100101110002311013041970769193190636207273228381511323628654161261340214749100010002931629467292502948129323
62004294272281132213221150177004718293010018410200410001001100010001096980001021815291442947078420001002100120042000293482948151610011000100010061011002038911003101101310894226904317453620751328638118413628628160521322014936100010002943229598294502934929629
620042950522812010172211561770047372933700183862000100010011001100110905800852187329357297551210200410001000200020002925929251116100110001000100220110010141000101146113157951068833150835207953246381111403928537161001313215195100010002870028878289132862828335
62004286162211201017100620050392818600172812000100010001000100010907800072177628031284573102000100010002000200028162284021161001100010001000000100000010000000013680103587137328733319725340838198383228000140891149612449100010002818228116283082841528146
620042818721011400111003620051972840310174092000100010001000100010911800012180128184284913102000100010002000200028415281551161001100010001003301100101110001012013740101117052340564119715338238157393927961140991219612989100010002856528201285172842628185
6200428414211120001500039620047612850500172382000100010001000100010904800092177828255280053102000100010002000200028265282351161001100010001002221100101110001210013810102157119341873319709328638147352728057146381196313478100010002812428124283792840228404
62004281112131171011100320050872819800172132000100010001000100010909800020217482803128280310200010001000200020002832228235116100110001000100223110010111001131201390799897072339184019653343438125333727900144201240713672100010002882528913289382886328871
62004288442231181070008120048082892301172342000100010001000100010908800092141427996282313102000100010002000200028304280731161001100010001000030100000010000200013476100787146340063619591335638078323327917146251198013788100010002834028213281872828628274
620042833120912000171003200515228314101730020001000100010001000109078000192172328082281973102000100010002000200028285283611161001100010001003232100101110001212013862101407138338953919616335438076353827797149521189013083100010002831328191280522815228197
620042831121211600190003200512728263001738820001000100010001000109048000102176528230280473102000100010002000200028186282191161001100010001002201100101110000000013792100307213335783219656338838068333328144146501211913258100010002829428217281872843828213

Test 2: throughput

Count: 8

Code:

  st1 { v0.8b, v1.8b }, [x6]
  st1 { v0.8b, v1.8b }, [x6]
  st1 { v0.8b, v1.8b }, [x6]
  st1 { v0.8b, v1.8b }, [x6]
  st1 { v0.8b, v1.8b }, [x6]
  st1 { v0.8b, v1.8b }, [x6]
  st1 { v0.8b, v1.8b }, [x6]
  st1 { v0.8b, v1.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 1e | 1f | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | ldst x64 uop (b1) | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020640051311101000018011901400381616125163755100820618000010080000800005001840456647349014002840052400541996732000916010020080000800002001600001600004005240054118020110099100100800008000010080014154400800160117800020164414151101161140050080000800001004005540055400544005540063
16020440054310101000018021881400391616725162928100828508000010080000800005001840096647434014002940053400521997532002116010020080000800002001600001600004006340054118020110099100100800008000010080014154401800160018800020164414151101161140051080000800001004005440055400544005440063
16020440053311101000619038371400381616225162567100826538000010080000800005001840456649254014002540052400541996732001216010020080000800002001600001600004005440047118020110099100100800008000010080015154201800160016800020164414051101161140048080000800001004005540055400554005140048
160204400503111000000170122414003816012516265210083645800001008000080000500184002464771901400294006240054199663200121601002008000080000200160000160000400534005211802011009910010080000800001008001415441011800160014800020164414051101161140048080000800001004006340051400644005440055
16020440055310101000019024721400481616125162490100826768000010080000800005001839976649223014002940051400541996732001216010020080000800002001600001600004005440051118020110099100100800008000010080014154200800140116800020164414151101161140049080000800001004005540056400554005540053
16020440055311100001018038251400391616725163416100825698000010080000800005001840096645145014002940054400541996532001016010020080000800002001600001600004005140053118020110099100100800008000010080015144400800140114800020164414051101161140051080000800001004005440052400494005240056
16020440051310101000017014451400351616125161740100836238000010080000800005001840048649305014002840054400511996532000916010020080000800002001600001600004005240054118020110099100100800008000010080015144401800160221800020144414051101161140051080000800001004005440054400534005340056
16020440051311101000017014811400391616125163894100834528000010080000800005001839904644499014002940054400511996532000916010020080000800002001600001600004005240054118020110099100100800008000010080014154401800140116800020164414051101161140060080000800001004005540055400554005540053
16020440054311101000019025721400391616025162611100820618000010080000800005001839808651378014002840053400521996632002016010020080000800002001600001600004005540055118020110099100100800008000010080014144400800160118800020164414151101161140051080000800001004005440054400504005440054
16020440051310101001019024801400471616125162573100831828000010080000800005001840456646706014003840051400501996732001216010020080000800002001600001600004005440051118020110099100100800008000010080014154400800160117800020164414151101161140050080000800001004005240053400524005440055

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 19 | 1e | 1f | 22 | 37 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600264004832200001539140033161614716043510816778000010800008010850183971265317240025402734004320128320023160010208000080120221600001600004050040043118002110910108000080000108000034998000001122800000005020131691140046008000080000104004340044402484004440043
16002440043311000303225400281616025161282108033680000108011680000501839712651547400214024140049199829200221600102080000800002016000016000040042402421180021109101080000800001080060340800620280002234050209169840039068000080000104024340044400434004440043
1600244004331000030507540231016025164443108403480000108000080000501839808655947402134004940043201323200231602342080000800002016000016000040043400491180021109101080000800001080000340800020580002234050208168840040008000080000104004440043400494004340043
160024400493101003044934002816161372516450210804248000010800008000050183971265296640023400424004819982320022160010208000080000201600001602404004240043118002110910108000080000108000034080002011238000223405020101691040045008000080000104004340044400434004440049
16002440243310011323048140027016452516234310844938000010800008000050183971265132340021400494004319982320023160010208012080000201600001602404004340048218002110910108000080000108000000800021110580002200502091691040039008000080000104004440043400444004340050
16002440042311001230491400341616525165085108395680000108000080000501839712647380400214004340042199823200221600102080000800002016000016000040042400491180021109101080000800001080000340800020280002234050209169940039008000080000104004340044400434004440043
16002440042311006304231400271616025160611108104980000108000080000501839712645407400214004340042199823200221600102080000800002016000016000040048400421180021109101080000800001080000340800020280002234050209169940039008000080000104004440043400444004340050
1600244004231100030734400271616025161880108057780000108000080000501839712648713400214004340048199843200291600102080000800002016000016000040049400421180021109101080000800001080000340800020580002234050209169940039008000080000104004440043400444004340049
160024400423110003084540027161602516184610843058000010800008000050186034065352540021400434004219982320439160010208000080000201602401600004004240042118002110910108000080000108000034080002213335800022341502091699400407208000080000104004440043400444004340043
160024400433000003044904002816160251624701081211800001080000800005018397126412674002140043400431998232002316001020800008000020160000160000400424004211800211091010800008000010800004208000245800022420502091798400403408000080000104004440044400444004440044