Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, 1 reg, 1D)

Test 1: uops

Code:

  st1 { v0.1d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
6100528877232314001000010463828411112383510001000100050001215948281152883531010001000200028676286061161001100010001000030100000010003001309392966891310916221246317838102568662821515765131801503110002886028778288222901728834
610042877423100100300013201469528476112370210001000100050001515934282332873931010001000200028757286701161001100010001000030100100010000001316692636856311516621212327238142268682822215355129821497310002883328881288252879728811
6100428868232002001000000477528511102372610001000100050001715926281372886331010001000200028634286231161001100010001000000100000010003001302894256940317806221228323238131969722820015580131781492210002889228741287962888428763
6100428801232001011000000477028554112387110001000100050001415931280262878531010001000200028817287651161001100010001000020100000010003001316492946965312106921231319838182970652828715482130271489710002885728895288842885829005
6100428809230002001000000469828529112377610001000100050001215995281172890031010001000200028809287931161001100010001000030100000010000001307993146923307906921174318138182661712834315671132161527910002894528924288502891028844
6100428845233001003000000464928528112395310001000100050001215970281902895231010001000200028799287231161001100010001000030100000010003001304192436942319806721316319838172662662823315879130821523010002881928772288632873328813
6100428862232003002000000476928446112385110001000100050001715923282662890731010001000200028740288061161001100010001000030100000010000001319491516835311036721061314738082463702818415584127671479110002874328770286602871828711
6100428743223001002000001466428333012370310001000100050001715918280582865931010001000200028597286721161001100010001000030100000010003001330895586907317217221099312338152464652812915401128391502510002873028709287042862428653
6100428630222003000000000472228310012369210001000100050001715932280002876931010001000200028519286251161001100010001000030100000010000001303996066981319416421158315238062366672817115412129991492510002879028667285962862628823
6100428696224001002000000463728358102366810001000100050001815942280872864831010001000200028655284871161001100010001000000100000010003001326894786999320117420958325038102966632807815481130391498910002864128797286682884628692

Test 2: throughput

Count: 8

Code:

  st1 { v0.1d }, [x6]
  st1 { v0.1d }, [x6]
  st1 { v0.1d }, [x6]
  st1 { v0.1d }, [x6]
  st1 { v0.1d }, [x6]
  st1 { v0.1d }, [x6]
  st1 { v0.1d }, [x6]
  st1 { v0.1d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 18 | 1e | 1f | 22 | 23 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | a4 | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020540054310000031040027161612580100100800001008000050018393524001740043400422995633000180100200800002001600004004240043218020110099100100800008000010080062422880002022067800620420511031611400390800001004004340043400434004340043
802044004229900003004002500125801001008000010080000500183947240018400404012229953330000801002008000020016000040042400421180201100991001008000080000100800000080002002800022420511011611400370800001004004440043400434004340044
802044004230000003004002816160258010010080000100800005001839448400174004040042299533300008010020080000200160000400424004211802011009910010080000800001008000042080002002800022420511011611400400800001004004340043400444004340055
802044004230000003004002816028258010010080000100800005001839472400174004340043299553300018010020080000200160000400424004211802011009910010080000800001008000042080002000800022420511011611400390800001004004340044400434004340044
802044004029900003004002816160258010010080000100800005001839448400174004240042299553300008010020080000200160000400424004311802011009910010080000800001008000042080002002801822420511011611400390800001004004340044400434004340041
802044004030000003004002716160258010010080000100800005001839448400154005440042299563300008010020080000200160000400434004211802021009910010080000800001008000042080002000800022420511011611400390800001004004340044400434004340041
8020440043300000030040027160025801001008000010080000500183944840017400424004229955330000801002008000020016000040043400431180201100991001008000080000100800004208000200080002000511011611400390800001004004140055400434004140044
80204400433000000000400251616125801001008000010080000500183935240017400424004229956330000801002008000020016000040042400431180201100991001008000080000100800004208000200080000000511011632400390800001004004340044400414004140044
802044004330000000004002816160258010010080000100800005001839448400174004240042299673300008010020080000200160000400434004311802011009910010080000800001008000042080002002800022420511011611400400800001004004340044400434004340043
802044004029900000004002716161258010010080000100800005001839448400174004240054299563300008010020080000200160000400424004311802011009910010080000800001008000042080002002800022420511011613400400800001004004440041400434004340041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire uop (01) | cycle (02) | 03 | 1e | 1f | 23 | 24 | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss st (a2) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | bc | l1d cache miss st nonspec (c0) | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800254005431112000400271616025800101080000108000050183947204001740040400422997533002080010208000020160000400434004211800211091010800008000010800004280002088000224250204168354003980000104004440044400434004340043
80024403533100300400271616025800101080000108000050183944804001740042400422997833002380010208000020160000400424004311800211091010800008000010800004280002028000224250205160554004080000104004440043400444004340043
800244004231115300400251616025800101080000108010850184892204001840042400422997733002280010208000020160000400534004211800211091010800008000010800003480002028000223450205160544003980000104036040435404354004340434
800244004232406070040027161632725800101080000108000050183942404002540042400502997533002280010208000020160000400424004211800211091010800008000010800003480002028000223450205160354004680000104004340041400434004940043
80024400423000301400251616025800101080000108000050184214204001740042400402998433002280010208000020160000400424004911800211091010800008000010800003480002008000023450205160554003980000104004340043400414004340043
800244004030012300400271603425800101080000108000050183935204002540042400502998533002280010208000020160000400424005011800211091010800008000010800003480002028000223450205160574003980000104004340043400434004340041
80024400402990900400351616025800101080000108000050183942404001740042400422997733002280010208000020160000400504004211800211091010800008000010800003480002088000203450204160554003780000104004340043400434004340041
8002440040299090040025161602580010108000010800005018394240400174004040040299753300228001020800002016000040042400421180021109101080000800001080000348000202800022050203160554003980000104004340043400434004340043
80024400423000901400271616025800101080000108000050183942404001740042400402997733002280010208000020160000400504004211800211091010800008000010800003480000058000223450207160444004680000104004340051400434004340051
80024400423000301400270160258001010800001080000501839424040017400424004029977330022800102080000201600004004040042118002110910108000080000108000008000202800022050205160454003980000104004140043400434004140043