Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (post-index, 2D)

Test 1: uops

Code:

  ld3r { v0.2d, v1.2d, v2.2d }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 6.006

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 3.006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
650052900123302900320011178800451228499000165626012100030092000100030002000500010002358571722869288532878532860002000300030036000287272879511610011000100002000062002010020024060001307793466834316897319822319638072468732838910001590812787140312000300010002892428836290252886428906
6500428929233029002400100120004788284400121658060091000300020001000300020005000100063568010228382872528922310600020003000300060002876628776116100110001000020000420000000200042600013184926869023160127819810321438112869682821510001577112831139532000300010002890728844288732888428826
650042885823202100220000070004662284850001663860001000300620001000300020005000100033568611229012870228859310600020003000300060002886928943116100110001000120000420000000200000400012979930469313167136919765317938122974732832710001585612922139692000300010002907228947288812899729155
650042885123302200260000040004638284880001676160061000300620001000300020005000100003567811228592878328945331600020003000300060002881728916116100110001000020000420000002200040000013318944868523027117419842317338161975842832910001590812479140872000300010002886029045289672894828938
65004290172330240030001005000459828539000166986006100030062000100030002000500010000356756229172887428930310600020003000300060002889528872116100110001000020000420020040200020400013224949668373097176719922314538132770672832110001586312807140292000300010002894528942290222901628980
65004290122330220028001005000469428490000166126006100030062000100030002000500010000356080228642870128963310600020003000300060002880229017216100110001000020020020000020200040000013230919068643071126319990315838202778732833310011596712940138552000300010002900128789290912897828762
6500428914232033002700110700046522849601016577600010003006200010003000200050051000035677522896289252901031060002000300030006000291012902311610011000100002000042000000020004240001331295746893313377319549312038201674702826310011585312909138922000300010002918828861288002874828933
65004290362340350031000005000493628569002164966006100030062000100030002000500010000356835229402866628836310600020003000300060002882428772116100110001000020000020000106200000000013209929169483195167319967320338202377612823710001573312795139702000300010002875928906287612893128775
65004286662310270028001000000461228570000165756006100030062000100030002000500010000357025228812871628679310600020003000300060002872328659116100110001000020000420000000200040000013398948769173175127319684328538192473722823210001545512795138752000300010002895328880288412878428870
65004286702320211031001100000455328869000170806000100030062000100030002000500010000356747229282894429128310600020003000300060002907229111116100110001000020000420020000200246400012896921668903040117020004310738131975782826810001614412974140782000300010002887528949288732899528938

Test 2: throughput

Count: 8

Code:

  ld3r { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3r { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3r { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3r { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3r { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3r { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3r { v0.2d, v1.2d, v2.2d }, [x6], x8
  ld3r { v0.2d, v1.2d, v2.2d }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 67 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400205800706201000000002210028002731414025480169801002400721600008010024000016000048049996088431233500800238004280042003244801002001600002400002002400004800008004280042118020110099100100800008000001001600121240160049011471600366146011005109117128003908000099160000240000801008004380043800438004380043
400204800426210000000002300280027215150254801728010024007216000080100240000160000480499960029312031108002380042800420032448010020016000024000020024000048000080042800421180201100991001008000080000010016001012401600120104816003661474010005109217218003908000009160000240000801008004380043800438004380043
40020480042621100010000530008002731500254801738010024007216000080100240000160000480499960030312334108002380042800420032448010020016000024000020024000048000080042800421180201100991001008000080000010016001211401600480104716003761484011005109117128003908000090160000240000801008004380043800438004380043
4002048004262010000000054001800273141402548017280100240072160000801002400001600004804999600293120199080023800428004200324480100200160000240000200240000480000800428004211802011009910010080000800000100160011104016004600051160037604740101051091172180039080000109160000240000801008004380043800438004380043
4002048004262010000000011001800272141502548017280100240069160000801002400001600004804999600303120092080023800428004207324480100200160000240000200240000480000800428004211802011009910010080000800000100160011124016004800047160036612940111051091172180039080000910160000240000801008004380043800438004380043
400204800426201100100005300280027215150254801728010024007216000080100240000160000480499960019312334708002380042800420032448010020016000024000020024000048000080042800421180201100991001008000080000010016001010401600110002116003661474210105109217118003908000099160000240000801008004380043800438004380043
400204800426211000101001000280027214140254801698010024006916000080100240000160000480499960884312334308002380042800420032448010020016000024000020024000048000080042800421180201100991001008000080000010016001112401600471004716000061474011205109117118003908000099160000240000801008004380043800438004380043
40020480042621111001100360028002720150254801728010024007216000080100240000160000480821960952312020508002380042800420032448010020016000024018620024018648000080042800421180201100991001008000080000010016001212401600480001116003701484011005109217128003908000099160000240000801008004380043800438004380043
400204800426201000000005300280027314140254801698010024006116000080100240000160000480499960959312332508002380042800420032448010020016000024000020024000048000080042800421180201100991001008000080000010016001112401600480014916002260483311105109217118003908000090160000240000801008004380043800438004380043
40020480042620110000000540028002721500254801158010024001816000080100240000160000480499960874312345408002380042800420032448010020016000024000020024000048000080042800421180201100991001008000080000010016000011401600460014616003761484011005109117118003908000009160000240000801008004380043800438004380268

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | 79 | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400025800586201100000053010018002721212025480064800102400001600008001024000016000048004996033831210651580023800428004200324480010020160000240000202400004800008004280042118002110910108000080000010160000025016000000301604006121330015019516317338003908000006160000240000800108004380043800438004380043
40002480042620000000001100002800272121202548006480010240054160000800102400001600004800499603283121073158002380042800420032448001002016000024000020240000480000800428004211800211091010800008000001016001011400160046004816003661474010105019511317338003908000099160000240000800108004380043800438004380043
400024800426201101110053000028002721415025480082800102400721600008001024000016000048004996088731233371580023800428004200324480010020160000240000202400004800008004280042118002110910108000080000010160011100016004600461600366147401000501959326338003908000099160000240000800108004380384801578004380043
40002480042621110000005200001800270012025480064800102400541600008001024000016000048004996033131199941580023800428004200324480010020160000240000202400004800008004280042118002110910108000080000010160011102501600293031600306122331100501958317338003908000099160000240000800108004380043800438004380043
400024800426201001150064000018002721212025480082800102400541600008001024000016000048004996034031211551580023800428004200324480010020160000240000202400004800008004280042118002110910108000080000010160011110016004600481600376110401110501958517338003908000099160000240000800108004380043800438004380043
400024800426211100000053001018002721212025480379800102400821601048001024000016000048004996128931246560080023800428004200324480010020160000240000202400004800008004280042118002110910108000080000110160000033016003700301600006037410005019004173480039080000010160000240000800108004380043800438004380043
400024800426200000100056000028002721212025480073800102400661600008001024000016000048004996068431218530080023800428004200324480010020160000240000202400004800008004280042118002110910108000080000110160000033016003700291600006104100050190031733800391800001414160000240000800108004380043800438004380043
4000248004262100101000560000280027212002548001080010240055160000800102400001600004800499599963121853008002380042800420032448001002016000024000020240000480000800428004211800211091010800008000001016000000016003700316003761304100050190031734800390800001410160000240000800108004380043801658004380043
4000248004262100000000430000280027201202548001080010240000160000800102400001600004800499600293120101008002380042800420032448001002016000024000020240000480000800428004211800211091010800008000001016000003301600370037160037000330005019003173380039080000140160000240000800108004380043800438004380043
400024800426200000001115617600028014021212548448039180118240219160216800642403241602164806939636813135182008011582219821974140211770485518020161542242439202403244800008005480042218002110910108000080000010160102041016013900135216024161294100050330032533801320801081410160000240000800108018080155801558027180275