Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3 (multiple, 4H)

Test 1: uops

Code:

  ld3 { v0.4h, v1.4h, v2.4h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 5.000

Issues: 5.009

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 3.009

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6500528592213101301001010150982804802162025012301220003000200010000357930082294402841128352310500020003000200060002812828339116100110001000120043420050291520004242113411975070383454578194573158382616586527897140781233413560200030002860328502280392850628542
6500428324213411111000204938282200016327501530092000300020001000035638008228900283582847531050002000300020006000282212850911610011000100002004302002002520000242613809989572283306161192013197382218585927918146991228113697200030002850228382285082850428579
65004285982135111110002147142825310162585003300920003000200010000356431002297902849328410310500020003000200060002842728328116100110001000020023420050341420004442013765978870343330057193623210382027636727972150261245214173200030002864728564286282856328343
65004285652137111110002049132829800163245009300920003000200010000357553182296602845528609310500020003000200060002842928326116100110001000020023420030282520024262113175967871083420064193283320382318626228014151421200314216200030002854928429284422830728370
65004283952134111110004047822807200161965012301220003000200010002357460082299402851328574310500020003000200060002852728456116100110001000020033420020332220024242113455998071843341263192203177382113626028040141641243213887200030002829928575286692850928237
6500428333213211111000504943280480016156500930032000300020001000035628118229870284092838431050002000300020006000284732826111610011000100002002362005032720004262113297967770103279162194713221382318565928060146751213013871200030002854828564285492849428489
6500428281214611111000204860280742116155500930092000300020001000035724000229590283072841031050002000300020006000285142822811610011000100002002242004002720004202213407978271173306158194373346382119575527944150901241913749200030002852328474283722846828578
650042862221351111000030509328130001631850123009200030002000100003572900822982028198284523105000200030002000600028278281721161001100010000200324200203912200042421133631003872053170259191683183381914616728022153461207813527200030002861528381283392845628257
6500428321212611010000299150752830200162125009300920003000200010000357611102293102839628258310500020003000200060002833328188116100110001000020034420020022200004421133441004171653360059191323197381716606628050146211196113850200030002812128506282932842528478
650042834121241111110031496827964001630650093009200030002000100033563750022968028301283523105000200030002000600028255282211161001100010000200234200403712200002622137081006370853355267193613198381612596527870144371225213064200030002839428564283752815128543

Test 2: throughput

Count: 8

Code:

  ld3 { v0.4h, v1.4h, v2.4h }, [x6]
  ld3 { v0.4h, v1.4h, v2.4h }, [x6]
  ld3 { v0.4h, v1.4h, v2.4h }, [x6]
  ld3 { v0.4h, v1.4h, v2.4h }, [x6]
  ld3 { v0.4h, v1.4h, v2.4h }, [x6]
  ld3 { v0.4h, v1.4h, v2.4h }, [x6]
  ld3 { v0.4h, v1.4h, v2.4h }, [x6]
  ld3 { v0.4h, v1.4h, v2.4h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40020580054599101005701080026255325400120100240068160000100240000160000500801386288025818002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160015130160015025216003961524313151092172280038131311600002400001008004280042800428004280042
400204800415991100012002800262503254001741002400771600001002400001600005008000422884893180022800418004103234001002001600922400002001600004800008004180041118020110099100100800008000001001600131301600530151160039611201305109217228003813011600002400001008004280042800428004280042
400204800416001110058012800262053254001791002400741600001002400001600005008013872884892180022802498004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600131201600540152160039605201215109217228003801311600002400001008004280042800428004280042
4002048004159910100130128002625532540017410024007416000010024000016000050080004228848931800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016001413431600530155160039615201315109217228010301311600002400001008004280042800428004280042
40020480041599110005800280026250325400120100240068160000100240000160000500801380288660218002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800001100160013150160052011216000001524313151092172280038131301600002400001008004280042800428004280042
4002048004160011000580108002625532540016810024002116000010024000016000050080137228848951800228004180041032340010020016000024000020016000048000080041800411180201100991001008000080000010016001413431600530155160039611343131510921722800380001600002400001008004280042800428004280042
400204800415991110058002800262550254001791002400711600001002400001600005008013872884892180022800418004103234001002001600002400002001600004800008004180041118020110099100100800008000001001600131343160013015116000061524313151092172280038131301600002400001008004280042800428004280042
40020480041600111005801280026255025400168100240019160000100240000160000500800042288027818002280140800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160013144316001402541600396151431325109217228003801311600002400001008004280042800428004280042
40020480041599110001301280026255325400118100240071160000100240000160000500801383288024918002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160015154316005112701600390152431305109217228003801311600002400001008004280042800428004280042
40020480041599101001200280026200325400118100240074160000100240000160000500800042288489318002280041800410323400100200160000240000200160000480000800418004111802011009910010080000800000100160014144316005300521600390113012151092172280038131301600002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 91 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400025800695990001005700028002620120254000731024000016000010240000160000508013862880278080022080041800410032340001020160000240000201600004800008004180041118002110901010800008000011016001314016005400521600396152431310501931703280038013130160000240000108004280042800428004280042
40002480041599101000120002800262012025400073102400631600001024000016000050800853288196808002208004180041003234002352016000024000020160000480000800418004111800211090101080000800000101600000351600320032160000613235000501931702380038014100160000240000108004280042800428004280042
4000248004160000000042000280026012120254000101024006316000010240000160000508008532880000180022080041800410032340001020160000240000201600004800008004180041118002110901010800008000001016000003516003600016000061324000050193170338003801400160000240000108004280042800428004280042
400024800416000000004201028002620120254000101024000016000010240000160000508003742881968080119080041800410032340001020160000240000201600004800008004180041118002110901010800008000001016000003516000010361600366003500050192170338003801400160000240000108004280042800428004280042
40002480041599000000420000800262120025400073102400631600001024000016000050800000288000008002208004180041003234000102016000024000020160000480000800418004111800211090101080000800000101600121343160051015116003961510131050193170338003800130160000240000108004280042800428004280042
4000248004159911100013000080026205325400089102400791600001024000016000050800048288025808002208004180041073234000102016000024000020160000480000800418004111800211090101080000800000101600000351600322032160000613240000501931703380038010140160000240000108004280042800428004280042
40002480041599000000380002800261121202540001010240063160000102400001600005080037728800000800220800418004100323400010201600002400002016000048000080041800411180021109010108000080000010160000035160036003216003561320000501931703380038014100160000240000108004280042800428004280042
40002480041599000000420002800260121202540007310240063160000102400001600005080085328833651800220800418004100323400010201600002400002016000048000080041800411180021109010108000080000010160000035160036003616003600364000050193170338003800100160000240000108004280042800428004280042
4000248004160000010042000280026201202540001010240063160000102400001600005080085328819680800220800418004100323400010201600002400002016000048000080041800411180021109010108000080000010160000035160000000160032600400005019317023800380000160000240000108004280042800428004280042
40002480041599101000130002800262553254000891024007916000010240000160000508013862884997080022080041800410032340001020160000240000201600004800008004180041118002110901010800008000011016001313431600540113160039005243123050193170328003801400160000240000108004280042800428004280042