Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 4H)

Test 1: uops

Code:

  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.008

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.008

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
66005293002192025000001461528874001691360124008200040002000100004751810230152912629310310600020004000200080002906529083116100110001000020006200002200042412833923368523070775200943060381311525528391163471334214717200040002927429336292522920729244
66004292722192118000001458328875121685160124012200040002000100034752632298329211292393106000200040002000800029205291251161001100010000200042000002000404128619244681130751154201493078380411575528470162891319514834200040002937429264293062923029313
66004292882192419000051461628795201686260124012200040002000100004752282297529139292293106000200040002000800029170290921161001100010000200042000002002424129479273686430421549200793092381013555528420159731340214926200040002933429269293502928529256
6600429293220261900005146712884300168456008400820004000200010000475205229742905829203310600020004000200080002899729162116100110001000020004200000200040413078930568543103948200513083380810485228457163591327514884200040002934729327292842931629268
660042934021922150000614619288762016831600840002000400020001000047532623005290762928631060002000400020008000291892906111610011000100002000420000020004041276392036871303054720040307938118514428405163601324914738200040002923429429293572943729206
6600429317220211710005045432876900168696008400020004000200010000475064230272905229304310600020004000200080002919429236116100110001000020000200000200000412912926568363063105420102308738098555228473162851352414825200040002930529274292652921929292
660042925222022210000014583288260016899600840082000400020001000047528423038290992932231060002000400020008000291542918211610011000100002000420000020004061293191846811302875320077308138108585628379161751317314753200040002937029314293692934329342
6600429429219202400007145422885300169266008400820004000200010000475364229772911329307310600020004000200080002917529185116100110001000020004200000200040412931920268713059135320092310038159596328466162471327414908200040002926129315292672928629261
66004292922192316000051460528850021687760004008200040002000100004749652300129091292453106000200040002000800029255291941161001100010000200042000002000404129429149687830551451200183074380712545928451163421337514930200040002926629255292692926429322
6600429271220202300000145602884300168676008400820004000200010000475284229992912229321310600020004000200080002913229169116100110001000020004200003200040413040918868363100944200403114381212557128480157281331614863200040002930929213292452927329237

Test 2: throughput

Count: 8

Code:

  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
48020580085600100104201280049201252548010010032007216000010032000016000050080037796000001800458006480064003474801002001600003200002001600006400008006080060118020110099100100800008000011001600000016005301521600006132350510911711800381141001600003200001008006580079800618004280042
48020480041599011005401080080049212120254801761003200241600001003200001600005008000003840000180022800648006500342480322200160000320000200160000640000800478006911802011009910010080000800001100160000035160032000160000613200510911711800381101001600003200001008006580066800658006580065
480204800606000001000008004901212025480176100320084160000100320000160000500800853960000018004580064800410034248010020016000032000020016000064000080064800411180201100991001008000080000110016000000160000003616003661000510911711800610141401600003200001008006580065800658006580065
480204800645990000038000800492120025480184100320084160000100320000160000500800853960000008002780041800600034748010020016000032000020016000064000080064800601180201100991001008000080000010016000000160036003616003600320051091171180061013041600003200001008004880048800708004880070
480204800476001110038000800491121202548018410032000016000010032000016000050080000096000001800458004180064003464801002001600003200002001600006400008006480060118020110099100100800008000001001600000016003600016003660035051091171180061001001600003200001008006580065800428006580065
480204800646000000042002800490121202548017610032008416000010032000016000050080085338400001800458006480041003464801002001600003200002001600006400008004180060118020110099100100800008000001001600130016003600481600366132400510911711800380141401600003200001008006580066803348006180042
480204800646000000042002800490121202548018410032008416000010032000016000050080085310879996180022800648004100323480100200160000320000200160000640000800418004111802011009910010080000800000100160000035160000003616000061360051091171180061001401600003200001008007080061800658004280065
480204800646000000038010800492121202548010010032008416000010032000016000050080085338400001800458006480064003464801002001600003200002001600006400008006480060118020110099100100800008000001001600151335160036001316000061134305109117118003800001600003200001008006580065800658006580065
480204800646000000042010800292121202548012410032006416000010032000016000050080118610880752180045800418004100346480100200160000320000200160000640000800648006011802011009910010080000800000100160000001600360036160036005101251091171180038001001600003200001008004880065800658004280065
48020480041600000004201280049201202548018410032000016000010032000016000050080085338400001800458004180065003464801002001600003200002001600006400008004180060118020110099100100800008000011001600000351600360036160036603240051091171180038001401600003200001008006580065800618006580065

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 19 | 1e | 22 | 23 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480025800696001000381080045212120254800861032007616000010320000160000508003749600000180041800608006034248001020160000320000201600006400008006080060118002110910108000080000010160000351600000032160032610350050192617027188005711010160000320000108006180061800618006180061
480024800606000000381080045212002548008610320076160000103200001600005080037296000001800418004180060342480010201600003200002016000064000080060800601180021109101080000800000101600000160032000160032603200050191717016278005711010160000320000108004280061800618006180061
48002480060600000012510800452121202548008610320076160000103200001600005080040296000001800418006080060342480010201600003200002016000064000080060800601180021109101080000800000101600003516003200321600326132350050191517028158005711010160000320000108006180061800618006180042
4800248006059900003810800452121202548008610320076160000103200001600005080037492800121800418006080060342480010201600003200002016000064000080060800601180021109101080000800000101600003516003200321600326132350050192717014258005711010160000320000108006180061800618006180060
48002480041600000038108004520120254800861032007616000010320000160000508003773840000180041800608006034248001020160000320000201600006400008006080060118002110910108000080000010160000016003200321600320132350050191517026188003811010160000320000108006180061800618004280061
4800248006060000103810800452121202548008610320076160000103200001600005080000096000001800418006080060342480010201600003200002016000064000080060800601180021109101080000800000101600003516003200321600326132350050192617027308005711010160000320000108006180061800618006180061
4800248006060000003810800262121202548008610320000160000103200001600005080037496000001800418006080060342480010201600003200002016000064000080060800601180021109101080000800000101600003516003200321600320132350050192517027188005711010160000320000108004280061800618006180061
480024800606000000381080045201202548008610320076160000103200001600005080037496000001800418006080060342480010201600003200002016000064000080060800411180021109101080000800001101600003516003200321600326132350050192617025278005711010160000320000108006180147800618004280061
4800248004159900003810800452121202548008610320076160000103200001600005080037738400001800418006080060342480010201600003200002016000064000080060800411180021109101080000800000101600003516003100321600326132350050192617028188005711010160000320000108006180061800428006180042
480024800606000000381080045212120254800861032007616000010320000160000508003729600000180041800608004134248001020160000320000201600006400008004180060118002110910108000080000010160000351600320001600326132350050192817027288005711010160000320000108006180061800618006180042