Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3 (multiple, 16B)

Test 1: uops

Code:

  ld3 { v0.16b, v1.16b, v2.16b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.008

Integer unit issues: 0.000

Load/store unit issues: 3.000

SIMD/FP unit issues: 3.008

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6600528677214240128011060000505428563001160226008300630003000300015004356776229982827228431310600030003000300090002821428338116100110001000130000630040043004513900130551001671153298976191283369381617636627994145881206413086300030002845328444283572825728324
6600428369212300128001060000503328152000163756009300930003000300015004357267230132868628306310600030003000300090002827128303116100110001000030000630040013004513600132489969711533051260193193423381214565527944146951224013177300030002843528298284322841128336
6600428872212260129000090000505528120331159426006300630003000300015004357576230812824528312310600030003000300090002836028384116100110001000030000630010013001313900136851005470403370136719229328038219646027874142321187913457300030002884328381284872845528451
66004283892142800300000901005104280400301640160063009300030003000150003571112307728341283603106000300030003000900028161282091161001100010000300006300400430045119001397710061723833131158191183347381719576027882147221189813160300030002831428769283602843828351
6600428247212240024010090000500428190000160196006300930003000300015000357456230212826228414310600030003000300090002821228631116100110001000130000630010113004513600136061004571003377954191483329380813615827966157121187913301300030002830528392284422827828310
66004284182112400290000601004995281880031642360063006300030003000150003576482301928411284643106000300030003000900028267283581161001100010000300006300100130045149001369110038710933851254192103355381717616727924146291191013267300030002843828321283692838428325
6600428788212270027000060000463828102001158556006300930003000300015004356881230102869028460310600030003000300090002861828465116100110001000030000630040013001516600129639954716832881358196143408381215505627877147851270013130300030002844728801284642822928684
6600428765213240027000090000499028213033159026008300630003000300015000357551230192819128742310600030003000300090002828228373116100110001000030000630040043001501600138621008369463366146019167340038188575927949144711200412994300030002835028279283972874128468
66004284122152300250010900004797281720301593260093006300030003000150043573132298428276283763106000300030003000900028275281921161001100010000300006300400130015136001362310080684133051165191673314381715595727985157321189913291300030002833528350283302824528411
660042844021330003001109000051112797000116123600630063000300030001500035694623058281872835131060003000300030009000281702819011610011000100003000063001001300151360013858993871393407955191053391382015526227955147551253213378300030002842628475284202870828830

Test 2: throughput

Count: 8

Code:

  ld3 { v0.16b, v1.16b, v2.16b }, [x6]
  ld3 { v0.16b, v1.16b, v2.16b }, [x6]
  ld3 { v0.16b, v1.16b, v2.16b }, [x6]
  ld3 { v0.16b, v1.16b, v2.16b }, [x6]
  ld3 { v0.16b, v1.16b, v2.16b }, [x6]
  ld3 { v0.16b, v1.16b, v2.16b }, [x6]
  ld3 { v0.16b, v1.16b, v2.16b }, [x6]
  ld3 { v0.16b, v1.16b, v2.16b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480205800505991000000001001180042001502548010110024001124000010024000024000050035338494862939080040080080800671403494801002002400002400002002400007200008006780049118020110099010010080000800000100240018214202400571315924003901373800510911711800410602400002400001008005880063800678006780045
48020480062599111000006400000800513181814254801361002400392400001002400002400005003515364553297518002508004480062100344480100200240000240000200240000720000800588005811802011009901001008000080000010024000003902400420003240039511742171510911711800479002400002400001008005080068800688005080068
480204800675991000000042100208006400180254801381022400142400001002400002400005003545551486242118004808006780067003324801002002400002400002002400007200008006780067118020110099010010080000800000100240018170024005600060240039513745005109117118005910002400002400001008006380045800638006380046
480205800626011110000064100248005731818025480118100240030240130100240000240000500354297955560300800430800448006210034448010020024000024000020024000072000080062800571180201100990100100800008000001002400000390240000000024004050560171510911711800649902400002400001008006880068800688005080068
480204800676001000000000000280047315002548014910024003924000010024000024000050035429793601919180043080062800621003444801002002400002400002002400007200008006280044118020110099110010080000800000100240000038024004200041240040515642170510911711800479002400002400001008006780068800688006880068
480204800495991000104047100008004731915102548010710024000924000010024000024000050035388075724897180025080056800621003304815532002400002400002002400007200008004480057118020110099110010080000800001100240000039024004200002400385037450051091171180059101002400002400001008006380063800458006380063
4802048004460010000000501000180050315010254801371002400422400001002400002400005003505352486372918004808006780067150349480100200240000240756200240000720000800678006711802011009901001008000080000010024001917420240017101602400395137000510911711800410602400002400001008004580063800638006380045
480204800626001000000047000028004731518025480142100240042240000100240000240000500350535248636981800480800678006700332480100200240000240000200240000720000800678005011802011009901001008000080000010024001818426524005710259240039503645005109117118004101002400002400001008004580058800588006480063
480204800525991101000065100038197931501415748014710024000524000010024000024000050034958935698290080031080067800660034948010020024000024000020024000072000080050800671180201100990100100800008000001002405371800240017101202400400136000516311711800596002400002400001008007380045800638005880063
48020480062599101111006400002800550181814254801391002400392400001002400002400005003542979553509708002508006280062003444801002002400002400002002400007200008005780057118020110099010010080000800000100240000039024004300042240039513745005109117118005901002400002400001008006380045800578006380045

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0040

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480025801316010000004710218004721515025480054102400052400001024055824000050352515757433801800438006280066103444803742024000024000020240000720000800628006211800211091101080000800000102400000024004202782400425142455019517448005910100240000240000108006580071800828006380063
480024801456000000104700118002921515025480052102400422400001024000024000050167644155329801800478004480062143444800102024000024000020240000720000800628006211800211091101080000800000102400004502400000297240041500455019417348005910100240000240000108006380067800638006780063
48002480343601100000471021800472415102548005510240044240000102400002400005026451183603705180043800628006210344480010202400002400002024000072000080062800621180021109110108000080000010240000002400420258240042604245501941734800591400240000240000108006380063800638004580067
4800248035560000000047003180029015151025480055102400472400001024000024000050285322657479851800258004480062034448001020240000240000202400007200008006280062118002110911010800008000001024000045024004215124004251420501941744800410100240000240000108004580063800498008580045
4800248032060000011047000180029041510254800551024004224000010240000240000501676441359592318004380062800620344480010202400002400002024000072000080044800621180021109110108000080000010240000450240041028824004251045501941734800410100240000240000108006380063800638006380063
480024803426010000004710218004701501025480055102400422400001024000024071250350858656916491800438004480062103264800102024000024000020240189720000800628006211800211095101080000800000102400004502400420270240042004345501941743800591400240000240000108004580063800638006380063
48002480200600000000010218004720151025480052102400062400001024000024000050351506236032460800438006280062103264800102024000024000020240000720000800628006211800211091101080000800000102400004502401311243240043514205019417448005910100240000240000108178081499800708006380063
480024803636010000000100180047215150254800561024004324000010240000240000501676441553297518002580062800621034448001020240000240000202400007200008006280062118002110911010800008000001024000045024000002582400425042455019417438005914100240000240000108006380045800458035180067
480024801206000000007100608002920151025480277102401722400001024000024017850352515757391981800438006580044123484800102024000024000020240000720000802138007711800211091101080000800001102400004502401741452400415141455019417448007410103240000240000108004580063800458006380063
48002480214600000110470011802312151502548005210240043240000102400002400005035562015519827180043800448038303444800102024000024017720240000720639800498006211800211090101080000800000102400004502400425160240000504245501941743800590100240000240000108006380045800638006780063