Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDURSW

Test 1: uops

Code:

  ldursw x0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10053893100114110237921218162510001000100014774374374217323210001000100039471111001100010000100003910000003910396135390073216223916041000390395390395390
100438930000145002374012121625100010001000148383893942123232100010001000389561110011000100011000001039000010356135000732162238610641000395375395395380
1004394300000451023592121802510001000100015609399399221324010001000100039982111001100010000102019010581002210006035430073216223916041000395390390390390
1004391310000411003790121216251000100010001531839939920432571000100010003998111100110001000010202042101900159103861393900732162239110641000395395390395395
10043943100114510237920121625100010001000153574033812043257100010001000399641110011000100001019194210561002110386135390073216223910041000395395395395395
10043943100104500237901218162510001000100015288374394197324710001000100038971111001100010000100003910000003910350035430073216223916041000395395395395395
1004394310000661023670181814251000100010001536239437421232471000100010003895611100110001000010000391040000421051613943007321622391101001000375375375395382
100438331110019710338421818142510001000100015062394389217325210001000100037471111001100010000100003910390003510000139430073216223940041000395395395396375
10043743000004510235920120251000100010001537539939822432391000100010003986411100110001000010192042103500001000610000732162239110041000395395395375375
1004374300000000237920121525100010001000148383943942163252100010001000389711110011000100001000039105700162103861574219073216223969021000382400399400390

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldursw x0, [x6, #1]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0054

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057005152410010001070039697825971025401043010310002301001000061617533414701496697407005470054646503649644010030200100006020010000700543511402011009910010000301001000011001000011000000100001002610271116981730003130010000301007003670036700557005270060
4020470054525000901107002069782597132540104301031000030100100006160143342398149669710700357005164650364956401003020010000602001000070035351140201100991001000030100100000100100001100000010000110261017111697983000013131010000301007005570036700557005570036
402047005452500011070020697905969525401003010310000301001000061604133423981496695507003570035646503649584010030200100006020010000700513511402011009910010000301001000001001000001000015110000100261017111698143000313101310000301007003670052700527003670153
402047005452400089710700206978559713254010430100100013010010000616175334239814966971070035700356465036496640100302001000060200100007003535114020110099100100003010010000010010000110000001000011026101711169818300031301310000301007003670055700367003670037
4020470054524000110700396978259695254010430100100013010010000616175334147014966955070051700516465036493840100302001000060200100007005435114020110099100100003010010000010010000010000001000011026101711169798300030101010000301007005270055700557005570055
402047005452500010170020697855971025401003010310001301001000061617533422541496697407005470054646503649424010030200100006020010000700543511402011009910010000301001000001001000001000000100000102610171116981730003130010000301007003670055700557003670055
4020470054524000000700206978259713254010430103100013010010000616041334147014966974070054700516465036494140100302001000060200100007005435114020110099100100003010010000010010000110000001000010026101171169798300031310010000301007005570036700557003670055
4020470035525000000700396978559695254010430103100003010010000616041334147014966955070054700546465036496040100302001000060200100007003535114020110099100100003010010000010010000110000001000011026101711169814300001013010000301007005570055700367005270036
4020470054524000835007003969785597132540104301031000130100100006160413343049149669740700547005164650364963401003020010000602001000070035351140201100991001000030100100000100100001100000010000110261017112697983000313101310000301007006070098701127003670055
4020470054524000889017002069764597132540104301031000130100100006161753341470149669740700387003564631364954401003020010000602001000070054351140201100991001000030100100000100100000100000010000110261017111697983000300010000301007005270036700557005570055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257005152500111107003669778598082540014300131000130010100006170683342254049669747005470054646533649794001030020100006002010000700513511400211091010000300101000010100001100000001000311252027111698173000310101010000300107003670055700557003670052
400247005452500007000700396977559818254001430013100003001010000617068334239804966974700357005464672364979400103002010000600201000070035351140021109101000030010100001010000110000050100000025201711169814300031301310000300107005570055700557006070067
40024700545240100100700396977859796254001030013100013001010000616991334225404966974700357005464672364979400103002010000600201000070091351140021109101000030010100001010000110000000100000025201711169798300031301310000300107005570055700367005570102
400247005452500001007003969778598232540014300101000130010100006170183342398049669747003570035646723649794001030020100006002010000700353511400211091010000300101000010100001100001191000011252017111697983000313101010000300107005770036700527003670059
40024700545250000110700396977859803254001430013100013001010000617018334147004966974700547005464672364979400103002010000600201000070054351140021109101000030010100001010000110000040100000025201711169817300031301310000300107005570055700367005570059
400247005452400000007003669778596952540010300131000130010100006170683341470049669717003570051646533649604001030020100006002010000700513511400211091010000300101000010100001100000401000011252017111698173000313101310000300107005570055700557005570057
400247005452400001007003969778597102540014300131000130010100006170183341470049669557003570051646723649604001030020100006002010000700353511400211091010000300101000010100000100000301000011252017111698173000313131310000300107005570036700557005270059
400247005152400001107002069778596952540014300131000130010100006170183342203049669557005170051646723649794001030020100006002010000700443511400211091010000300101000010100000100000401000011252017111698173000313131010000300107005570055700367005570056
40024700515240000100700396977859730254001030010100003001010000617018334239804966974700547003564653364979400103002010000600201000070054351140021109101000030010100001010000110012016100000125201711169798300030101310000300107005570036700747003670056
40024700545250110010700396974359713254001430013100003001010000617068334239804966974700547005464653364979400103002010000600201000070054351140021109101000030010100001010000110000040100000125201711169814300031313010000300107005570055700367005570052

Test 3: throughput

Count: 8

Code:

  ldursw x0, [x6, #1]
  ldursw x0, [x6, #1]
  ldursw x0, [x6, #1]
  ldursw x0, [x6, #1]
  ldursw x0, [x6, #1]
  ldursw x0, [x6, #1]
  ldursw x0, [x6, #1]
  ldursw x0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205267232000000411012670721818025801001008000010080000500116780814923642267222672216645316680801002008000020080000267247111802011009910080000100800000100800000390800351035800356135390051101161126719662800001002672326723267232672326723
802042672220000004110126707218181225801001008000010080000500116779004923684267422672216645316680801002008000020080000267267111802011009910080000100800001100800000390800351035800356135390051101161126719662800001002672326723267232672326723
802042672220000004110126707218181225801001008000010080000500116780814923642267222672216630316680801002008000020080000267317111802011009910080000100800000100800000390800350035800356135390151101161126726600800001002672326723267232672326723
802042672220000004110126707218187925801001008000010080000500116681804923642267222672216645316680801002008000020080000267307111802011009910080000100800000100800000390801670035800356135390051101161126719662800001002672326723267232672326729
802042672220000004110126692218181225801001128000010080000500116780804923642267222672216645316680801002008000020080000268307111802011009910080000100800001100800000390800350035800356135390051101161126719662800001002672326813267232674226708
80204267152000000410012670720181125801001008000010080000500115974704923642267222672216645316680801002008000020080000267257111802011009910080000100800000100800000390800350035800356135390051101161126728662800001002672326723267232672326723
802042672220000004100126707218181225801001008000010080000500116681804923642267232672216645316680801002008000020080000267367111802011009910080000100800000100801322390800350038800356135390051101161126719662800001002672326723267232672326723
802042672220001104110126707018181225801001008000010080000500116652504923642267222672216645316680801002008000020080000267257111802011009910080000100800000100800000390800350035800356135390051103161126708662800001002672326723267232672326723
802042672220000004100126707218181125801001008000010080000500116780804923642267222670716645316680801002008000020080000267307111802011009910080000100800001100800000390800350035800356135390051101161126719662800001002672326723267232670826723
802042672220000004110126707218181225801001008000010080000500116709414923645267222672216648916680801002008000020080000267337111802011009910080000100800000100800000390800350035800356135390051101161126719662800001002672326723267232672326708

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0f | 19 | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526736200101100210226721277111258001010800001080000501168286492365726736267361668131671680010208000020800002673785118002110901080000108000001080021204308005910161800396159431915020071656267331313580000102685526750268082674626737
800242673620010010067022672137723258001010800001080000501167298492365626736267361668231671780010208000020800002673685118002110901080000108000001080019204308005800021800406158431915020041644267341313580000102686226748268102674726738
800242673620011100021132672127722258001010800001080000501166960492365726714267151668131671680010208000020800002671586118002110911080000108000001080020194308005900060800406159431905020651654267331313580000102671626737267372673826741
80024267362001001002113267213771925800101080000108000050116696049236572671426715166813167168001020800002080000267368611800211091108000010800000108001920430800590012180040615901905020041644269921313580000102684426741267402674326738
800242673720010110067032672130725258001010800001080000501167736492365626736267371668131671680010208000020800002671585118002110901080000108000001080019204308006004674800396160431905020031655267331313580000102691626742267422674226740
800242673720011000066122670027719258001010800001080000501167791492388626740267721665931669480010208000020800002673685118002110901080000108000001080020194308005910361800396158431925020041644267331313580000102674226740267472675626737
80024267362001010006702267210772125800101080000108000050116530449236562673626737166813167168001020800002080000267378611800211090108000010800000108001920430800600006080040615901805020041656267691313080000102674226813267392674726737
80024267142001110002111267213772025800101080000108000050116530449236562673726736166603167178001020800002080000267368511800211090108000010800000108002021008005900061800390159431925020051666267121313580000102681626743267802672626737
8002426715200100000671326721377322580010108000010800005011677914923635267362673616681316694800102080000208000026736851180021109010800001080000010800192043080058100218000060190190502006165526734013580000102684726744268522674826738
800242671520011110067122672117023258001010800001080000501168286492365726736267361668231671780010208000020800002673685118002110901080000108000001080019204308001901161800406159431905020041644267121313080000102673826805267432682626737