Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDUR (32-bit)

Test 1: uops

Code:

  ldur w0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | 09 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10053983010060001383171530172510001000100015288399398220325610001000100039881111001100010000100058105454105461544373616663956631000399399400400399
10043982110060001385171530172510001000100015284398398220325610001000100040181111001100010001100058105454105461544373616663966631000399399399402399
10043983000061001384171530172510001000100015284398398220325610001000100039881111001100010001100058105454105461544373516763956631000399399399399399
10043983000060101383171530172510001000100015284398398220325610001000100039881111001100010000100058105454105461544373616663956631000399400399399400
10043982000060101383171530172510001000100015305398398220325710001000100039881111001100010000100058105454105461554373616673956631000399399399399399
10043983000060101383171530172510001000100015284398398221325610001000100039881111001100010000100058105454105461544373616663956631000399399399399399
10043983000060001383181530172510001000100015284398398220325610001000100039883111001100010000100058105454105461544373616663956631000399399399400399
10043983000060001383171530172510001000100015284398398220325610001000100039881111001100010000100058105454105461544373516663986631000399399399399402
10043983000060101383171530172510001000100015312398398221325810001000100039881111001100010000100058105454105461544373616663956631000399399399399399
10043983000063101383171530162510001000100015312398398220325610001000100039881111001100010000100058105454105561544373616653956631000399399399399399

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldur w0, [x6, #1]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020570053525111000010100700386978459712254010830103100023010010000616078334235004966961070053700416465236495640100302001000060200100007005335114020110099100100003010010000110010003211000202410000111110261027111698043000606010000301007005570110700607005470054
4020470053525101000120000700386978459712254010830103100013010010000616032334235004966973070053700536465136494440100302001000060200100007005335114020110099100100003010010000110010002201000101110000111110261017111698163000666010000301007005470054700547005470054
4020470053525111000010001700416978459712254010830106100023010010000616032334235004966973070053700536463736495640100302001000060200100007005335114020110099100100003010010000010010001201000200110000111110261017111698163000360610000301007007370043700547005470054
4020470053525110000020001700266978659712254010830103100023010010000616032334235004966973070053700536464936494440100302001000060200100007005635114020110099100100003010010000110010003211000201110000111120261017111698043000306610000301007005470054700547005470057
4020470041524101000020100700386978459712254010830106100013010010000616032334235004966961070053700536464936495640100302001000060200100007005335114020110099100100003010010000110010002111000101410000111120261017111698163000660610000301007004270054700547005470042
4020470053525100000010101700266978459712254010830106100023010010000616032334235014966973070053700536464936495640100302001000060596100007005635114020110099100100003010010000010010002311000201110000111110261017111698163000666610000301007007470060700567005470054
4020470053525101001010101700266970259712254010430106100023010010000616078334235004966973070041700536464936495640100302001000060200100007004135114020110099100100003010010000010010003111000201110000111110261017111698163000666610000301007005470042700547005470054
4020470053525101000020000700386970259712254010830106100023010010000616032334235014963939070053700416464936495640100302001000060200100007004135114020110099100100003010010000010010002301000201110000110100261017111698163000666610000301007004570042700547005470054
4020570041524111000020001700386970259701254010430106100023010010000616032334176904966973070041701496464936494440100302001000060200100007006235114020110099100100003010010000010010003111000101110000111120261017111698163000660610000301007005470054700547004270057
4020470041524111000020101700386978459712254010830106100023010010000616078334235014966973070053700536463736494440100302001000060200100007004135114020110099100100003010010000010010002201000102110000011100261017111698163000600610000301007005470042700547004270054

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0060

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257005752511110102000070026697845971925400143001610002300101000061699533426860496696170057700416467836498340010300201000060020100007004135114002110910100003001010000110100012110001115100001111000252007716569820300060101010000300107006170042700617006170061
4002470057524100100020000700496978459701254001830016100013001010000617072334709004966980700577006064678364985400103021510000600201000070044351140021109101000030010100000101000221100010011000011110002520057165698233000313101310000300107004270102700947006870062
400247004152510010002010070045697025970125400183001610002300101000061707233426860496698070041700576467836496640010300201000060020100007004135114002110910100003001010000010100023110001021100001111200252006716569823300061010010000300107005870042700617006170042
400247006052510000007000070045697845971925400143001610001300101000061707233417690496698070041700606467536496640010300201000060020100007006035114002110910100003001010000110100011110002001100001111100252006715569820300060101010000300107006170061700617006170061
40024700605251000000200007002669781597192540014300161000230010100006170723342686049669857006070060646783649664021830020100006002010000700573511400211091010000300101000011010001111000100110000110100025200571566982330006010010000300107005870061700617006170058
40024700575241011000101017002669784597162540018300161000530010100006170723342686049669807005770060646833649854001030020100006002010000700653511400211091010000300101000011010002101000200110000111100025200671656982330006130010000300107006170042700617006170061
400247006052410111002000070042697815971625400183001610002300101000061707233426860496698070057700576467836498540010300201000060020100007004135114002110910100003001010000110100021110001021100001101100252005716669823300060101010000300107005170061700427006170061
4002470060524101100010001700266970259719254001830016100013001010057616995334268604966977700577006064659364966400103002010000600201000070060351140021109101000030010100000101000330100010011000011012002520057166698233000313101010000300107006170042700587005870061
400247005752511110002010170026697845970125400183001310002300101000061707233417690496698070057700606467836498540010300201000060020100007006035114002110910100003001010000010100032110001112110000111120025200571666982030003010010000300107006170061700587006170058
4002470060525101010020000700466978459701254001430016100013001010000617045334254204964030700597012464675364982400103002010000600201000070060351140021109101000030010100001101000111100010011000011111002520067165698233000310101010000300107004270061700617004270058

Test 3: throughput

Count: 8

Code:

  ldur w0, [x6, #1]
  ldur w0, [x6, #1]
  ldur w0, [x6, #1]
  ldur w0, [x6, #1]
  ldur w0, [x6, #1]
  ldur w0, [x6, #1]
  ldur w0, [x6, #1]
  ldur w0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526722200014110126707018186258010010080000100800005001168388149236422672226707166453166808010020080000200800002672271118020110099100800001008000001008000039800352358003561353951101161126719662800001002672326776268222672426734
8020426732200014110126707018181525801001008000010080000500116780814923642267072672216645316680801002008000020080000267227111802011009910080000100800000100800003980035235800356103951101161126704662800001002672326723267082672326726
8020426722201004100026707218181125801001008000010080000500116780814923642267222671016649316680801002008000020080000267227111802011009910080000100800000100800000800350358003661353951101161126719602800001002672326811268052672426733
802042672220000501012670721802258010010080000100800005001159747149236452672226722166453166808010020080000200800002672256118020110099100800001008000001008000039800351358003561353951101161126719602800001002672326808268322672826732
8020426722200004110126707218181225801001008000010080000500116652504923642267222672216645316680801002008000020080000267227111802011009910080000100800000100800003980035335800350103951101161126719660800001002672326791268152672926739
80204267072000041011267072180225801001008000010080000500116780814923642267222672216645316680801002008000020080000267227111802011009910080000100800000100800000800353358003561353951101161126719662800001002672326778268142672526878
8020426725200004100126707218012258010010080000100800005001167808149236422672226722166303166808010020080000200800002672271118020110099100800001008000001008000039800351358003500353951101161126719062800001002672326845268192672526728
80204267072000041011267142181817258010010080000100800005001166818149236422672226722166303166808010020080000200800002672271118020110099100800001008000001008000039800351358003561353951101161126719660800001002672326723267292673726725
80204267292000000112670721818325801001008000010080000500116780814923642267222672216645316680801002008000020080000267227111802011009910080000100800000100800003980000335800350135051101161126719602800001002672326833267992672526723
80204267302000041011266922181813258010010080000100800005001166525149236422672226722166453166658010020080000200800002672271118020110099100800001008000001008000039800355358003601353951101161126719062800001002672326858268112672626738

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526738215100008010326710200172580010108000010800005011677340492363902673526736166823167188001020800002080000267358211800211091080000108000011080020190800571106280038615742190050209166112673499080000102673926720267382673826735
80024267322141100165102268120018125800101080000108000050117020404923652026715267321667731669580010208000020800002673381118002110910800001080000010800211908005700162800376157019115020151611112672999280000102671526733267152673326733
800242671521510000651022683821818152580010108000010800005011676600492365202671426732166783167138001020800002080000267148111800211091080000108000001080019194280057121628003861574219005020616872672990280000102672626718267332671626715
800242673220710101210012671720181625800101080000108000050116766004923661026745267151668431671380010208000020800002671464118002110910800001080000010800191942800190002180000615742191050207166112672900080000102673326733267152671526734
80024267332071100065000267232018162580010108000010800005011676600492365202671426732166603166958001020800002080000267328111800211091080000108000001080021200800190012180000615742191050207167132672909280000102671526733267332671626734
80024267332071000021002268142180162580010108000010800005011676600492067202673426732166603166958001020800002080000267328211800211091080000108000011080020200800571002180038605701920502013161182671299080000102671526733267332671626734
8002426733204111006610326844200152580010108000010800005011675990492363502673226715166773166948001020800002080000267328111800211091080000108000001080019194280058101218003800574219105020516672671299280000102673426716267152673326716
80024267152041100021103268442181815258001010800001080000501167299049236530267352671516679316712800102080000208000026732811180021109108000010800000108002020428005710059801680157019005020816762672999280000102673326733267332671626716
800242673320610000230002681121818125800101080000108000050117020404923827026718267191668131671280010208000020800002673282118002110910800001080000010800192142800191112180000005742192050206166132671290280000102671626715267332671526716
8002426732200111006510326813018015258001010800001080000501167660049236520267142673216678316713800102080000208000026732641180021109108000010800000108002020428005700159800380157019005020916982671299080000102673326733267332673326715