Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDUR (64-bit)

Test 1: uops

Code:

  ldur x0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005403201104410238321121625100010001000152740394394217325210001000100039477111001100010001100043103803810386139447371677395131071000399399399399399
100439830000440013832111925100010001000152080398398217325610001000100039877111001100010000100043103903810386139447361677395141471000399395399399399
100439830000440013792111925100010001000152741398398221325610001000100039877111001100010000100044103903810386138447361677395101071000399399399399399
1004398300004400138321121625100010001000152081398394217325210001000100039477111001100010000100044104003810386138447371677395141471000399399395399399
100439830000440013832111925100010001000152740398398221325610001000100039877111001100010001100044103903810386138447371677395141471000399399399399399
100439830000440013832111925100010001000152081398398221325610001000100039877111001100010000100043103803810386138447371666395101471000399399395395399
10043983000044001383212121925100010001000152671398398217325610001000100039877111001100010000100043103803810386138447371677395101071000399395395399399
100439830000440013832111625100010001000152741394398221325610001000100039877111001100010000100044103803910386138447361676395141471000395395395399399
100439830000440013832111925100010001000152081398398221325610001000100039877111001100010000100044103803810386138447371677395141471000399395395399399
100439830000450013792111925100010001000152741398398217325610001000100039877111001100010000100043103803810386138447371677395141471000399399399399399

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldur x0, [x6, #1]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0051

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40205700515251100001010700366978259749254010030100100013011110007615549334242304966971070035700516470066500040118302301001060260100107008735114020110099100100003010010000010010000110000001000011111261911611698613000310101010000301007005270052700527005270052
40204700515251100001010700366978259749254010030103100013011110007615549334242304966971070051700516470066500040118302301001060260100107012935114020110099100100003010010000110010000110000001000011111261911611698613000000010000301007003670052700527005270059
40204700515251100001010700366976459710254010430100100003010010000616014334147004966971070051700356463136495440100302001000060200100007013435514020110099100100003010010000010010000110000101000010000261027122698143000310101010000301007005270052700527003670052
4020470035524000000101070036697825969525401003010310001301001000061601433414700496697207005170051646473649544010030200100006020010000700643511402011009910010000301001000001001000011000000100001100026102712269814300031001010000301007005270052700527005270052
40204700515250000001301070036697825971025401043010310001301001000061617533422540496697107010970054646483649544010030200100006020010000701033511402011009910010000301001000001001000001000000100001100026102712269814300031010010000301007005270036700527005270052
40204700515250000001010700366978259695254010030103100003010010000616014334147004966971070051700516463136495440100302001000060200100007013535114020110099100100003010010000010010000010000001000011000261027122697983000310101010000301007005270052700367005270052
40204701145250000000000700396978259710254010430103100003010010000616023334147004966971070051700516464736495440100302001000060200100007014735114020110099100100003010010000010010000110000031000010000261027122698193000010101010000301007005270052700527005270052
4020470051525000000601070036697825969525401203010310001301001000061617533422540496697107003570035646473649544010030200100006020010000701403551402011009910010000301001000001001000011000010100001100026102712269814300030101010000301007005270052700527003670036
40204700355250000001010700366978759695254010430103100013010010000616014334225404966971070051700516464736493840100302001000060200100007016435114020110099100100003010010000010010000010000001000010000261027122698143000310101010000301007005270052700527005270036
40204700515250000001000700366978259695254010430103100013010010000616014334225404966971070035700516464736493840100302001000060200100007014935114020110099100100003010010000110010000110000001000000000261027122698143000310101010000301007003670052700527003670052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40025700535241110001101700396977759712254001430016100023001010000617009334235004966973070053700536467136497840010300201000060020100007005335114002110910100003001010000010100032110003001100001111002520047146698163000666610000300107005470054700547005470089
40024700475240000001100700736972859706254001430013100013001010000616952334206204966967070047700476467036497240010300201000060020100007004735114002110910100003001010000010100000110000000100001010002520047144698103000366610000300107004970048700487004870048
40024700475240000001100700806974659709254001430013100013001010000616952334206204966967070047700476466536497240010300201000060020100007004735114002110910100003001010000010100000110000000100001010002520047164698103000366610000300107004870052700487004870048
40024700475250000011100700336972859706254001430013100013001010000616952334206204966967070047700476466536497240010300201000060020100007003535114002110910100003001010000010100000110000000100001010002520047144698103000366010000300107004870048700487004870048
40024700475250000001100700336973059706254001430013100013001010000616952334206204966967070047700476466536497240010300201000060020100007004735114002110910100003001010000010100000110000000100001010002520047144698103000360610000300107004870048700487004870048
40024700355250000001100700616972859706254001430013100013001010000616952334206204966967070035700476466536497240010300201000060020100007004735114002110910100003001010000010100000110000000100001010002520057144698103000066610000300107004870048700487004870402
40024700485250000015681007007669730597071244001430013100013058410000616952334211004966968070052700506466936497540010300201000060020100007004735114002110910100003001010000010100000010000000100001010002520047144698103000366610000300107004870048700487004870048
40024700475250000001100700716973259706254001430013100013001010000616952334206204966967070049700476466536497240010300201000060020100007003935114002110910100003001010000110100000110000000100001010002520047164698103000366610000300107004870048700487004870048
40024700475250000006000700406972859706254001430013100013001010000616952334206204966967070047700476471136497240010300201000060020100007004735114002110910100003001010000010100000110000000100001010002520047145698103000366610000300107004870048700487004870048
4002470047525000000110070032697435970625400143001310001300101000061695233420620496696707004770047646653650334001030020100006002010000700473511400211091010000300101000001010000011000000150100001010002520057144697983000366610000300107004870048700367004870048

Test 3: throughput

Count: 8

Code:

  ldur x0, [x6, #1]
  ldur x0, [x6, #1]
  ldur x0, [x6, #1]
  ldur x0, [x6, #1]
  ldur x0, [x6, #1]
  ldur x0, [x6, #1]
  ldur x0, [x6, #1]
  ldur x0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205267292000000440012671621122225801001008000010080000500116652504923651267312673116654316689801002008000020080000267317711802011009910080000100800001008000043800380388000061394351101161126728014107800001002673526732267282673226807
8020426709200000044001267122012225801001008000010080000500117703804923651267312673116654316665801002008000020080000267317711802011009910080000100800001008000043800000388003861384451101161126728014147800001002685126738267352673826732
802042673120000004410126716211222258010010080000100800005001165556049236532673126731166503166858010020080000200800002673177118020110099100800001008000010080000438003803880038613944511011611267280074800001002681726741267332673226728
8020426741200000044001267120112332580100100800001008000050011671270492380527028269041665431668980100200800002008000026731771180201100991008000010080000100800004380038038800386139445110116112670400147800001002682526737267352680326732
802042670720000004500126716211222580100100800001008000050011671270492365126731267311665431668980100200800002008000026731771180201100991008000010080000100800004480039039800386104451101161126728014104800001002672826732268822678426732
80204267272000000451002669221129525801001008000010080000500116555614923651267272673116654316689801002008000020080000267317711802011009910080000100800001008000044800380398003961394451102161126728014107800001002673226732267322673226714
802042673120000004500126716212122225801001008000010080000500116712704923651267272672716650316685801002008000020080000267317711802011009910080000100800001008000043800390398003801394451101161126704014107800001002684126729267312673926732
80204267312000000440002671621122258010010080000100800005001168312149236512673126731166543166858010020080000200800002670777118020110099100800001008000010080000080039038800006039435110116112672800107800001002682226736267372673426736
80204267312000000440012671630129125801001008000010080000500116555604923647267312672716654316665801002008000020080000267317711802011009910080000100800001008000043800390398003861384451101161126728010147800001002683826737267372674126728
80204267312011100440012671621123258010010080000100800005001165556049236272672726731166543166858010020080000200800002673177118020110099100800001008000010080000438003903880038603805110116112672801007800001002682026737267322677026732

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025267282001000571126712212119258001010800001080000501167501104923647267392673416672316688800102080000208000026731771180021109108000010800000108000004380000038800386139445020171617172672401410780000102673226732267322673226732
800242673120700006210267160111925800101080000108000050116884300492365126829267451668131668880010208000020800002673177118002110910800001080000010800000080038054800380039445020151614172672801010080000102673226732267092673226732
8002426708200000060026693210192580010108000010800005011671240049236512680226735166813167118001020800002080000267275611800211091080000108000001080000008003800800006039445020171616162672501410080000102670926732267322673226794
8002426731200000086012671620002580010108000010800005011671240149236532683226740166823166888001020800002080000267085611800211091080000108000001080000043800000388000060384450201716161226728000780000102670926709267092673226732
800242673120000009210267162110258001010800001080000501166750004923651267272673116672101684080010208000020800002670877118002110910800001080000010800000448003803880038613844502017161717267280014780000102672826732267322673226732
800242673120000004400266932010258001010800001080000501166750004923651267082673116652316722800102080000208000026731771180021109108000010800000108000000800380388003961043502017161718267240140780000102673226732267332673226736
8002426708200000030112671621212025800101080000108000050116712400492362826728267311667631671980010208000020800002673180118002110910800001080000010800000438016803880038603844502015161017267280140080000102670926732267322673226728
8002426731200000044102669300120258001010800001080000501167124004923651267082670816676316696800102080000208000026708801180021109108000010800000108000004380000038800386039445020141617926730000780000102673226709267282670926732
8002426731200000059012671621019258001010800001080000501167201004923628268402671216682316711800102080000208000026731771180021109108000010800001108000000800380388000061044502016161717267050014080000102673226732267322673226732
80024267312001000561126713011216258001010800001080000501167124004923628267312672816652316688800102080000208000026731771180021109108000010800000108000004380038038800386039445020161618102672501414080000102673226732267282670926732