Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDURSH (64-bit)

Test 1: uops

Code:

  ldursh x0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100539431004510137921212162510001000100015037039439421632521000100010003967711100110001000100043103903910396139437311611391101041000395395395395395
100439430004500137921212162510001000100015018039439421732521000100010003947711100110001000100043103903910396139437311611391101041000395395395399399
1004398301044001383211192510001000100015208039839822132561000100010003987711100110001000100044103803810386139447311611395141471000399399399399399
1004398301144001383211192510001000100015208039839421632561000100010003987711100110001000100043103804510386139447311611395141471000399399399399399
10043983000440013792112192510001000100015274039839822132561000100010003947711100110001000100043103803810386139447311611395141041000395399399399399
1004398300044001383211192510001000100015265039439822132561000100010003987711100110001000100043103803810386139437311611395141471000399399399399399
10043982000440013832112192510001000100015276039839822132861000100010003987711100110001000100044103803810386138447311611395141441000399402399399399
10043983000441013832112162510001000100015267039839822132561000100010003987711100110001000100043103803810386139447311611391101041000395395395395395
1004394301045001383211192510001000100015267039439421732521000100010003947711100110001000100043103903810386139447311611391141471000399395395399399
10043983010440013792112192510001000100015267039839822132561000100010003997711100110001000100043103803810386139447311611395141071000399399399399399

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldursh x0, [x6, #1]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40205700475241100010007003269764596982540100301031000130100100006160053342552149669677003570047646313649504010030200100006020010000700473511402011009910010000301001000001001000011000000100001000261037111698033000396910000301007005170036700517003670048
40204700475250110000107003569735597092540104301031000030100100006160053342206149669557005070050646463649534010030200100006020010000700503511402021009910010000301001000001001000011000010100000100261017111697983000000910000301007005170036700367003670036
40204700505240000010007003569781597092540104301031000130100100006160053342206149669677005070050646463649504010030200100006020010000700503511402021009910010000301001000001001000001000000100001100261017111698133000306910000301007003670051700367003670036
40204700505240000010007003269764596952540104301001000130100100006161753342206149669757005070050646463649504010030200100006020010000700503511402011009910010000301001000001001000011000000100001100261017111698133000360910000301007005170036700367005170051
40204700505250000010007003269781597092540104301001000030100100006160053341470149669557005070047646313649534010030200100006020010000700473511402011009910010000301001000001001000011000000100001000261017111697983000390910000301007003670051700517004870051
40204700355250000010007003269764597092540104301001000130100100006160053342206149669677003570050646463649534010030200100006020010000700473511402011009910010000301001000001001000011000000100001100261017113698283000006610000301007004870051700517005170036
40204700475250010000017003569781596952540104301001000130100100006160053341470149669557005070035646313649534010030200100006020010000700503511402011009910010000301001000001001000011000000100001100261017111698133000396610000301007005170051700367005170051
40204700505250000010017003569764597062540104301031000130100100006160053342062149669707003570050646313649534010030200100006020010000700353511402011009910010000301001000001001000001000000100000000261017111697983000390910000301007009970061700517005170051
40204700745250100010007004069781596952540100301001000130100100006160053342206149669557003570035646433649534010030200100006020010000700353511402011009910010000301001000011001000011000010100000100261017111698133000096910000301007005170036700517005170051
40204700475240000060017002069764597092540100301031000130100100006160053341470149669707003570035646483649384010030200100006020010000700473511402011009910010000301001000001001000011000000100001000261017111697983000009010000301007005170051700367003670051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0081

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257005152410110007010017004269781597162540018300161000230010100006171983341961496734870057700576467536498240010300201000060020100007005135114002110910100003001010000110100000110000018810000110110252067153698203000310101010000300107005870058700427005870058
400247008852410110002000017004269781597162540018300161000830010100006170453342542496697770057700606466636498240010300201000060020100007005735114002110910100003001010000010100011110001400310000101000252037135698213000610101010000300107005870058700587005870058
40024700965251001000201000700426978159717254006630016100013001010000616991334225449669717005170051646693649764001030020100006002010000700513511400211091010000300101000001010000011000046911000001111025203713569823300060101010000300107004270058700587004270147
4002470057525101000020100070042697815971625400183001610010300101000061704533425424966977700417006464675364982400103002010000600201000070057351140021109101000030010100000101000111100010312710000111110252047142698213000610101010000300107005870058700587004270058
400247006352500000001010007002069775597102540014300161000230010100006170453342542496697770057700576467536498240010300201000060020100007004435114002110910100003001010000010100021010002029710000111100252047143698203000610101010000300107006070058700587005870058
400247010852500100002010007004269781597162540018300161000230010100006170453342542496697770057700626467836498240010300201000060020100007005735114002110910100003001010000010100013110002101471000000100025205713469814300001001010000300107005270052700527005270145
40024700575241001100201001700426978159716254001830016100023001010000617045334254249669777005770057646753649824001030020100006002010000700573511400211091010000300101000001010002311000203161000011112025204712469804300061010010000300107005870058700587005870152
40024700575251000000701001700456970259701254001830016100013001010000617027334235049669717003570051646533649764001030020100006002010000700513511400211091010000300101000001010000011000084211000011112025204713569823300061001010000300107005870042700587005870058
400247009352500000005235210007003669702597162540018300161000130010100006169913342254496697170035700356466936497640010300201022060020100007005135114002110910100003001010000010100000110000021631000011111025204714369820300030101010000300107004270042700587005870058
40024700785270000000565010007003669775597102540014300161000230010100006170453342542496697770057700576467836496640010300201000060020100007005735114002110910100003001010000010100012110002190310000001000252057124698223000310101010000300107005870058700427005870058

Test 3: throughput

Count: 8

Code:

  ldursh x0, [x6, #1]
  ldursh x0, [x6, #1]
  ldursh x0, [x6, #1]
  ldursh x0, [x6, #1]
  ldursh x0, [x6, #1]
  ldursh x0, [x6, #1]
  ldursh x0, [x6, #1]
  ldursh x0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205267232000004100012669221800258010010080000100800155001177116492364202672726727166503166858010020080000200800002673671118020110099100800001008000001008000039800390398003961353905110116112672401060800001002672826728267282672826728
8020426722200010000022670730181225801001008000010080000500116780849236470267272672716650316686801002008000020080000267147111802011009910080000100800000100800003980039135801696135430511011611267240062800001002672826728267082672326728
802042672720000000102267120121211258010010080000100800005001167231492364702670726727166503166658010020080000200800002672771118020110099100800001008000001008000039800390398003960354305110116112672400100800001002672826723267282672326723
80204267272000004101022671221212162580100100800001008000050011665254923647026707267271665031668580100200800002008000026739561180201100991008000010080000010080000398003933580039010430511011611267240660800001002672826728267282672826728
8020426707200000450001267222121816258010010080000100800005001165789492364202672726722166503167278010020080000200800002674171118020110099100800001008000001008000039800390398003961354305110416112672401064800001002672826728267232672326723
8020426722200000450001267122121216258010010080000100800005001159747492364702670726727166303167418010020080000200800002672771118020110099100800001008000001008000039800350398003561364305110216112672401002800001002670826728267082672826728
802042672720000045000026707212120258010010080000100800005001165789492364702672726727166303166858010020080000200800002672256118020110099100800001008000001008000039800350398003961393905110116112672401004800001002672826728267282672826728
802042672720000045000126712012121625801001008000010080000500116723149236470267072672716630316685801002008000020080000267357211802011009910080000100800000100800003980039039800356104305110116112670406104800001002672826728267232672826728
80204267272000004500022671220011258010010080000100800005001167231492362702672226722166453166858010020080000200800002672971118020110099100800001008000001008000039800000358003561353905110116112672401064800001002672826728267282672826708
802042672720000045000226692212012258010010080000100800005001167231492364702672726727166503166858010020080000200800002673671118020110099100800001008000001008000039800350398000001394305110116112672401064800001002670826708267282670826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | 0e | 0f | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526737200000104401126693201162580010108000010800005011683220492365102670826731166523167118001020800002080000267317711800211091080000108000011080000000800381041800006139000502021161717267280010780000102673226732267092673226709
8002426739200000104401126724010162580010108000010800005011675010492365102673126708166763166888001020800002080000267087711800211091080000108000001080000043080038105080000613944005020161717162672801410780000102673226709267092671426731
80024267082000000044010266932111925800101080000108000050116973704923651026733267081667631668880010208000020800002673156118002110910800001080000010800000008003800680038603944005020161617162672401410780000102673226732267322672926732
8002426749201000004500126716012002580010108000010800005011671240492365102670826731166763166888001020800002080000267087711800211091080000108000001080000044080000104880038603944005020171617172670501414780000102682226716267092672926741
8002426740200000000001267160118825800101080000108000050116884314923628026728267271665231671180010208000020800002673177118002110910800001080000010800000008000000448003861394400502019161717267281010480000102673226729267092670926728
80024267152010000044000267202011625800101080000108000050116712414923651326732267351667631668880010208000020800002670856118002110910800001080000010800000430800001056800386139000502017161717267280140080000102673226709267292673226732
80024267212000000000112669301272125800101080000108000050116712404923651026731267271667631670880010208000020800002672877118002110910800001080000010800000440800391057800000100005020171618172672801414780000102673226732267092673226728
8002426743201000104500026713212116258001010800001080000501168843049236280267312673116676316711800102080000208000026731561180021109108000010800000108000004308003820418000061044005020181617172672801414780000102673226732267322670926729
8002426737201000000000267162001625800101080000108000050116688604923628026731267081667631668880010208000020800002673156118002110910800001080000010800000008003910458000061394400502019162119267050010780000102673226728267322670926714
80024267272000001045011267162110258001010800001080000501167201149236280267312670816676316711800102080000208000026708561180021109108000010800001108000004308000020158003901394300502012161817267280014780000102673226731267092673226713