Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDURSB (64-bit)

Test 1: uops

Code:

  ldursb x0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10053893001110480237921212162510001000100015037039439421732521000100010003947111100110001000010003910390039103961353973116113969921000400400400399400
100439931101114112379212121625100010001000149890394389212325210001000100039472111001100010000100039103900391039613539731161139110641000395395395395395
100439430000104502379212121625100010001000149890389389212325210001000100039471111001100010000100039103900391039613539731161138610641000395395395395395
100439430000004502379212121625100010001000150370394394216325210001000100039471111001100010000100039103900391039613543731161139110641000395395395395395
1004394300001145123792121216251000100010001503703893942163252100010001000394711110011000100001000391039003910396139437311611391101041000395395395395395
1004394300001145023792121216251000100010001498903893892173249100010001000394711110011000100001000391039003910396135437311611391101041000395395395395395
1004394200000046113792121216251000100010001501803993942173252100010001000394721110011000100001000391039003910396135437311611386101021000395395395395390
100438930000114511374218181225100010001000148380394394217324710001000100038971111001100010000100039103900391039613539731161139110641000395395395390395
1004394200001145023792121216251000100010001503703943942173252100010001000394711110011000100001000431039003910396135437311611391101041000395395390390390
1004389300011141123743121216251000100010001503703943942163252100010001000394711110011000100001000391039003910396135437311611386101041000395395395395395

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldursb x0, [x6, #1]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01) | cycle (02) | 03 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020570047524110010107002069781597482540104301031000730403100076155673342615049669707003570035646996649844011830230100106026010010700503521402011009910010000301001000001001000001000055010000101111261901600698553000096610000301007003670051700517005170105
4020470072524000010107002069786596952540104301031000130100100006160053342206049669707003570047646433649384010030200100006020010000700503511402011009910010000301001000001001000011000011210000100000261017111698133000306910000301007003670051700367005170090
4020470050524000010007003569781596952540104301001000130100100006160053341470049669757005070050646463649534010030200100006020010000700353511402011009910010000301001000001001000001000050010000101000261017111697983000390910000301007004870051700487005170070
4020470050524000010107002069786597092540104301031000130100100006160053341470049669707003570050646463649534010030200100006020010000700503511402011009910010000301001000001001000011000036010000101000261017111698133000396010000301007003670051700487003670101
4020470050525000000107002069786597092540100301031000130100100006160053341470049669677004770047646433649384010030200100006020010000700503511402011009910010000301001000001001000001000041310000100000261017111698133000096010000301007003670051700517005170101
402047003552500001000700206976459695254010430103100013010010000616005334147004966970700507005064643364938401003020010000602001000070050352140201100991001000030100100000100100000100002010000101000261017111698133000090010000301007005170036700367003670099
4020470047524000010007002069764597092540100301031000030100100006160053342062049669707005070050646463649384010030200100006020010000700503511402011009910010000301001000001001000011000037010000101000261017111698133000399010000301007005570051700377005170048
4020470052525000010107002069781596952540100301031000130100100006160053341470049669557005070050646313649384031030200100006020010000700353511402011009910010000301001000001001000011000027010000101000261017111698133000300910000301007005170051700367003670101
4020470050524000000007003569764597092540104301031000130100100006161753342062049669707005070050646313649384010030200100006020010000700503511402011009910010000301001000001001000011000029010000100000261017111697983000306910000301007005170051700517005170077
4020470050524000000007003569786597092540100301031000030100100006166093342110049669677005070050646463649504010030200100006020010000700503511402011009910010000301001000001001000001000051010000100000261017111698133000396010000301007005170051700367005170081

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257005652410011022110070041697805971525400183001610002300101000061703633424940496697370056700566467136496640010300201000060020100007005635114002110910100003001010000110100011010001010110000111122520017111698163000396010000300107004270057700577005470057
40025700565241100002600170041697805970125400183001610002300101000061699533424940496697670041700416465936498140010300201000060020100007005635114002110910100003001010000010100012110002001110000110112520017111698183000699910000300107005770057700577005770057
40024700565241010004400170041697805970125400143001610002300101000061699533424941496697670056700566467436498140010300201000060020100007005635114002110910100003001010000010100021110002000110000111102520017111698243000606910000300107005770057700577004270057
40024700565251100003800070026697775970125400143001610002300101000061703633424940496696170061700566475836498140010300201000060020100007005335114002110910100003001010000010100013010001011410000011122520017112698193000669010000300107005770054700547005470042
40024700565251011004900070041697025970125400143001610002300101000061703633424940496698470056700566467136498140010300201000060020100007004135114002110910100003001010000010100012110001000110000111112520017111698193000699610000300107005770057700577005470054
40024700565251111003800070041697805971525400183001610002300101000061703633424940496697670056700416467136496640010300201000060020100007005635114002110910100003001010000010100012010001001110000111122520017111698193000396010000300107006270058700547005770057
40024700545251011006810070038697025970125400143001610002300101000061700933423500496697670056700416467136497840010300201000060020100007005635114002110910100003001010000010100012110001001110000111112520017111698043000390610000300107005470042701587005770057
40024700415251101005910170041697025970125400183001610001300101000061700933424940496696170056700416467136503640010300201000060020100007005635114002110910100003001010000010100013110002001110000111102520027111698193000609910000300107005770042700577004270057
400247005652511110040000700416978059701254001830016100023001010000617036334249414966961700567005664671246496640010300201000060020100007004135114002110910100003001010000010100011010002000110000111102520017111698193000669610000300107005770042700587004270057
40024700565241101003510070026697025971525400183001610002300101000061699533424941496697670056700416476736496640010300201000060020100007005635114002110910100003001010000010100011110003010410000011102520027111698193000306910000300107005770057700427005770054

Test 3: throughput

Count: 8

Code:

  ldursb x0, [x6, #1]
  ldursb x0, [x6, #1]
  ldursb x0, [x6, #1]
  ldursb x0, [x6, #1]
  ldursb x0, [x6, #1]
  ldursb x0, [x6, #1]
  ldursb x0, [x6, #1]
  ldursb x0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802052672320001000100267070181812258010010080000100800005001167808492362732682826742166593166658010020080000200800002672271118020110099100800001008000011008000039800000358003661350511011611267300660800001002672326708267232670826708
8020426707200010041000267072181212258010010080000100800005001166525492364202670726722166453166808010020080000200800002672256118020110099100800001008000001008000039800351358003561039511111611267230060800001002672326723267232672326723
802042672220000004110126707201212258010010080000100800005001166525492362702670726722166453166808010020080000200800002672271118020110099100800001008000011008000008003503580000613539511011623267260062800001002672326723267232672326723
8020426707200000041101267072018124980100100800001008000050011678084923627026722267071664531666580100200800002008000026722711180201100991008000010080000010080000398000003580035613539511011621267040602800001002672326723267082670826708
80204267222000000410002670721818025801001008000010080000500116652549236270267222670716645316680801002008000020080000267225611802011009910080000100800000100800000800350358000001360511021611267190062800001002670826723267082672326728
8020426707200000041001267072018025801001008000010080000500116652549236420267072672216630316680801002008000020080000267227111802011009910080000100800000100800003980035008003560043511111611267230662800001002672326708267232672326723
80204267072000000411012670701801225801001008000010080000500116652549236420267072670716645141668080100200800002008000026876721180201100991008000010080000010080000398003500800000000511011611267320660800001002670826708267232672326723
802042672220000004110026707201802580100100800001008000050011668184923642026707267071664531666580100200800002008000026722711180201100991008000010080000010080000398003500800356100511011631267810660800001002672326723267232672326723
80204267222000000410002670720012258010010080000100800005001167808492364702672226722166303166808010020080000200800002672271118020110099100800001008000001008000039800350080035613539511011611267190662800001002672326723267232670826723
8020426707200011001012669221801225801001008000010080000500116652549236270267222672216630316680801002008000020080000267227111802011009910080000100800000100800003980000039800006100511011611267190600800001002672326708267082672326723

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526727200015910268860121219258001010800001080000501166750114923651267082679916891316718800102080000208000026731561180021109108000010800001108000043800390388003861000502013161713267251010480000102670926729267282672826728
800242672820000590126712201219258001010800001080000501167124014923628267322672716672316713800102080000208000026727771180021109108000010800000108000043800380388003860000502013161915267281414780000102672926709267322670926732
80024267272000015112671601119258001010800001080000501166896004923651267082672716693316813800102080000208000026727771180021109108000010800000108000043800380080038013943050201116121626728014780000102673226709267282672826709
80024267312000026911268590121202580010108000010800005011667500149236472673126731166723167138001020800002080000267315611800211091080000108000001080000438003804380038610430502017161213267661414780000102672826732267292672926709
8002426708201004700267160101925800101080000108000050116884301492364826708267081667231669580010208000020800002672856118002110910800001080000010800000800380080038613843050201116131026728140780000102670926709267092673226709
80024267312000059002678021212212580010108000010800005011672010149236512673126731166723167198001020800002080000267315611800211091080000108000001080000438003803880038613900502013161414267251414080000102672926732267322673226732
80024267272001133112671320102580010108000010800005011671240149236282670826731166723166958001020800002080000267315611800211091080000108000001080000438003903980039613900502016161416267051010480000102672826709267282670926729
80024267312010060012669301119258001010800001080198501167201114923651267312670816672316716800102080000208000026731801180021109108000010800000108000043800000388000061394405020121613162672500780000102672926729267322672826709
80024267312000000126716211162580010108000010800005011672010149236512672826708166723167078001020800002080000267085611800211091080000108000001080000080000038800386139440502016161216267241010080000102670926732267322672826709
800242672820000241126693210025800101080000108000050116712401492364726727267081667231671580010208000020800002672856118002110910800001080000010800004380039008004000390050201816141726705710780000102670926728267092672926728