Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (64-bit)

Test 1: uops

Code:

  ldr x0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100538930000065103383218181625100010001000153753983992213256100010001000399811110011000100001019194210581159103861574219173216113969921000400400400400400
100439931100165102384218181625100010001000153753983982213257100010001000399811110011000100001021194210571159103861574219173116113959921000400400399400400
100439931100065002384218181625100010001000153753983992223257100010001000402811110011000100001020204210571158103861574119073116113959921000400399400400400
100439931000065002383218181625100010001000153153993992213257100010001000399811110011000100001019194210570059103861574219173116113969921000400399400400400
100439931001065003384218181625100010001000153753983992213257100010001000399811110011000100001019204210570259103861574119173116113969921000400400399400400
100439931000066003384218181625100010001000153343993982223257100010001000399821110011000100001020214210571059103761574219073116113959921000399400400400400
100439931000065103384218181525100010001000153573993992223256100010001000399811110011000100001020194210571059103861574219073116113959921000400399400400400
100439931010065103385218181625100010001000153493993992213260100010001000402821110011000100001018204210571259103861574219073116113969921000400402400400399
100439831000065002384218181625100010001000153623994002213257100010001000399811110011000100001019204210561060103861574219073116113969921000400400400399401
100439931100065103384218181625100010001000153073983992213257100010001000399811110011000100001019204210571159103961574219373116113969921000400399400400400

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldr x0, [x6]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020570035525010100101070035697815970625401043010310001301001000061600533420621496697070050700506464322650164010030200100006020010000700503511402011009910010000301001000011001000001100000001510000101261027111697983000366010000301007003670036700487005170055
40204700525240000001010700356976459709254010030100100013010010000616005334220614966970700357003564646364953401003020010000602001000070050351140201100991001000030100100000100100000110001030010000101261017111698133000306910000301007005170051700517005170051
402047003552500000010107003569781597092540100301001000130100100006161753342206049669707003570050646313649534010030200100006020010000700503511402011009910010000301001000001001000001100000290010000101261017111698153000360910000301007005170048700517003670067
402047003552400000010107003569781597092540100301001000030100100006161753342206149669707005070035646463649384010030200100006020010000700353511402011009910010000301001000001001000000100000650010000000261017111698103000306010000301007003670051700517003670103
402047003552500000000107003569735597092540100301031000030100100006161753341470049669707003570047646313649384010030200100006020010000700503511402011009910010000301001000001001000001100000450310007001261017111698133000390910000301007003670051700487003670062
402047004752500000010107003269781597092540104301031000130100100006160053341470149669707003570035646433649534010030200100006020010000700473511402011009910010000301001000001001000000100000500010000000261017111697983000360010000301007004870036700367003670036
402047004752500000010007003569781597092540100301001000030100100006160153342206149669757003570035646463649384010030200100006020010000700503511402011009910010000301001000001001000001100000610010000101261017111698103000006610000301007004870048700487003670059
402047004752400000060107003269781597092540104301001000030100100006160053342206049669707005570035646433649534010030200100006020010000700353511402011009910010000301001000001001000000100000350010000101261017111698103000066610000301007003670048700487004870110
402047004752500000010107003269735597062540104301031000130100100006160153341470149669677004770047646313649384010030200100006020010000700473511402011009910010000301001000001001000001100000340010000000261017111698133000360610000301007004970048700367003670072
402047003552500000060107003569764597093740100301031000130100100006161753341470149669677003570035646433649384010030200100006020010000700353511402011009910010000301001000011001000001100000650010000101261017111697983000066010000301007003670036700487004870072

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257004752510000070035697285970625400143001310001300101000061698233414700496697070081700516466536497540010300201000060020100007004735114002110910100003001010000010100000110000401000010252037132697983000090010000300107003670048700487004870086
4002470047524100100700206972859706254001030010100013001010000616952334147004966967700357004764653364972400103002010000600201000070047351140021109101000030010100000101000001100003301000011252027133698103000066010000300107004870036700367003670052
4002470104524000110700326972859706254001030010100013001010000617068334206204966967700477003564653364972400103002010000600201000070035351140021109101000030010100000101000001100004201000011252037133698103000366610000300107004870048700487004870071
4002470035524000010700356972859695254001030013100013001010000617077334206204966955700477005064665365097400103002010000600201000070047351140021109101000030010100000101000001100003501000001252027133698103000396010000300107003670048700487003670086
4002470035524000100700206974359695254001030013100013001010000617068334206204966970700357003564653364960400103002010000600201000070047351140021109101000030010100000101000001100005601000001252027153698103000366910000300107004870048700487004870043
4002470047525000110700356972859706254001430013100003001010000616952334206204966967700477004764668364960400103002010000600201000070047351140021109101000030010100000101000001100003201000010252037133697983000066010000300107003670048700367004870083
4002470047524000000700326972859706254001430013100013001010000616982334206204966955700507003564665364960400103002010000600201000070047351140021109101000030010100000101000001100004101000001252037123698103000396610000300107004870051700487003670049
4002470137525000110700326972859695254001030013100003001010000616952334206204966955700477004764653364972400103002010000600201000070035351140021109101000030010100000101000001100007001000011252037133697983000306610000300107005170048700487003670067
4002470047525000000700206976059695254001430013100013001010000616952334147004966967700437005064653364972400103002010000600201000070047351140021109101000030010100000101000000100003601000001252037132698103000306010000300107004870051700487004870134
4002470047525011000700206972859706254001030013100013001010000616952334206204966970700477004764653364972400103002010000600201000070049351140021109101000030010100000101000001100004001000001252037132698133000000010000300107004870048700367004870107

Test 3: throughput

Count: 8

Code:

  ldr x0, [x6]
  ldr x0, [x6]
  ldr x0, [x6]
  ldr x0, [x6]
  ldr x0, [x6]
  ldr x0, [x6]
  ldr x0, [x6]
  ldr x0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526728200010000451126712212125025801001008000010080000500116757814923647267272672716650316685801002008000020080000267277711802011009910080000100800000100800004380039104280000613943511011611267240104800001002672826708269452675626736
80204267272000000104501266920121224258010010080130100800005001166525149236472674126734166333166658027820080000200800002672777118020110099100800001008000001008000043801715039800390039435110116112672410104800001002670826728269582673526731
80204267272010000000012671221212702580100100800001008000050011665251492364726707267271665031668580100200800002008000026727771180201100991008000010080000010080000438003920408003961394351101161126704000800001002672826708267282687426735
8020426741200000000451126712012121649802321008000010080000500116723114923647267272672716650316665801002008000020080000267277711802011009910080000100800000100800004380039103980000603905110116112672410100800001002672826728267282682326734
802042672720000000001126712001288258010010080000100800005001166525149236272672726727166503166858010020080000200800002672777118020110099100800001008000001008000043800391039800006104351101161126704004800001002670826728267282684726733
80204267272000001104511266922008425801001008000010080000500116652514923647267072672716650316685801002008000020080000267277711802011009910080000100800000100800004380000103980039610435110116112672410104800001002672826728267372683426736
802042672720000000045112671620128925801001008000010080000500116723114923647267072672716630316685801002008000020080000267277711802011009910080000100800000100800004380039220428003961043511011611267241004800001002670826728267372686626729
8020426727200000000450126692001271258010010080000100800005001165789149236472670726727166303166858010020080000200800002672777118020110099100800001008000001008000043800391039800396139435110116112672410100800001002672826728267382682826732
802042675520000001045112669221204325801001008000010080000500116723114923647267272672716650316685801002008000020080000267275611802011009910080000100800000100800004280039003980039610435110116112672410104800001002672826728269122678026728
802042672720000000045102671220121625801001008000010080000500116652514923627267072672716630316685801002008000020080000267277711802011009910080000100800000100800004380039003980000613943511011611267240114800001002672826728269182674626728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526736200111111211032672137774258001010800001080000501168239149236572673626715166603167178001020800002080000267368511800211090108000010800001108002118438001902022800406119431900502071606426734130580000102671526737267372673726743
8002426736200111000701002672220932580010108000010800005011653041492363526737267361665931669580010208000020800002673764118002110901080000108000011080019190800591400153800396158431910502051604626733013580000102671626737267372673826892
80024267432001010002210326722077312580010108000010800005011679391492365726718267391668431671680010208000020801922671585118002110901080000108000001080019194380058000638004001584319105020816086267331313580000102673826738267152681426737
80024267362001100006700326700277552580010108000010800005011671061492365726715267371668131669480010208000020800002673685118002110901080000108000001080020190800581016380040011901910502081608626733130080000102671626737267372691626741
800242674420011000021002267213075525800101080000108000050116773614923656267152673716681316716800102080000208000026715851180021109010800001080000010800211943800590006180000611901910502081608626733130080000102673726737267382681026723
800242671520010100067103267220707125800101080000108000050116696014923656267362673716681316716800102080000208000026736851180021109010800001080000010800192043800610116180039615801900502071605726736013580000102673726737267382681626746
8002426736200100000670032669920792258001010800001080000501165304149236562673726715166813167168001020800002080000267146411800211090108000010800000108002019438001901064800406019019005020616068267331313080000102671626737267382678426746
8002426736200100100671022672220985258001010800001080000501173975149236562673726736166593166958001020800002080000267156411800211090108000010800000108002019080059121618004001580190050206160862673300580000102673726737267372680126746
8002426715200100000211002672229772258001010800001080000501173975149236352673726737166813167168001020800002080000267366411800211090108000010800000108002020438005811060800396019451900502081608626733013080000102673726716267382682526741
80024267232001111116910326721077192580010108000010800005011669601492365726740267361668131671680010208000020800002673685118002110901080000108000011080019204380019131608000061194319205020416057267341313080000102673826737267372678326747