Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (Q)

Test 1: uops

Code:

  ldr q0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005394300001451013792121220251000100010001498936939439421732531000100010003943941110011000100001000043103900391039613943007321611391101041000395395395395395
1004394200000451013792121216251000100010001498936939439422132531000100010003983941110011000100001000043103900381038613943007311611391141441000395395395395395
1004394300001451013792121216251000100010001527437339439421732561000100010003943941110011000100001000043103900391039613943007311611391101041000399399395395395
1004394300000451013802121216251000100010001501836939439421732591000100010003983941110011000100001000043103900391039613943007311611391101041000396395395395395
10043943000004510138321116251000100010001501837339839822132611000100010003983941110011000100001000043103800381038613944007311611395101471000395395399399399
10043983000004401137921119251000100010001501836939839822132561000100010003983941110011000100001000043103800391038613944007311611391101441000395395399395399
1004394300000440113832121219251000100010001501837339839422132611000100010003943941110011000100011000043103800391039613943007311611391101041000395395395395399
1004394300000450113792121219251000100010001503436939839822132521000100010003983941110011000100001000043103800391038613943007311611395101041000395395395399395
10043942000004501137921116251000100010001520837339839822132601000100010003983941110011000100001000043103800381039613944007311611395141041000395399399395395
10043943000114501138321119251000100010001527437339839822132561000100010003993941110011000100001000043104000381038613943007311611395141471000399399399399399

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr q0, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5020512005390010100110020100012002611951510946925601064010210002100003010010000100001079053573575061373971120032012005612005611315131136735010030200100001000060200100001000012005712005311502011009910040100100001000001001000121100020111000011110003210410722119660400040651000040100120042120057120057120057120042
5020412005389910000100074168100012002611951510946925601034010410001100003010010000100001079026573647661364191120017012005312005311315131136305010030200100001000060200100001000012005612005311502011009910040100100001000011001000321100020111000011011003210210722119660400049001000040100120086120042120057120057120057
5020412006590010100000010000012004111951210946625601064010210002100003010010000100001079053573647661334581120029312004112005311317431136745010030200100001000060200100001000012005612004111502011009910040100100001000011001000110100010011000001112003210213522119663400020601000040100120057120057120042120057120057
5020412004190010100000010100012002611951510946625601034010410001100003010010000100001079053573647661334580120017012004112005311313731136735010030200100001000060200100001000012005612005311502011009910040100100001000001001000421100020111000011110003210210722119668400020051000040100120054120054120042120057120057
5020412005389910000000010000012004111951210946625601064010210002100003010010000100001079026573633261364190120029012004112004111314831136705010030200100001000060200100001000012005612005311502011009910040100100001000001001000211100020111000001010003210213532119654400049051000040100120057120057120057120054120057
5020412004189910000100020000012003811951210946925601064010410002100003010010000100001079026573575061364701120017012005312005311315131136735010030200100001000060200100001000012004112005311502011009910040100100001000001001000230100020211000011010003210210722119660400040051000040100120057120057120057120057120042
50204120053899101001000700000120026119515109455256010340104100021000030100100001000010790265735750613345811200170120053120053113137281137155010030200100001000060200100001000012005612005311502011009910040100100001000001001000111100020011000001110003210210722119763400049051000040100120062120042120054120054120057
5020412005689910000000020100012004111951310946925601034010410002100003010010000100001079053573633261365720120017012005312005611314831136735010030200100001000060200100001000012005612004111502011009910040100100001000011001000231100010111000011011003210213522119654400020651000040100120058120055120042120057120057
5020412004189910000000010100012004111951310945525601064010210002100003010010000100001079053573633261365721120017012004112005311313731136705010030200100001000060200100001000012004112005311502011009910040100100001000011001000111100010011000011110003210210776121466401979051000040100122359122047120908122230122390
5020412238191610000202525331121121000122369119513109469526010640102100021000030100100001000011021885754766616318901218130122285122143113960280115108552183327511502114586769211389114591226251219942815020110099100401001000010000010010036711006200537891002711110004463310723120888401901010101000040100120135121418121852122336120060

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
500251200628991100101000120020119509109464256001340012100011000030010100001000010795575736236613366211200300120054120054113172311369050010300201000010000600201000010000120051120035115002110910400101000010000110100000110000000100001010000314051073311965040002131001000040010120036120055120055120052120055
50024120054899000000100120039119512109449256001040012100011000030010100001014810795175736236613275711200300120051120051113172311367450010300201000010000600201000010000120054120035115002110910400101000010000010100000110000000100001000000314051074411965040002131391000040010120061120064120042120061120042
50024120060900100000700120020119492109467256001040012100011000030010100001000010795845736236613366211200110120051120035113169311369350010300201000010000600201000010000120051120035115002110910400101000010000010100000110000000100000010000314021072411966940002131391000040010120055120055120055120052120036
50024120035899000000000120036119509109467256001340012100001000030010100001000010795845736380613366201200300120035120035113169311369050010300201000010000600201000010000120051120051115002110910400101000010000110100000110000000100001010000314021075311966940002130121000040010120052120055120055120055120055
50024120054899000000100120039119492109467256001040012100011000030010100001000010795845735455613366201200300120035120051113169311369050010300201000010000600201000010000120035120035115002210910400101000010000010100000010000100100001010000314021073411966940000101091000040010120055120036120055120036120036
500241200548990000001001200201194921094492560010400101000010000300101000010000107958457363806133815112003001200541200351131533113693500103002010000100006002010000100001200511200511150021109104001010000100001101000001100000001000010000003140310744119669400021313121000040010120055120036120055120036120036
5002412003589900000010012003611951210944925600134001210001100003001010000100001079584573623661338150120030012005412005411317231136935001030020100001000060020100001000012005112009311500211091040010100001000001010000001000020310000101000031404107341196694000701091000040010120052120036120036120055120036
500241200548990000001001200391195121094672560013400101000110000300101000010000107958457354556133662112002701200961200541131553113674500103050110106100646002010000100001200541200511150021109104001010000100000101000111100010011000011111003140510742119675400041313121000040010120061120058120042120061120061
500241200608991110001001200201195121094642560013400101000010000300101000010000107952657362366132757112003501200541200541131533113693500103002010000100006002010000100001200391200351150021109104001010000100001101000001100000001000010100003140310744119669400021313121000040010120036120055120055120055120036
500241200548990000001001200391195151094702560016400121000210000300101000010000107963857365246133216112003601200411200601131782211368050010300201000010000600201000010000120041120049115002110910400101000010000010100012110001001100001111000314041073311965040000013121000040010120055120055120055120052120055

Test 3: throughput

Count: 8

Code:

  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205267322001011000006500032671721818152580100100800001008000050011670261267172672726727166503166858010020080000200800002672726722218020110099100100800008000010080000390800350039800396135005110116112671901060800001002672826728267282672826735
8020426722201000000000450102267160121216258010010080000100800005001167231126702267272672716650316685801002008000020080000267272670711802011009910010080000800001008000039080039004280039613543051101161126719010104800001002672826708267282672826708
8020426727200000000100410002267122181816258010010080000100800005001165789126702267272672716650316680801002008000020080000267222672211802011009910010080000800001008000039080039003980000613943051101161126719010104800001002672826728267282672326728
802042672720000000000041000126712218121625801001008000010080000500116723112670226727267271663031668580100200800002008000026707267221180201100991001008000080000100800003908003900398003561353905110116112671901062800001002672826708267082672826728
8020426727200000001100410002267122181816258010010080000100800005001167231126702267272670716650316685801002008000020080000267272672211802011009910010080000800001008000039080039003980039013943051101161126728010100800001002672826728267282672826728
802042672720000000100041000026712218121825801001008000010080000500116780812669726728267271665031668580100200800002008000026722267221180201100991001008000080000100800003908003914388003961043051101161126737010104800001002670826728267332672826735
8020426722200000001100410010267070181816258010010080000100800005001166525126702267512672716645316685801002008000020080000267272672211802011009910010080000800001008000039080039024880039613943051101161126707010100800001002672826728267282672826728
802042672720000000100045001226712218121225801001008000010080000500116780802669726722267271664531668580100200800002008000026727267221180201100991001008000080000100800004308003500398003561354305110116112671901060800001002672826728267282672826728
80204267072000000000004500022671221212162580100100800001008000050011672311267102672726727166503166808010020080000200800002672226722118020110099100100800008000010080000390800000039800356104305110116112672401062800001002672826723267232672826728
8020426727200000001000450012267122181216258010010080000100800005001167231126703267272672716650316685801002008000020080000267222672211802011009910010080000800001008000039080039003580039613943051101161126719010104800001002672826728267282672326727

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526736200101110006601012672137702580010108000010800005011682860126712267362673716681316716800102080000208000026737267361180021109101080000800000108002020430800190006480040615943190502041642267341313080000102673726737267372673726750
8002426715200101110006701032672227719258001010800001080000501168283012711926764267371668131671680010208000020800002673626737118002110910108000080000010800201900800591016080041611943181502041654267331313580000102673726737267372674126760
800242675520010000100670003267213771825800101080000108000050115823001267122674026753170643166958001020800002080000267362673611800211091010800008000001080020194308005910121800406158431915020416242673300580000102671626737267372673726814
80024267392001001100067010227312270192580010108000010800005011677910126711267372671416681316695800102080000208000026715267361180021109101080000800000108001919430800180016180000615843191502051626267331313080000102673726737267382671626737
8002426736200101110006701022672830712580010108000010800005011690500126717267502671816664321671980010208000020800002671726737218002110910108000080000010800201945122800601006380039615845190506621624267331313080000102673826738267382671626737
80024267362001011000029701022672227712580010108000010800005011690500126711267362673616681316716800102080000208000026737267361180021109101080000800000108002120450800191016380039015843190502041664267341313080000102671626738267382671526737
80024267362001011100012301022669937019258001010800001080000501171764012671526837267211668131669580010208000020800002673726715118002110910108000080000010800192043080059000618000060594319050202162426734130580000102673826716267472722826928
80024267392071011111070000326722207192580010108000010800005011684720126690267142673616681316716800102080000208000026737267361180021109101080000800000108002020450800590022180041615843190502021624267331313080000102671626737267372671626740
8002426736200111111006600032673137702580010108000010800005011739750126715267842698716697316695800102080000208000026737267141180021109101080000800000108002119430800590022180040615943190502021642267331313580000102673826737267382673726737
800242673620010111110660003267210772025800101080000108000050116721901267122674526715166923166948001020800002080000267362673711800211091010800008000001080020194308001910021800390159431925020416642673400580000102687526744267502673726737