Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (Q)

Test 1: uops

Code:

  ldr q0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1005394300001451013792121220251000100010001498936939439421732531000100010003943941110011000100001000043103900391039613943007321611391101041000395395395395395
1004394200000451013792121216251000100010001498936939439422132531000100010003983941110011000100001000043103900381038613943007311611391141441000395395395395395
1004394300001451013792121216251000100010001527437339439421732561000100010003943941110011000100001000043103900391039613943007311611391101041000399399395395395
1004394300000451013802121216251000100010001501836939439421732591000100010003983941110011000100001000043103900391039613943007311611391101041000396395395395395
10043943000004510138321116251000100010001501837339839822132611000100010003983941110011000100001000043103800381038613944007311611395101471000395395399399399
10043983000004401137921119251000100010001501836939839822132561000100010003983941110011000100001000043103800391038613944007311611391101441000395395399395399
1004394300000440113832121219251000100010001501837339839422132611000100010003943941110011000100011000043103800391039613943007311611391101041000395395395395399
1004394300000450113792121219251000100010001503436939839822132521000100010003983941110011000100001000043103800391038613943007311611395101041000395395395399395
10043942000004501137921116251000100010001520837339839822132601000100010003983941110011000100001000043103800381039613944007311611395141041000395399399395395
10043943000114501138321119251000100010001527437339839822132561000100010003993941110011000100001000043104000381038613943007311611395141471000399399399399399

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr q0, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0053

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5020512005390010100110020100012002611951510946925601064010210002100003010010000100001079053573575061373971120032012005612005611315131136735010030200100001000060200100001000012005712005311502011009910040100100001000001001000121100020111000011110003210410722119660400040651000040100120042120057120057120057120042
5020412005389910000100074168100012002611951510946925601034010410001100003010010000100001079026573647661364191120017012005312005311315131136305010030200100001000060200100001000012005612005311502011009910040100100001000011001000321100020111000011011003210210722119660400049001000040100120086120042120057120057120057
5020412006590010100000010000012004111951210946625601064010210002100003010010000100001079053573647661334581120029312004112005311317431136745010030200100001000060200100001000012005612004111502011009910040100100001000011001000110100010011000001112003210213522119663400020601000040100120057120057120042120057120057
5020412004190010100000010100012002611951510946625601034010410001100003010010000100001079053573647661334580120017012004112005311313731136735010030200100001000060200100001000012005612005311502011009910040100100001000001001000421100020111000011110003210210722119668400020051000040100120054120054120042120057120057
5020412005389910000000010000012004111951210946625601064010210002100003010010000100001079026573633261364190120029012004112004111314831136705010030200100001000060200100001000012005612005311502011009910040100100001000001001000211100020111000001010003210213532119654400049051000040100120057120057120057120054120057
5020412004189910000100020000012003811951210946925601064010410002100003010010000100001079026573575061364701120017012005312005311315131136735010030200100001000060200100001000012004112005311502011009910040100100001000001001000230100020211000011010003210210722119660400040051000040100120057120057120057120057120042
50204120053899101001000700000120026119515109455256010340104100021000030100100001000010790265735750613345811200170120053120053113137281137155010030200100001000060200100001000012005612005311502011009910040100100001000001001000111100020011000001110003210210722119763400049051000040100120062120042120054120054120057
5020412005689910000000020100012004111951310946925601034010410002100003010010000100001079053573633261365720120017012005312005611314831136735010030200100001000060200100001000012005612004111502011009910040100100001000011001000231100010111000011011003210213522119654400020651000040100120058120055120042120057120057
5020412004189910000000010100012004111951310945525601064010210002100003010010000100001079053573633261365721120017012004112005311313731136705010030200100001000060200100001000012004112005311502011009910040100100001000011001000111100010011000011110003210210776121466401979051000040100122359122047120908122230122390
5020412238191610000202525331121121000122369119513109469526010640102100021000030100100001000011021885754766616318901218130122285122143113960280115108552183327511502114586769211389114591226251219942815020110099100401001000010000010010036711006200537891002711110004463310723120888401901010101000040100120135121418121852122336120060

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 19 | 1e | 22 | 24 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
500251200628991100101000120020119509109464256001340012100011000030010100001000010795575736236613366211200300120054120054113172311369050010300201000010000600201000010000120051120035115002110910400101000010000110100000110000000100001010000314051073311965040002131001000040010120036120055120055120052120055
50024120054899000000100120039119512109449256001040012100011000030010100001014810795175736236613275711200300120051120051113172311367450010300201000010000600201000010000120054120035115002110910400101000010000010100000110000000100001000000314051074411965040002131391000040010120061120064120042120061120042
50024120060900100000700120020119492109467256001040012100011000030010100001000010795845736236613366211200110120051120035113169311369350010300201000010000600201000010000120051120035115002110910400101000010000010100000110000000100000010000314021072411966940002131391000040010120055120055120055120052120036
50024120035899000000000120036119509109467256001340012100001000030010100001000010795845736380613366201200300120035120035113169311369050010300201000010000600201000010000120051120051115002110910400101000010000110100000110000000100001010000314021075311966940002130121000040010120052120055120055120055120055
50024120054899000000100120039119492109467256001040012100011000030010100001000010795845735455613366201200300120035120051113169311369050010300201000010000600201000010000120035120035115002210910400101000010000010100000010000100100001010000314021073411966940000101091000040010120055120036120055120036120036
500241200548990000001001200201194921094492560010400101000010000300101000010000107958457363806133815112003001200541200351131533113693500103002010000100006002010000100001200511200511150021109104001010000100001101000001100000001000010000003140310744119669400021313121000040010120055120036120055120036120036
5002412003589900000010012003611951210944925600134001210001100003001010000100001079584573623661338150120030012005412005411317231136935001030020100001000060020100001000012005112009311500211091040010100001000001010000001000020310000101000031404107341196694000701091000040010120052120036120036120055120036
500241200548990000001001200391195121094672560013400101000110000300101000010000107958457354556133662112002701200961200541131553113674500103050110106100646002010000100001200541200511150021109104001010000100000101000111100010011000011111003140510742119675400041313121000040010120061120058120042120061120061
500241200608991110001001200201195121094642560013400101000010000300101000010000107952657362366132757112003501200541200541131533113693500103002010000100006002010000100001200391200351150021109104001010000100001101000001100000001000010100003140310744119669400021313121000040010120036120055120055120055120036
500241200548990000001001200391195151094702560016400121000210000300101000010000107963857365246133216112003601200411200601131782211368050010300201000010000600201000010000120041120049115002110910400101000010000010100012110001001100001111000314041073311965040000013121000040010120055120055120055120052120055

Test 3: throughput

Count: 8

Code:

  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  ldr q0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9f | a0 | a3 | a5 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80205267322001011000006500032671721818152580100100800001008000050011670261267172672726727166503166858010020080000200800002672726722218020110099100100800008000010080000390800350039800396135005110116112671901060800001002672826728267282672826735
8020426722201000000000450102267160121216258010010080000100800005001167231126702267272672716650316685801002008000020080000267272670711802011009910010080000800001008000039080039004280039613543051101161126719010104800001002672826708267282672826708
8020426727200000000100410002267122181816258010010080000100800005001165789126702267272672716650316680801002008000020080000267222672211802011009910010080000800001008000039080039003980000613943051101161126719010104800001002672826728267282672326728
802042672720000000000041000126712218121625801001008000010080000500116723112670226727267271663031668580100200800002008000026707267221180201100991001008000080000100800003908003900398003561353905110116112671901062800001002672826708267082672826728
8020426727200000001100410002267122181816258010010080000100800005001167231126702267272670716650316685801002008000020080000267272672211802011009910010080000800001008000039080039003980039013943051101161126728010100800001002672826728267282672826728
802042672720000000100041000026712218121825801001008000010080000500116780812669726728267271665031668580100200800002008000026722267221180201100991001008000080000100800003908003914388003961043051101161126737010104800001002670826728267332672826735
8020426722200000001100410010267070181816258010010080000100800005001166525126702267512672716645316685801002008000020080000267272672211802011009910010080000800001008000039080039024880039613943051101161126707010100800001002672826728267282672826728
802042672720000000100045001226712218121225801001008000010080000500116780802669726722267271664531668580100200800002008000026727267221180201100991001008000080000100800004308003500398003561354305110116112671901060800001002672826728267282672826728
80204267072000000000004500022671221212162580100100800001008000050011672311267102672726727166503166808010020080000200800002672226722118020110099100100800008000010080000390800000039800356104305110116112672401062800001002672826723267232672826728
8020426727200000001000450012267122181216258010010080000100800005001167231126703267272672716650316685801002008000020080000267222672211802011009910010080000800001008000039080039003580039613943051101161126719010104800001002672826728267282672326727

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0b | 0e | 0f | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002526736200101110006601012672137702580010108000010800005011682860126712267362673716681316716800102080000208000026737267361180021109101080000800000108002020430800190006480040615943190502041642267341313080000102673726737267372673726750
8002426715200101110006701032672227719258001010800001080000501168283012711926764267371668131671680010208000020800002673626737118002110910108000080000010800201900800591016080041611943181502041654267331313580000102673726737267372674126760
800242675520010000100670003267213771825800101080000108000050115823001267122674026753170643166958001020800002080000267362673611800211091010800008000001080020194308005910121800406158431915020416242673300580000102671626737267372673726814
80024267392001001100067010227312270192580010108000010800005011677910126711267372671416681316695800102080000208000026715267361180021109101080000800000108001919430800180016180000615843191502051626267331313080000102673726737267382671626737
8002426736200101110006701022672830712580010108000010800005011690500126717267502671816664321671980010208000020800002671726737218002110910108000080000010800201945122800601006380039615845190506621624267331313080000102673826738267382671626737
80024267362001011000029701022672227712580010108000010800005011690500126711267362673616681316716800102080000208000026737267361180021109101080000800000108002120450800191016380039015843190502041664267341313080000102671626738267382671526737
80024267362001011100012301022669937019258001010800001080000501171764012671526837267211668131669580010208000020800002673726715118002110910108000080000010800192043080059000618000060594319050202162426734130580000102673826716267472722826928
80024267392071011111070000326722207192580010108000010800005011684720126690267142673616681316716800102080000208000026737267361180021109101080000800000108002020450800590022180041615843190502021624267331313080000102671626737267372671626740
8002426736200111111006600032673137702580010108000010800005011739750126715267842698716697316695800102080000208000026737267141180021109101080000800000108002119430800590022180040615943190502021642267331313580000102673826737267382673726737
800242673620010111110660003267210772025800101080000108000050116721901267122674526715166923166948001020800002080000267362673711800211091010800008000001080020194308001910021800390159431925020416642673400580000102687526744267502673726737