Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDP (signed offset, D)

Test 1: uops

Code:

  ldp d0, d1, [x6, #0x10]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | 92 | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200540331011117010338837721251000100010001553503784044031023136100020001000403403111001010001000010202043106100061103961584319073216114021313510001000403403403404404
200440331111006610338836719251000100010001548603774024031013135100020001000405403111001010001000010192043106010160103961594319273116113991313510001000404404406404403
200440221001006610338737719251000100010001552113784034031003135100020001000404403111001010001000010192043105900160103961594319273116113991313510001000404403404403404
200440231110108500338837719251000100010001548603774074041053136100020001000403402111001010001000010191943105911161104061584319173116113991313510001000403404403404404
200440331110006900238827720251000100010001555903784034031003136100020001000403403111001010001000010202143105910161104061584319173116113991313510001000406403403404404
200440331010006600238727719251000100010001555403774024031013136100020001000402402111001110001000110192145105800161104061594319073116114001313510001000403403404404403
200440231011006710338839720251000100010001556003784034021003135100020001000402403111001010001000010192143105910060103961594319073116113991310510001000403404404403403
200440331010006610338739719251000100010001555403774024031013136100020001000402402111001010001000010201943105800061104061584319273116113991313510001000404403404403405
200440231110006710238827719251000100010001555413774024031013136100020001000402403111001010001000010201943105910160104061584319173116114001313510001000404403404404404
200440331100006700238737719251000100010001553513784034021003135100020001000403403111001010001000010202043106200160103961584419273116114001313510001000404403404404404

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldp d0, d1, [x6, #0x10]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01) | cycle (02) | 03 | 04 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
6020512005389900000110020100120026119494109458256010340102100011000030100100001000010793685736182611829701200260120050120047111881031124905010030200200001000060200100001000012003512004711502011009910040100100001000001001000000100001001000010000321021213311966840004060100001000040100120057120042120054120054120057
6020412005689901001110020100120020119494109443256010340102100001000030100100001005510796815736659611865611200260120035120050111895031123745010030200200001000060200100001000012005012004711502011009910040100100001000001001000001100000061000010100321021353311965740002690100001000040100120051120036120036120051120036
6020412005089900000000010100120035119503109443256010040100100001000030100100001000010796485736272611839911200263120035120051111881031124215010030200200001000060200100001000012004712004711502011009910040100100001000001001000000100000001000010100321031353311965540002065100001000040100120051120051120036120048120051
60204120050899000000000100100120020119494109458256010340100100001000030100100001000010793875736323611788211200260120035120050111895031124165010030200200001000060200100001000012005012004711502011009910040100100001000001001000201100000001000001112321031003311967040004090100001000040100120057120057120051120036120051
6020412005089900000000000100120020119494109443256010340102100001000030100100001000010793515736566611751811200320120041120056111897031124095010030200200001000060200100001000012004112005311502011009910040100100001000001001000310100010111000011112321031003211966840002090100001000040100120065120051120051120036120054
6020412005089900111000010000120043119530109449256010340104100011000030100100001000010798275739254611855411200320120041120053111900031124095010030200200001000060200100001000012005312005311502011009910040100100001000001001000111100020041000000100321031083311964640002698100001000040100120051120051120036120051120051
6020412003589900000000010000120035119494109455256010040102100011000030100100001000010819415736227611788201200210120035120050111903031124165010030200200001000060200100001000012005012004711502011009910040100100001000001001000001100000001000010100321031003311966240004990100001000040100120057120057120042120057120042
6020412005089900000100010000120020119503109458256010340108100001000130100100001000010791775736131611788211200260120050120050111903031124165010030200200001000060200100001000012005012004711502011009910040100100001000001001000001100000001000001011321031083211964640002095100001000040100120051120051120049120036120048
6020412004789900000000000000120036119494109455256010340102100011000030100100001000010799995736464611855701200260120050120050111903031123745010030200200001000060200100001000012005012004711502011009910040100100001000001001000000100000001000001110321031212311965140004990100001000040100120062120057120057120042120042
6020412004189901001000070000120038119530109461256010640104100021000030100100001000010799955737334611922411200290120056120041111902031124095010030200200001000060200100001000012005612004111502011009910040100100001000001001000220100030041000001111321031213311966840004960100001000040100120042120057120042120057120057

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0053

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5e | 60 | 69 | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
60025120052900101100020100120041119489109463256001640014100011000030010100001000010798705736326612585200120047120057120041111909031124535001030020200001000060020100001000012005312005311500211091040010100001000001010002211000100011000011111031401991111966540000068100001000040010120048120048120036120036120036
60024120035899000001020101120041119489109463256001640014100021000030010100001000010798785735593612585200120056120047120047111903031124445001030020200001000060020100001000012003512004711500211091040010100001000011010000011000000001000011112031401991111967140004060100001000040010120054120054120054120054120054
60024120053899111000020100120038119486109461256001640014100021000030158100001000010798785735593612667900120029120053120053111909031124385001030020200001000060020100001000012005612005311500211091040010100001000001010001111000201011000011100031401991111965040002908100001000040010120056120051120051120051120051
60024120047899000000020000120038119486109461256001640012100011000030010100001000010798705736182612585200120027120035120047111903031124445001030020200001000060020100001000012005312005311500211091040010100001000001010002211000100141000011011031401991111967140004665100001000040010120137120606120054120042120042
60024120057899101100310108100120041119489109449256001340012100021000030010100001000010799065735593612350000120034120041120053111924031124535001030020200001000060020100001000012005912004111500211091040010100001000001010001101000100041000011111031402993111997340004065100001000040010120248120057120046120057120042
60024120056899110100010101120038119490109463256001640014100021000030010100001000010799155736182612585200120061120053120041111922031124505001030020200001000060020100001000012005412005311500211091040010100001000001010002211000100211000011111031401991111965940004960100001000040010120057120042120057120042120042
600241200418991110000201011200411194841094492560016400141000110000300101000010000107987857355936125852001200491200411200561119240311243850010300202000010000600201000010000120056120053115002110910400101000010000010100012110001014111000011010031401991111967440002965100001000040010120042120057120057120057120042
60024120056900110100020000120026119489109449256001640014100021000030010100001000010798705736326612585200120039120056120056111924031124535001030020200001000060020100001000012005312005311500211091040010100001000001010001311000100211000011111031401991111967140004968100001000040010120057120057120057120057120057
60024120056899101100020000120041119489109463256001640014100021000030010100001000010799065736326612598500120070120056120053111909031124535001030020200001000060020100001000012004112004111500211091040010100001000001010002211000100011000011111031401991111967140004968100001000040010120057120057120057120057120057
60024120056899111110010100120041119489109463256001640014100021000030010100001000010798705736182612350000120106120061120060111930031124535001030020200001000060020100001000012005612005311500211091040010100001000001010001211000200011000011112031401991111967140004668100001000040010120057120054120042120042120057

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ldp d0, d1, [x6, #0x10]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0053

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
602051200539001000100010001120041119530109461256010640104100011000030100100001000010794145736914611829711200291200411200561118970311240950100302002000010000602001000010000120056120053115020110099100401001000010000010010002111000100110000111111321011103111966440004068100001000040100120057120042120054120042120042
602041200568991010000020101120038119530109461256010640104100021000030100100001000010794145736326611912201200171200561200411118970311240950100302002000010000602001000010000120056120053115020110099100401001000010000010010003111000201110000111100321011213111967340004098100001000040100120057120057120042120042120057
602041200538991111000070001120041119530109451256010640104100021000030100100001000010794145735593611829701200291200561200561118970311240950100302002000010000602001000010000120056120053115020110099100401001000010000010010003311000202110000111120321011103211966540004068100001000040100120057120057120054120054120057
602041200568991110000020001120041119530109461256010340104100021000030100100001000010794645736326611751801200171200561200411118970311240950100302002000010000602001000010000120053120053115020110099100401001000010000010010002211000101110000111100321011102111996140004995100001000040100120059120057120057120057120042
602041200548991111000070001120041119511109463256010640104100011000030100100001000010794145736182611829701200321200561200561119020311241750100302002000010000602001000010000120053120053115020110099100401001000010000010010002211000301110000011110321011212111966840004690100001000040100120042120042120054120042120054
602041200418991110000010100120038119530109463256010340104100021000030100100001000010794145736326611829701200321200561200561119020311241550100302002000010000602001000010000120056120041115020110099100401001000010000010010001111000302110000110100321011213111966840004008100001000040100120057120057120042120057120042
602041200418991010000050001120041119530109463256010640104100021000030100100001000010794145737910611891201200321200561200561118990311240950100302002000010000602001000010000120056120053115020110099100401001000010000010010002101000301110000011100321011003111966940004095100001000040100120054120057120057120042120054
602041200568991000000010000120041119497109563256010640104100011000030100100001000010794145736326611829701200321200561200671119020311241750100302002000010000602001000010000120056120053115020110099100401001000010000010010003201000301110000111100321001002111966240004060100001000040100120057120057120057120054120054
602041200538991110000020001120038119511109463256010640104100011000030100100001000010794145736182611829701200321200561200561119020311240950100302002000010000602001000010000120056120053115020110099100401001000010000010010002211000110110000011100321011213111966240002060100001000040100120042120054120042120054120054
602041200538991010100070000120041119511109449256010640102100021000030100100001000010794145735593611829701200321200411200531119020311240950100302002000010000602001000010000120056120053115020110099100401001000010000010010002111000101110000110120321011003111966440004900100001000040100120057120057120057120042120042

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0057

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
600251200418991100200001200451194841094672560013400141000110000300101000010000107994257363746126051112004001201521200601119283112438500103002020000100006002010000100001200601200411150021109104001010000100000101000111100019041000011010003140199111196784000401012100001000040010120061120061120042120061120061
60024120060899111020001120042119484109467256001640014100011000030010100001000010798705736518612605101200360120060120041111909311245450010300202000010000600201000010000120041120057115002110910400101000010000010100022110002021781000001112003140199111196594000413012100001000040010120061120061120042120058120061
600241200418991110110011200451194901094672560016400141000110000300101000010000107994257363746126051112003601200601200601119283112438500103002020000100006002010000100001200601200571150021109104001010000100000101000321100010111000001112003140199111196784000213012100001000040010120058120061120061120061120058
600241200579001010200011200261194901094652560016400141000210000300101000010000107994257365186123500012002701200541200351119223112451500103002020000100006002010000100001200541200511150021109104001010000100000101000110100021211000011110003140199111196784000413130100001000040010120061120061120061120061120058
600241200609001110200001200451194931094642560016400141000110000300101000010000107991557355936126051012003601200601200601119283112457500103002020000100006002010000100001200601200571150021109104001010000100001101000111100010241000011111003140199111196594000413100100001000040010120058120058120061120042120064
6002412006090311102000012004511949310946725600164001410002100003001010000100001079870573651861260510120017012006012006011192831124575001030020200001000060020100001000012006012005711500211091040010100001000011010002111000201110001111110031401991111965940002131312100001000040010120061120061120061120061120061
60024120060900100010000120042119493109449256001640014100021000030010100001000010799425736374612605111200360120057120057111928311245450010300202000010000600201000010000120060120057115002110910400101000010000010100011010001017100000111100314019911119675400041300100001000040010120042120043120042120061120042
60024120060899111020001120045119493109467256001640014100021000030010100001000010798705736374612605101200360120060120057111909311245450010300202000010000600201000010000120057120057115002110910400101000010000010100021110003011100001111100314019911119678400041300100001000040010120042120061120058120061120058
600241200578991010200001200261194931094672560016400141000110000300101000010000107994257365186126051012001701200601200411119283112454500103002020000100006002010000100001200601200411150021109104001010000100000101000121100010111000001112003140199111196784000413139100001000040010120061120058120063120058120058
6002412005789910102100112004211949310946425600164001410002100003001010000100001079942573637461235000120036012004112005711190931124575001030020200001000060020100001000012004112005811500211091040010100001000001010002211000200110000110110031401991111965940004131012100001000040010120058120061120042120042120112

Test 4: throughput

Count: 8

Code:

  ldp d0, d1, [x6, #0x10]
  ldp d0, d1, [x6, #0x10]
  ldp d0, d1, [x6, #0x10]
  ldp d0, d1, [x6, #0x10]
  ldp d0, d1, [x6, #0x10]
  ldp d0, d1, [x6, #0x10]
  ldp d0, d1, [x6, #0x10]
  ldp d0, d1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 07 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9f | a0 | a1 | a3 | a5 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160205267252000001100740022671221212162580100100800001008000050011693872670202670726707665003666680100200160000200800002672226722118020110099100100800008000010080000039080039004280000600430511021622267040106480000800001002672826728267282672826723
16020426732201000000045000267070018122580100100800001008000050011693052670202672726722665003668580100200160000200800002672726722118020110099100100800008000010080000039080000004280000613500511021622267240100080000800001002672826708267082672326723
160204267232000001100000026712212121625801001008000010080000500117618526682026722267076650036665801002001600002008000026727267071180201100991001008000080000100800000008000000380035610390511021622267240100280000800001002672826708267282670826723
160204267302000000000000226712212121625801001008000010080000500117583726682026707267276645036685801002001600002008000026707267221180201100991001008000080000100800000390800350042800356035390511021622267040106480000800001002672826728267232672826723
160204267272000000000450122671221201625801001008000010080000500117472426702026707267276650036685801002001600002008000026707267221180201100991001008000080000100800000390800390039800396035430511021632267240010480000800001002672826728267282672826728
160204267302000000000410122669220012825801001008000010080000500117323126682026727267076650036685801002001600002008000026727267221180201100991001008000080000100800000008003500080039003943051102162226719060080000800001002672826723267232672826708
160204267142000000000450022671221818162580100100800001008000050011701072669702672726707664503666580100200160000200800002670726707118020110099100100800008000010080000039080035003980039613543051102162226719000080000800001002672826728267082670826723
16020426730200000000048010271260120162580100100800001008000050011731832668202670726727663003668580100200160000200800002670726707118020110099100100800008000010080000000800390038003561043051102162226724006480000800001002672826708267282672826723
16020426739201000000045012267122012162580100100800001008000050011701072669702670726727664503668580100200160000200800002672726722118020110099100100800008000010080000000800000042800396035430511021622267240010080000800001002672326728267082672326723
1602042671120100010004100126712201802580100100800001008000050011702032670202670726722665003668580100200160000200800002672726722118020110099100100800008000010080000000800390038800356135430511021622267040106080000800001002672326708267232672326723

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9f | a0 | a3 | a5 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | branch mispredict (cb) | cf | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600252672820000045001267122120162580010108000010800005011761170267022672726727667236707800102016000020800002672726727118002110910108000080000108000043080039039800396139430502003163326728141048000080000102672826732267282673326728
1600242672720000045001267123121162580010108000010800005011769230267022672726727667336707800102016000020800002672726727118002110910108000080000108000043328017111186800396139430502004163526725101048000080000102672826728267282672826728
1600242670720000045101267122012162580010108000010800005011694631267022672726727667236707800102016000020800002672726727118002110910108000080000108000043080039039800396139430502005163326724101048000080000102672826728267282672826728
16002426733200000450012671221212162580010108000010800005011681571267062672726727667636707800102016000020800002672726727118002110910108000080000108000043080039038800396139430502005163426724101078000080000102672826728267322672826864
1600242673120001045001267122121202580010108000010800005011688240267022672726727667236707800102016000020800002673126727118002110910108000080000108000043080039039800396139430502005165526724101078000080000102673226728267282672826728
160024267292000000001267122121202580010108000010800005011688050266822672726727667336707800102016000020800002672726727118002110910108000080000108000000800398141800396139430502004164426724101048000080000102673226898267352672826728
1600242672720000045001267122121162580010108000010800005011702661267062672726727667236707800102016000020800002673126727118002110910108000080000108000043080040039800396139430502004164426724101048000080000102672826728267282672826728
1600242672920001045001267122112162580010108000010800005011726841267022672726727667636707800102016000020800002672726727118002110910108000080000108000043080038139800396139440502004163426724141048000080000102672826728267282672826728
16002426727200000440012671221212192580010108000010800005011730861267082674026707667236707800102016000020800002672726729118002110910108000080000108000044080040039800396139430502005165526704101448000080000102672826708267282672826728
16002426729200010440012671221212162580010108000010800005011681850267022672726731667236707800102016000020800002673126707118002110910108000080000108000043080039042800396139430502004173326724101048000080000102672826728267282672826728