Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDP (S)

Test 1: uops

Code:

  ldp s0, s1, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01)cycle (02)030508090b0e0f1e22233a3f4346494f51inst issue (52)~issue ld/st (55)~dispatch ld/st (58)huge thing ld/st (5a)696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op ld/st (7d)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst fp/simd load (98)inst ldst (9b)9da0a1a3a6a7a8a9acafb5b6bbdcache load miss (bf)dtlb miss (c1)c2cfd5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)? fp/simd (ee)f5f6f7f8fd
2005404311010160101383171518162510001000100015242373398399953131100020001000399389111001100010000102120561072101741053617144191731161139566310001000399405399418399
200439830000006010138317151821251000100010001566937439940410131321000200010003983991110011000100001000058105400054105461544300731161139566310001000399401399402399
200439820000006000138317151820251000100010001555937839839910131311000200010004044041110011000100001000058105400054105461544300731161139566310001000402399401401399
200439830000006010138317151820251000100010001555937940440510131371000200010004054041110011000100001000058105400054105461544300731161139566310001000397399399399399
200439830000006000138317151820251000100010001555137940440410131371000200010004044041110011000100001000058105400054105461544300731161139566310001000399400399402399
200439830000006000138317151820251000100010001555937940440410131371000200010004044041110011000100001000058105400054103561354300731161139566210001000390401399405399
200439820000006000138317151820251000100010001555937940440310131371000200010004034041110011000100001000058105400035105461544300731161139566310001000399399399399399
2004398300000060001383171530172510001000100015236373398398963131100020001000398398111001100010000102020571071102741053617244190731161140166210001000390399399399399
2004398300000060001383171518172510001000100015290373398398953131100020001000398398111001100010000102020571072100741053617244191731161140166210001000381399399399399
2004398300000060101383171518162510001000100015493374398398963131100020001000398398111001100010000102019561072100741053617244191731161140166210001000391393409390399

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldp s0, s1, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01)cycle (02)0308090b0e0f18191e1f2223243a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8a9acafb5dcache load miss (bf)c2c5cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
602051202019000000000340100012003511953810949425601034010210001100003010010000100001079566573588861208691120011012005012005011189731123745010030200200001000060200100001000012004712004711502011009910040100100001000001001000001100001001000010000321011351111965840002968100001000040100120048120051120048120051120048
60204120047899000100010000012003511949410945825601004010010001100003010010000100001079539573881661191441120026012005012005011189531123745010030200200001000060200100001000012005012003511502011009910040100100001000001001000001100000001000011000321011351111965540000990100001000040100120048120048120048120090120036
602041200358990001000130000012007411949410944325601034010210001100003010010062100001079539573785661181891120026012005012004711189531123745010030200200001000060200100001000012004712004711502011009910040100100001000011001000001100000001000000000321011001111965540002608100001000040100120051120036120048120051120051
60204120035899000000010000012003211950310945525601034010210000100003010010000100001079330573809661190480120026012004712003511189531124165010030200200001000060200100001000012004712004711502011009910040100100001000001001000001100000001000000000321011350111965540002600100001000040100120051120051120036120051120051
60204120050899000100010010012003611950310945525601004010210001100033010010000100001079557573915261190671120011012003512003511189531124165010030200200001000060200100001000012004712004711502011009910040100100001000001001000001100000031000011000321011351111964640002098100001000040100120051120051120036120051120051
602041200508990001000000000120035119494109443256010040102100011000030100100001000010791265738960611919101200260120050120050112251311241650100302002000010000602001000010000120051120047115020110099100401001000010000010010017011003400919081002911000391743322112166140209665100001000040100122740122546122654122613122652
60204122746919010003031396127280100123012120920110387100960463403571006310064347801162311628115882258039326194819112231201230291230321125624451138665765735214232961080868900117061167212316212322435150201100991004010010000100000100100384010042041179031004811200321011001111965540002998100001000040100123426123527123380123289123313
60204122331899110000000000012002011949410944325601034010210001100003010010000100001084456573646461182411120026012009212005311189531124165010030390200001000060200100001000012005012004711502011009910040100100001000001001000001100000001000001200321011351111964640000905100001000040100120036120148120048120153120036
60204120050899000000010000012006311949410945825601004010010001100003010010000100001079539573852861182161120023012003512003511190331123745010030200200001000060200100001000012005012003511502011009910040100100001000001001000001100000001000010000321011081111964740000900100001000040100120051120051120036120051120036
60204120050899000110010000012004011949410945825601034010210001100003010010053100501079539573588861216681120011012003512004711189531124165010030200200001000060200100001000012017312004711502011009910040100100001000011001000001100000001000010000321011081111965540002998100001000040100120051120051120051120036120036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01)cycle (02)03090e0f1e2223243f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)6061696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8a9acafb5dcache load miss (bf)c2c5branch mispredict (cb)cdcfd2d5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
60025120047899010110012002011948010945525600134001210001100003001010000100001079809573588861255810012002312004712004711191531125655001030020200001000060020100001000012004712004711500211091040010100001000001010000011000000010000100000314004993311966540002605100001000040010120048120097120048120048120048
60024120035899000000012003211948010944325600134001210001100003001010000100001079809573588861255810512002312004712004711191531124445001030020200001006260020100001000012005012004711500211091040010100001000001010000011000000010000110000314004993311966540002605100001000040010120036120048120048120046120048
60024120047899011100012002011948010945525600134001210001100003001010000100001079809573529361255810512002312004712004711190331124445001030020200001000060020100001000012004712003511500211091040010100001000001010000011000000010000110000314002943311965040002660100001000040010120036120048120048120048120036
60024120047899001100012002011948910945525600134001210001100003001010000100001079809573529361255810512002312004712004711191531124445001030020200001000060020100001000012004712003511500211091040010100001000001010000011000000010000110000314003992311966540002665100001000040010120048120048120036120048120048
60024120035899011000012003211948010945525600134001210001100003001010000100001079809573588861255811512002312004712003511191531124375001030020200001000060020100001000012004712004711500211091040010100001000001010000011000000010000000000314003993211965040000065100001000040010120048120048120036120048120048
60024120047899000010012003211948910945525600134001210001100003001010000100001079809573588861255810512002312004712004711190331124445001030020200001000060020100001000012004712004711500211091040010100001000001010000011000000010000010000314003994411966540002665100001000040010120048120048120048120048120048
60024120047900000000012002011948010945525600134001210001100003001010000100001079809573529361255810512002312003512004711191531124445001030020200001000060020100001000012004712004711500211091040010100001000001010000001000010010000100000314002993311966540002660100001000040010120048120048120048120036120048
60024120047899000100012002011948010945525600134001210001100003001010000100001079809573588861255810512002312004712004711191531124445001030020200001000060020100001000012004712003511500211091040010100001000001010000011000000310000110000314003993211966540000665100001000040010120048120036120048120036120048
600251200668990111610012003211948010945525600134001210001100003001010000100001079809573588861243440512002312003512003511190331124445001030020200001000060020100001000012004712004711500211091040010100001000001010000001000000010000110000314003943211966540002665100001000040010120048120052120048120048120048
60024120047899000100012003211948010945525600134001010001100003001010000100001079769573529361255810512002312004712004711191531124445001030020200001000060020100001000012004712004711500211091040010100001000001010000001000000010000110010314003993211966540002665100001000040010120048120048120048120048120048

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ldp s0, s1, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01)cycle (02)0304050708090e0f1e22233a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8acafb5dcache load miss (bf)c5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
602051200611111011110061001200341194941094552560103401021000010000301001000010000107953957358886121387112002612004712005111189531123755010030200200001000060200100001000012005612003811502011009910040100100001000001001000021100001001000011100324941084511965540002908100001000040100120036120048120048120051120051
60204120050103900000111000120020119494109458256010340100100011000030100100001000010791265736035612064601200261200481200501118953112421501003020020000100006020010063100001200521200481150201100991004010010000100000100100000110000001000001000321041354411964640000668100001000040100120050120049120048120048120048
6020412003596400000011000120035119538109505256010040102100011000030100100001000010793305735888612060501200231200471200351119093112508501003020020000100006020010000100001200471200352150201100991004010010000100000100100000010000001000010000321041084411964640002665100001000040100120051120051120048120051120036
6020412004793100000001000120032119494109443256010040102100001000030100100001000010791265735888612165211200821200531200351119033112421501003020020000100006020010000100001200501200471150201100991004010010000100000100100000110000001000001000321041084411965740002968100001000040100120051120085120090120041120051
6020412003589900000011000120035119538109458256010340100100011000030100100001000010795395737856612023811200261200501200501118953112416501003020020128100006020010000100001200521200471150201100991004010010000100000100100000010000401000001000323841004411965740002968100001000040100120051120051120036120051120051
6020412008889900000000010120035119494109458256010340102100011000030100100001000010793305738864611971411200261200501200501118813112374501003020020128100006020010000100001200501200472150201100991004010010000100000100100000110000001000010000321041004411965540002960100001000040100120051120036120036120051120051
60204120050899000000010000120035119539109483256010040102100011000030100100001000010791265739491611837301200261200501200501118953112416501003020020000100006020010000100001200501200961150202100991004010010000100000100100000010000001000000000321041354411965540002060100001000040100120051120051120036120051120051
60204120050899000000010101200351194941094552560100401001000110000301001000010000107953957352936120747012002612003512003511189531124165010030200200001006460200100001000012005012004711502011009910040100100001000001001000001100024631000000000321041084411964640000665100001000040100120058120051120051120038120050
6020412005089900000001010120020119503109455256010340102100011000030100100001000010795395739395611904401200231200501200351118953112416501003020020000100006020010000100001200351200471150201100991004010010000100000100100000110000131000001001321041004411965540002968100001000040100120051120051120036120051120051
6020412004789900000001000120035119538109458256010340102100011000030100100001000010795395735293612133901200261200501200501118953112374501003020020000100006020010000100001200501200471150201100991004010010000100001100100000010000001000001000321041354411965740000660100001000040100120036120049120048120054120051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0057

retire (01)cycle (02)03050708090a0b0e0f18191e1f2223243a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696b6d6edispatch stall (70)71scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5bbdcache load miss (bf)dtlb miss (c1)c2branch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
60025120057900101101110036701001120042119484109449256001640014100021000030010100001000010798705735593612350001200330120041120057111925033112488500103002020000100006002010000100001200411200411150021109104001010000100000101000230100010021100001111103140999941196784000410109100001000040010120058120058120061120058120058
6002412004189910010101009040100012002611949010946425600134001410002100003001010000100001079915573651861235001120033012005712005711192803112454500103002020000100006002010000100001200601200411150021109104001010000100001101000110100010011100000111103140599571196594000210100100001000040010120058120058120042120042120061
600241200578991001000000257010011200261194931094642560013400141000210000300101000010000107994257363746126051012003601200601200601119280311245450010300202000010000600201000010000120057120057115002110910400101000010000010100021110001000110000111110314059986119681400041009100001000040010120058120058120061120061120058
6002412005789910110000004700100112004511949310946425600164001210002100003001010000100001079870573637461260511120017012005712004111192503112454500103002020000100006002010000100001200571200571150021109104001010000100000101000211100010111100001111103140599651196754000401012100001000040010120058120058120058120058120058
6002412004190010000011002230100112004211949010944925600164001410002100003001010000100001079915573637461260510120033012004112004111192503112454500103002020000100006002010000100001200571200571150021109104001010000100000101000211100020011100001111003140599751196754000410100100001000040010120058120061120058120058120061
60024120057899100000000025601000120026119490109467256001640014100021000030010100001000010799425736518612605111200170120057120057111925031124385001030020200001000060020100001000012005712005711500211091040010100001000001010003111000200010100001111103140499541196594000410109100001000040010120058120058120061120058120061
600241200578991011010000473010001200421194901094672560016400141000210000300101000010000107994257365186126051012001701200571200571119090311243850010300202000010000600201000010000120057120057115002110910400101000010000010100012010001000110000111110314069945119675400021000100001000040010120044120058120058120058120042
6002412007490010100100001000011200421194901094642560016400141000110000300101000010000107991557363746126051012003301200571200411119090311245450010300202000010000600201000010000120041120057115002110910400101000010000010100011110002012110000111100314049965119675400040100100001000040010120042120042120058120042120058
6002512004189910100100002000011200421194901094672560013400121000210000300101000010000107987057363746126051012001701200571200571119250311245450010300202000010000600201000010000120057120057115002110910400101000010000010100012110001049211000001110031404108651196754000410100100001000040010120058120058120058120042120058
6002412005790010100000002010011200421194841094492560016400141000210000300101000010000107994257365186126051012001701200571200411119250311243850010300202000010000600201000010000120057120057115002110910400101000010000110100022110002001110000111110314039955119659400040109100001000040010120061120058120058120061120042

Test 4: throughput

Count: 8

Code:

  ldp s0, s1, [x6]
  ldp s0, s1, [x6]
  ldp s0, s1, [x6]
  ldp s0, s1, [x6]
  ldp s0, s1, [x6]
  ldp s0, s1, [x6]
  ldp s0, s1, [x6]
  ldp s0, s1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01)cycle (02)0305080b0e0f1e22233a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8a9acafb5b6bbdcache load miss (bf)dtlb miss (c1)cfd5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16020526733200000004110126707218181125801001008000010080000500117318302669726722267226645366658010020016000020080000267272672211802011009910010080000800000100800000080000003980035603543051102162226724106480000800001002672826728267282672326723
16020426727200000014100026712212181825801001008000010080000500116888002670226707267276650368018010020016000020080000267272672211802011009910010080000800000100800000438003900358003561354305110216222672406480000800001002672326728267232672326728
160204267072000000000022671620121625801001008000010080000500117462802670226727267276630366858010020016000020080000267272672211802011009910010080000800000100800000398003900398003960354305110216222672406480000800001002672826728267282672326723
1602042672720000000410012682220121225801001008000010080000500117318302670226707267076650366858010020016000020080000267272672211802011009910010080000800000100800000398003900080039613539051102162226719106480000800001002672326728267282672326728
16020426728200000004500126701212121625801001008000010080000500116888012670226722267276650366658010020016000020080000267272672211802011009910010080000800000100800000398003900080039603500511021622267231010480000800001002673126708267082672326723
160204267102000000041002267020121212258010010080000100800005001174628026682267272670766503668080100200160000200800002670726722118020110099100100800008000001008000003980039000800356004305110217222672486480000800001002672826708267282672826728
160204267272000000045100267252120162580100100800001008000050011688800266822670726727664536680801002001600002008000026707267221180201100991001008000080000010080000039800350039800396104205110216222672406280000800001002672826728267232672326708
160204267072000000041101267202018162580100100800001008000050011705290266892673226732665436690801002001600002008000026732267321180201100991001008000080000010080021194280058015980038615742195110216222672960280000800001002672326728267282672826728
16020426722200000004100226716218181625801001008000010080000500117462802670226722267276650366858010020016000020080000267272672211802011009910010080000800000100800000398003900398003961364305110216222672490280000800001002671526733267332671526733
160204267322001110065002267242181816258010010080000100800005001167371026707267322673266543669080100200160000200800002673226732118020110099100100800008000001008000004280035000800356035390511021622267241010280000800001002672326708267282672326728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire (01)cycle (02)030e0f1e22233a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)6061696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5b6bbdcache load miss (bf)branch mispredict (cb)cdcfd0d2d5d6daddinst fetch restart (de)e0eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160025267352000041101267212180122580010108000010800005011758371026685267222670766680366878001020160000208000026722267221180021109101080000800000108000003980035000358003561353900502000416045267196628000080000102672326723267232672326723
160024267222000041001268332181812258001010800001080000501170107102668226722267226668036702800102016000020800002672226707118002110910108000080000010800000080035000368003501353900502000716055267040628000080000102672326723267232672326723
16002426722200004110126721018022580010108000010800005011701071026684268902672266680367028001020160000208000026722267071180021109101080000800000108000003980000000358003561353900502000516055267196628000080000102672326723267232672326723
1600242672220000411012683821818122580010108000010800005011701071026697267072672266680367028001020160000208000026722267221180021109101080000800000108000003980035000358000061353900502000516045267046628000080000102672326723267232672326723
16002426722199000000267872181802580010108000010800005011687541026682267222672266680367028001020160000208000026722267221180021109101080000800000108000003980000000358003561353900502000616035267190628000080000102672326723267232672326723
160024267222000001002669221818122580010108000010800005011688801026697267222672266530367028001020160000208000026722267221180021109101080000800001108000003980035000358003561353900502000516055267196628000080000102672326708267232672326723
1600242672220000410012677920181225800101080000108000050117010710266972672226722666803670280010201600002080000267222672211800211091010800008000001080000039800350003580035000000502000616054267046628000080000102672326723267232672326723
1600242672220000000126707218180258001010800001080000501170107102669726722267076668036687800102016000020800002672226707118002110910108000080000010800000398003500008000061353900502000516054267046628000080000102672326723267232672326708
16002426707200004110026693018181225800101080000108000050117583710266972672226725666803670280010201600002080000267072672211800211091010800008000001080000008003500008003561353900502000616065267196028000080000102672326723267082682126708
16002426722200004110026713200122580010108000010800005011701071026697267222672266680367028001020160000208000026722267221180021109101080000800000108000003980035000388003561353910502000516054267206028000080000102672326723267232670826723