Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm: Overview | Base Instructions | SIMD and FP Instructions

LDNP (S)

Test 1: uops

Code:

  ldnp s0, s1, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e6 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200540230061100387170192025100010001000154563573834021003115100020001000402402111001100010000100001055025105561254473116113991025100610001000403383409384384
20044023007010138725419202510001000100014453377402402100313910002000100040238311100110001000010005910250581055605544731161139910551010610001000403384403403403
2004383300610003681740202510001000100015487377402383803135100020001000402402111001100010000100059105505510556055073116113801056100610001000383383384384403
2004402311611013872540202510001000100015456357402385100313510002000100040238211100110001000010005910250251025615544731161139910551010610001000384403403403403
20044023006110138717003251000100010001545635738240210031351000200010004023831110011000100001000591055055105561254473116113801055010610001000384384384384403
20044023106110138717019202510001000100014515358402402100313510002000100038240211100110001000010000105505510556125073116113991055100610001000403403384403384
2004383301251013871701920251000100010001545635840240210031351000200010004023831110011000100001000591055055102561550731161137910251010610001000403403403403403
2004402200610013871740202510001000100015499377402402100313510002000100040240211100110001000010005910550551055015544731161139910551010610001000383403403403384
20043824006110038717419202510001000100015456382402402803135100020001000402382111001100010000100059102505810556125447311611399105500010001000403411403403403
2004402310610013871741920251000100010001549938138340710031351000200010004024021110011000100011000010250251055615544731161139910551010010001000384403403403383

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldnp s0, s1, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01) | cycle (02) | 03 | 08 | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e6 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
602051200538990010001000120032119538109443256010340102100001000030100100001000010793305736035611788211200230120047120050111905311242150100302002000010000602001000010000120047120047115020110099100401001000010000010010001111000201110000110032101100111196571000040002665100001000040100120048120036120048120036120036
602041200478990000001010120032119503109455256010340102100001000030100100001000010793305735888611788211200230120035120047111903311242150100302002000010000602001000010000120035120047115020110099100401001000010000010010000001000000010000010032101100111196461000040002665100001000040100120048120048120048120048120036
602041200478990000001010120020119494109455256010340100100011000030100100001000010793305735888611788211200110120047120035111903311242150100302002000010000602001000010000120050120035115020110099100401001000010000010010000011000000010000110032101135111196461000040002668100001000040100120036120048120036120048120036
602041200478991001001000120020119538109455256010340100100011000030100100001000010791265735888611788211200230120035120050111881311242150100302002000010000602001000010000120047120047115020110099100401001000010000010010000011000000010000010032101100111196551000040000605100001000040100120048120048120048120048120036
602041200358990000001010120020119538109455256010040102100011000030100100001000010793305735888611903401200260120047120047111894311242150100302002000010000602001000010000120047120047115020110099100401001000010000010010000011000000010000100032101100111196571000040002600100001000040100120036120036120048120048120048
602041200478990000000000120032119494109455256010340102100011000030250100001000010791265735888611825011200230120047120035111895311242150100302002000010000602001000010000120035120047115020110099100401001000010000010010000011000000010000100032102100111196461000040000965100001000040100120048120036120048120048120048
602041200358990000000010120034119545109443256011140102100011000030100100001000010793305735888611788211200110120047120047111903311242150100302002000010000602001000010000120047120047115020110099100401001000010000010010000011000000010000010032101100111196571000040002095100001000040100120104120036120048120048120048
602041200498991000000010120032119507109446256013840102100011000030100100001000010793305735888611803801200140120047120047111897311242150100302002000010000602001000010000120047120035115020110099100401001000010000010010000011000000010000110032101135111196461000040000665100001000040100120048120048120048120048120036
602041200479000000001000120032119538109455256010040100100001000030100100001000010793305735293611759901200230120119120035111881311251750100302002000010000602001000010000120047120047115020110099100401001000010000010010000011000010010000100032101135111196571000040002900100001000040100120036120036120048120048120048
602041200478990011001010120032119538109443256010040100100001000030251100001000010793935735888611788201200230120035120047111903311237450100302002000010000602001000010000120035120047115020110099100401001000010000010010000001000000010000010032101100111196571000040002605100001000040100120048120036120036120048120048

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | df | e0 | e6 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
600251200478990010010010001200201194891094912560013400101000110000300101000010000107980957358886124344012001312005012004711191503112444500103002020000100006002010000100001200471200471150021109104001010000100000101000001100000001000010100031451991121196501000040002065100001000040010120048120048120048120078120048
600241200508990000000011001200201194801094882560010400121000010000300101000010000107976957352936125581012002312004712004711190303112437500103002020000100006002010000100001200471200471150021109104001010000100000101000001100000001000010100031451991121196501000040002665100001000040010120036120048120054120107120048
600241200478990000000000001200321195591094572560013400101000110000300101000010000108191157354376124344012001112004712004711191503112444500103002020000100006002010000100001200471200471150021109104001010000100000101000001100000001000000100031451991121196501000240002660100001000040010120051120036120055120093120052
600241200478990000000011001200351195651094562560013400101000010002300101000010000107980957352936125841012001112003512004711191803112444500103002020000100006002010000100001200351200351150021109104001010000100000101000000100000001000010000031451941121196501000040002665100001000040010120048120048120054120091120036
600241200358990000000010001200201194861094772560013400121000110000300101000010000107976957358886125581112002312004712004711191503112559500103002020000100006002010000100001200351200351150021109104001010000100000101000001100000001000000000031451991121196501000040002665100001000040010120036120048120054120076120048
600241200478990000000010001200201195491094562560013400121000010000300101000010000107976957358886125581012002312004712003511190303112444500103002020000100006002010000100001200471200471150021109104001010000100000101000000100000001000010000031451991121196651000040000660100001000040010120036120036120054120092120048
600241200478990000000091001200201195531094432560010400121000110000300101000010000107976957358886125581012001112004712003511190303112444500103002020000100006002010000100001200471200471150021109104001010000100000101000001100000001000000100031451991321197101000040002965100001000040010120052120036120089120065120036
600241200478990000000011001200321194861094712560013400101000010000300101000010000107980957358886125581012002312003512004711191503112444500103002020000100006002010000100001200471200351150021109104001010000100000101000000100000001000010100031451991121196651000040002000100001000040010120048120048120092120060120070
600241200478990000000010001200811194861094732560013400121000110000300101000010000107980957352936125581012001112004712004711191503112444500103002020000100006002010000100001200471200871150021109104001010000100000101000001100000001000010100031451991121196501000040002005100001000040010120036120048120107120090120036
6002412003589900000000230001200381194861094722560016400141000210000300101000010000107987857361826125852012002912005312005311190903112450500103002020000100006002010000100001200531200411150021109104001010000100000101000321100020111000011112031451991121196711000140002060100001000040010120054120057120054120086120054

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ldnp s0, s1, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 09 | 0b | 0e | 0f | 1e | 22 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | dcache load miss (bf) | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e6 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
6020512005189930001112003611949510944325601034010210001100003010010000100001079126573608461182851120027012005112005111188131123745010030200200001000060200100001000012005112005111502011009910040100100001000011001000011000000100001103210110811119656100004000210100100001000040100120058120055120036120052120036
602041200518991010111200361194951094432560103401001000110000301001000010000107912657360846118285012001101200511200351118963112374501003020020000100006020010000100001200351200541150201100991004010010000100001100100001100000010000110321011351111964610000400000100100001000040100120052120036120037120052120052
60204120051899000010120036119495109459256010040102100011000030100100001000010795525736084611759901200110120051120051111896311241750100302002000010000602001000010000120035120035115020110099100401001000010000110010000110000039100001103210113511119646100004000010109100001000040100120057120052120052120052120036
602041200358990000111200201195031094592560103401001000110000301001000010000107955257360846117599012002701200351200351118813112417501003020020000100006020010000100001200511200351150201100991004010010000100001100100000100000010000100321011081111964610000400000100100001000040100120055120066120053120052120052
602041200518990000011200361194951094592560103401001000110000301001000010000107955257362306118285012001101200511200511118813112417501003020020000100006020010000100001200351200511150201100991004010010000100000100100010100000010000010321011081111965610000400021009100001000040100120041120052120052120052120036
602041200519000000574112002011951810945925601034010210001100003010010000100001079552573608461182850120011012003512005111188131123745010030200200001000060200100001000012005112005111502011009910040100100001000011001000011000000100001103210110811119656100004000210100100001000040100120055120052120052120052120052
602041200518990000101200361194951094432560103401021000010000301001000010000107912657360846118285012001101200351200511118963112417501003020020000100006020010000100001200541200511150201100991004010010000100001100100000100000010000110321011081111965610000400001009100001000040100120052120036120052120036120052
602041200358990000111200201194951094592560103401021000110000301001000010000107955257416356118285012001101200351200351118813112417501003020020000100006020010000100001200511200511150201100991004010010000100001100100000100000010000100321011081111964610000400020100100001000040100120052120052120052120036120052
602041200358990000340120020119495109459256010340102100001000030100100001000010795525735293611759901200270120035120051111881311241750100302002000010000602001000010000120051120051115020110099100401001000010000110010000010000001000001032101108111196461000040002000100001000040100120071120053120052120052120036
602041200518990000011200201194951094462560100401001000110000301001000010000107955257360846118285012001101200351200511118963112374501003020020000100006020010000100001200511200511150201100991004010010000100001100100001100000010000110321011351111965610000400000100100001000040100120054120036120052120052120052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 09 | 0a | 0b | 0e | 0f | 18 | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e6 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
60025120051899000100000110011200421194911094642560013400121000110000300101000010000107985857362306124344011200270120051120051111919031124515001030020200001000060020100001000012005112005111500211091040010100001000001010000011000000100001010031405942211967510001400020109100001000040010120058120058120058120042120042
60024120057899101000000500011200261194901094642560016400141000110000300101000010000107991557363746126051001200330120041120057111925031124385001030020200001000060020100001000012005712005711500211091040010100001000001010002301000301100001111031402172211967210000400000109100001000040010120100120036120052120052120052
600241200518990000000000000012002011948410945925600134001010000100003001010000100001079858573529361257460112003001200511200511119030311244850010300202000010000600201000010000120035120051115002110910400101000010000010100000010000001000010100314029933119669100004000210100100001000040010120036120052120052120052120052
60024120054899000000000136000012002011948410945925600104001210001100003001010000100001079769573608461257460112002701200351200511119220311244850010300202000010000600201000010000120051120051115002110910400101000010000010100000110000001000000100314029932119650100004000010109100001000040010120036120036120055120052120052
600241200518990000000000000012003611948910944325600134001210000100003001010000100001079858573608461243440112002701200351200511119190311245150010300202000010000600201000010000120035120051115002110910400101000010000010100000110000001000010100314029436119651100004000010109100001000040010120052120036120052120052120052
600241200518990000000000100012003611948410945925600134001210001100003001010000100001079858573608461257460112003001200351200511119030311243750010300202000010000600201000010000120041120041115002110910400101000010000110100000110000001000000100314029432119650100004000210100100001000040010120052120036120055120036120052
600241200518990000000000100012003611948410945925600104001210000100003001010000100001079858573608461257460112002701200511200511119190311244850010300202000010000600201000010000120051120051115002110910400101000010000010100000110000001000010000314029922119676100014000410109100001000040010120058120058120058120058120058
600241200578991000000001100112004211949010944925600164001410002100003001010000100001079915573559361260510012001701200571200571119250311243850010300202000010000600201000010000120057120057115002110910400101000010000010100021010001241000011011314029932119675100014000413100100001000040010120052120055120040120052120052
600241200548990000000001100012003611948410945925600134001210001100003001010000100001079858573529361257460112001101200511200511119190311243750010300202000010000600201000010000120051120051115002110910400101000010000010100000110000001000010102381053211611121549100384019110109100001000040010122390122551122438122436122531
6002412255191802101001191310011226211205321102048386032140221100451005633808111601138311481535786450618478000122048012298512269611256604171135075657134446222421134667726113931112912427312314129150021109104001010000100000101000720100001010000101016399929922120869100004000210139100001000040010120055120036120052120052120158

Test 4: throughput

Count: 8

Code:

  ldnp s0, s1, [x6]
  ldnp s0, s1, [x6]
  ldnp s0, s1, [x6]
  ldnp s0, s1, [x6]
  ldnp s0, s1, [x6]
  ldnp s0, s1, [x6]
  ldnp s0, s1, [x6]
  ldnp s0, s1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a7 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e6 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602052672420000000045000226707201216258010010080000100800005001173519026702267272672266500366858010020016000020080000267072685611802011009910010080000800000100800003980039003980035603505110216222678580039106080000800001002736626730267212670826708
1602042672720000100045396101267120018162580100100800001008000052211701070267022672726707664503668080100200160000200800002670726723118020110099100100800008000001008000039800350035800356135435110216222684180039106280000800001002674626728267232672826728
1602042670720000000041010126692012121625801001008000010080000500116888012670626888268906675036685801002001600002008000026727267221180201100991001008000080000010080000398003900398003901394351102162226728800001010280000800001002672826708267282672326728
1602042672220000000045010126716212120258010010080000100800005001170107126697267272672766500366658010020016000020080000267072672211802011009910010080000800000100800003980035003980035010435110216222673080039106280000800001002672826708267232670826728
16020426727200000000001022673521201225801001008000010080000500117583712669726727267226645036680801002001600002008000026722267071180201100991001008000080000010080000080000000800006035051102162226730800001010480000800001002674226728267082672826723
160204267272000000002101012669921212162580100100800001008000050011746281266972672226722665003668080100200160000200800002670726726118020110099100100800008000001008000039800390035800356104351102162226741800391010480000800001002672826708267282670826723
1602042672720000000045000126712201816258010010080000100800005001176084126702267272672766500366658010020016000020080000267222672211802011009910010080000800000100800003980000003980000613939511021622268058003566280000800001002670826728267082672826728
16020426707200000000450100267192012162580100100800001008035650011685731267022672726707665003666580100200160000200800002672726722118020110099100100800008000001008000008003500358000000354351102162226730800391010280000800001002672826728267282673226745
1602042674020000000045000226712001202580100100800001008000050011746281267022672726712665003697280100200160000200800002670726990118020110099100100800008000001008000039800000035800006135435110216222682380000010280000800001002670826728267282672826728
16020426707200000000450001267070181816258010010080000100800005001170107026682267072672766300366658010020016000020080000267272672211802011009910010080000800000100801303980039003980039013543511021622267358000006480000800001002670826728267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire (01) | cycle (02) | 03 | 08 | 0b | 0e | 0f | 18 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e6 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600252672320000110410022671221801625800101080000108000050117318312670226727267076672036702800102016000020800002670726776118002110910108000080000110800000080035003980000600390502081657267248000010048000080000102670826728267232670826708
160024267272000000045000267122121202580010108000010800005011701071267022672726727665303668780010201600002080000267272670711800211091010800008000001080000039800390035800396135430502051677267248000010648000080000102672826708267082672826728
1600252670720000000000026692200122580010108000010800005011746281266822672726722665303668780010201600002080000267322681911800211091010800008000001080000039800390035800396135005020716752672580000101048000080000102672826728267232672326708
16002426722200000004100126712218181625800101080000108000050117462812670226727267076673036707800102016000020800002670726789118002110910108000080000010800000398003500080039610430502071677267198003901028000080000102672826728267232672326728
1600242672720000000451012671221801625800101080000108000050117318312670226722267076673036707800102016000020800002672226722118002110910108000080000010800000398000000398000061039050207167526719800390048000080000102672326708267082670826708
1600242670720000110451022670701871225800101080000108000050117462812670226707267076673036711800102016000020800002684526807118002110910108000080000010800000438003500358016561350050205165726724800396608000080000102672826728267282670826728
16002426727200000000001266922181216258001010800001080000501174628126682267252675566730366878001020160000208000026731268781180021109101080000800000108000003980000003980000613500502071677268018003910628000080000102670826728267082672326728
160024267222000000045000267070120122580010108000010800005011688801266822670726707667303670780010201600002080000267222672211800211091010800008000001080000008000000080035613539050205165726704800006048000080000102670826708267232672326723
1600242672720000000450012671221218162580010108000010800005011688801267022672726722666803668780010201600002080000267142673211800211091010800008000001080000039800350008003961039050205167526725800000608000080000102672826708267282670826723
1600242672720000000450002675720181625800101080000108000050117010712669726707267076653036691800102016000020800002681026722118002110910108000080000110800000398003900398000061350050207165726726800396628000080000102672826708267282670826708