Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDP (D)

Test 1: uops

Code:

  ldp d0, d1, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01)cycle (02)030e0f1e223a3f4346494f51inst issue (52)~issue ld/st (55)~dispatch ld/st (58)huge thing ld/st (5a)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op ld/st (7d)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst fp/simd load (98)inst ldst (9b)9da0a3a6a7acafb5b6bbdcache load miss (bf)cfd5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)? fp/simd (ee)f5f6f7f8fd
200540331146113792121216251000100010001505313693943949231271000200010003943941110011000100001000431039039103961394373116113951010410001000395395395395395
2004394300440138321119251000100010001528413733983989631311000200010003983941110011000100001000431038138103861394473116113951414510001000399399399399403
2004394311440138321119251000100010001528413733983989631311000200010003983941110011000100001000431038038103861394473116113981410710001000399399399403399
2004398300440138321119251000100010001501403733983989631311000200010003983941110011000100001000431038038103861394473116113951010710001000395399399399399
2004398300440138301119251000100010001528413733983989631311000200010003983941110011000100001000431039038103861394373116113951414710001000399395399399399
2004398300440138321119251000100010001528403733983989631311000200010003983941110011000100001000431038038103861394473116114001414710001000399399399399399
2004398200441138321119251000100010001528813693943989631311000200010003983941110011000100001000431038038103861384473116113951414710001000400399399399399
2004398300441138321117251000100010001528813733984429631311000200010003983941110011000100001000431038038103861394473116113951414710001000399395395399399
20043983004401383211216251000100010001528803693983949631311000200010003983941110011000100001000431038038103861394473116113951415710001000399399399399399
20043983114401383211192510001000100015284037339839892313110002000100039839411100110001000010004310380658103861394473116113911614710001000399395399395399

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldp d0, d1, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0057

retire (01)cycle (02)030508090b0e0f18191e1f223a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8a9acafb5bbdcache load miss (bf)dtlb miss (c1)c2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
602051200578991101100020111200421195121094492560106401041000210000301001000010000107942357363746119172012003301200601200571119033112410501003020020000100006020010000100001200571200411150201100991004010010000100000100100011110003001100001101132102121111197954000210109100001000040100120042120058120058120058120058
6020412005789911100000100011200261195121094642560106401041000210000301001000010000107936857363746119172012003301200571200571119033112410501003020020000100006020010000100001200571200581150201100991004010010000100000100100011010003011100001111132101121111196694000410109100001000040100120058120058120058120058120058
60204120057899100000002011120042119512109449256010640104100021000030100100001000010794235736374611751801200200120041120105111903311242450100302002000010000602001000010000120057120057115020110099100401001000010000110010001211000101110000110123210112111119669400040109100001000040100120058120058120058120059120058
60204120057899101110002011120042119530109464256010640104100011000030100100001000010794235736374611917201200660120065120057111903311241050100302002000010000602001000010000120057120057115020110099100401001000010000010010002311000100110000110123210112111119669400040100100001000040100120058120058120058120058120058
6020412006089911100000201112004211951210946425601064010410002100003010010000100001079423573637461191720120651012050212090011190331124105010030200200001032160522100001000012078612014611502011009910040100100001000001001000820100010311000001113326411273112021740002009100001000040100120058120058120059120058120042
60204120057899112000001011120453119530109464256010640104100021000030100100001000010794235736374611917221200330120057120057111903311241050100302002000010000602001000010000120057120057115020110099100401001000010000010010003211000101110000111103210112111119669400041000100001000040100120058120058120058120058120042
602041200578991000000020111200421195121094642560106401041000210000301001000010000107936857363746118304012003301200571200571119033112410501003020020000100006020010000100001200571200571150201100991004010010000100000100100022110002011100000111232101100121196694000410109100001000040100120058120058120042120058120058
6020412005790011010000110111200261195121094492560103401041000210000302471000010000107942357363746119172112003301200571200571119033112410501003020020000100006020010000100001200571200571150201100991004010010000100000100100011110001001100001111232101121111196694000410109100001000040100120058120058120058120058120059
60204120057899110000002010120042119512109464456010640104100021000030100100001000010793685736374611917211200330120057120057111903311241050100302002000010000602001000010000120041120057115020110099100401001000010000010010002211000301110000111103210112111119651400040109100001000040100120042120058120058120058120042
602041200418991001110020111200431195141094642560103401021000210000301001000010000107936857363746119172012003301200411200571119033112410501003020020000100006020010000100001200411200571150201100991004010010000100000100100021110002011100001111032101121111196694000410109100001000040100120058120058120058120058120058

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire (01)cycle (02)0305080b0e0f18191e1f2223243a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5bbdcache load miss (bf)dtlb miss (c1)c2c5cdcfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
60025120057899100000410000012007911948410945925600104001210001100003001010000100001079951573671061332571120027120035120037111922311244850010300202042410000600201000010000120035120035115002110910400101000010000010100000110000000010000101120031400299221196694000210109100001000040010120052120058120058120052120052
60024120457899000000010100012005911948410945925600104001210001100003001010000100001079858573608461243441120027120053120051111919311265150010300202000010000600201000010000120051120051115002110910400101000010000010100000110000000010000101000031400299221196694000210109100001000040010120036120052120036120056120036
60024120051899000000010100012007311948410945925600134001210001100003001010212100001079858573608461257460120027120051120051111919311244850010300202000010000612961000010000120035120051115002110910400101000010000010100000010000000010000100000031400294221196694000210109100001000040010120052120056120329120054120036
600241200359000000000200101120043119486109449256001640014100011000030010100001000010798785736182612350001200321200531200531119243112450500103002020000100006002010000100001200531200531150021109104001010000100000101000331100010011100001111100314002992211967140004665100001000040010120042120054120054120054120057
6002412005389911001001300101120041119486109449256001640014100011000030010100001000010798785736182612585201200291200531200531119093112450500103002020000100006002010000100001200531200531150021109104001010000100000101000221100020011100001111010314002992211967140004665100001000040010120054120054120042120042120042
6002412004189911001002001011200671194861094492560016400141000210000300101000010000107987857355936125852012002912005312004111192131124505001030020200001000060020100001010712005712005311500211091040010100001000001010003111001200111000011112003140021292211967140004665100001000040010120042120054120054120054120042
600241200538991110000100101120043119486109461256001640014100021000030010100001019710798875736278612598301200291200531200531119213112550500103002020000100006002010000100001200531200531150021109104001010000100000101000111100020011100001111000314002992211967140004665100001000040010120054120042120054120054120054
6002412009989911100007001011200771194861094622560016400141000210000300101000010000107987857361826125852012002912005312004211192231124525001030020200001000060020100001000012005612005311500211091040010100001000001010008311000200111000011111003140021042211967140004665100001000040010120054120055120054120042120054
600241200691089101000056000011200541194861094612560016400461000110000300101000010000107987857362786125852012002912005312005311191231124515001030020200001000060658102111005312027212005451500211091040010100001000001010003211000200010100001111100314002992211967140004665100001000040010120054120054120064120437120055
600241200538991110000140010112044511948610946125600164001410001100003057210000100001079878574461561327060120320120053120056111921601124515001030020200001000060020100531000012005312005311500211091040010100001000001010002111000200211000001110003140021192211967140031665100001000040010120054120054120054120054120440

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ldp d0, d1, [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01)cycle (02)03050708090a0b0e0f18191e1f2223243a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5bbdcache load miss (bf)dtlb miss (c1)c2branch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
60205120057900101001000020000012004211951210946425601064010210001100003010010000100001079423573559361191721120017012005712005711189731124105010030200200001000060200100001000012005712004111502011009910040100100001000001001000241100020911100001111103211212122119669400040100100001000040100120058120058120058120058120070
6020412005790010100000002010011200421195301094642560106401041000210000301001000010000107936857363746117518112003301200571200411118973112410501003020020000100006020010000100001200571200571150201100991004010010000100000100100023010002084371000011012032112100221196694000410109100001000040100120042120058120058120042120086
6020412005789910100100002010011200421195911094642560106401041000210000301001000010000107942357355936119172112003301200571200571119033112415501003020020000100006020010000100001200571200571150201100991004010010000100000100100022110001080011000011111032112121121196694000410109100001000040100120058120058120058120058120070
60204120035899000000100010100012003611949510945925601034010210000100003010010000100001079552573608461182851120027012005112003511189631124205010030200200001000060200100001000012005112005111502011009910040100100001000001001000000100000311100001111203211217221196754000210109100001000040100120058120042120058120042120083
6020412005789910100100002000011200421195121094642560106401041000210000301001000010000107936857363746117518112010301200571200411118973112410501003020020000100006020010000100001200411200571150201100991004010010000100000100100021110001059001000010100032112108221196564000010109100001000040100120052120052120052120052120080
60204120051899000000000010100012003611950310945925601034010210001100003010010000100001079552573608461183361120027312005112005111188131124175010030200200001000060200100001000012005112005111502011009910040100100001000001001000000100000002110000101000321121082211966540002009100001000040100120052120052120052120036120101
60204120051899000000000000100012003611949510945925601034010210001100003010010000100001079552573608461182851120027012005112005111190031123745010030200200001000060200100001000012005112003511502011009910040100100001000001001000001100000200100000010003211213522119656400020109100001000040100120052120052120052120052120055
60204120051899000000000010000012003611949510945925601034010210001100003010010000100001079552573608461182851120027012003512003511189631123745010030200200001000060200100001000012005112005311502011009910040100100001000001001000001100000300100001010003211210822119656400020109100001000040100120052120105120052120052120076
6020412005189900000000001010001200361194951094592560100401021000110000301001000010000107955257360846118441112002701200511200351118963112417501003020020000100006020010000100001200511200511150201100991004010010000100000100100352110035035089185100311010003604529445121637401960109100001000040100122510122444122511122538120099
6020412003589900000000131301000120037120212110244855604024032610000100003052510157100981091637573608461209391122306012294912315511268158411396658865363282345212108727881175612040124295124301351502011009910040100100001000001001000001100000290682881001910100038183315221196504003610109100001000040100122224123053122803122752120076

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0090

retire (01)cycle (02)0307090a0e0f191e22233a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8acafb5dcache load miss (bf)c2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
6002512005289910000011001200361194841094592560013400121000110000300101000010000107992157360846125746112002701200511200511119193112450500103002020000100006002010000100001200511200511150021109104001010000100000101000001100000246100001103140699881196694000210109100001000040010120052120052120052120052120052
6002412007289900000010001200361194841094612560013400121000110000300101000010000107985857360846125746012002701200511200511119193112448500103002020000100626002010000100001200511200511150021109104001010000100000101000001100000219100001103140719791196694000210100100001000040010120062120053120052120052120052
6002412011390000000011001200361194841094592560013400121000110000300101000010000107985857360846125746012002701200511200511119193112448500103002020000100006002010000100001200511200511150021109104001010000100001101000001100000255100001103140799971196694000210109100001000040010120052120052120052120052120052
600241200839000000001100120038119484109459256001340012100011000030010100001000010798585736084612574601200270120051120051111919311244850010300202000010000600201000010000120051120051115002110910400101000010000010100000110000023710000110314079978119669400021009100001000040010120052120052120052120052120052
600241200738990000001000120036119484109459256001340012100011000030010100001000010798855736084612574601200270120051120051111919311244850010300202000010000600201000010000120051120051115002110910400101000010000010100000110000015100001103140899771196694000210109100001000040010120052120052120052120052120052
6002412009790000000010001200361194841094592560013400121000110000300101000010000107988557360846125746012002701200511200511119193112448500103002020000100006002010000100001200511200511150021109104001010000100000101000001100000451000011031407999101196694000210109100001000040010120052120052120052120052120052
6002412012289900000011001200361194841094592560013400121000110000300101000010000107989457360846125746012002701200511200541119193112448500103002020000100006002010000100001200511201391150021109104001010000100000101000001100000264100001103140999781196694000210109100001000040010120052120052120052120052120052
60024120090900000100190001200361194841094592560013400121000110000300101000010000107985857360846125746012002701200511200511119193112448500103002020000100006002010000100001200511200511150021109104001010000100000101000001100000225100001103140899891196694000210109100001000040010120052120052120052120052120052
60024120100899001000110012003611948410945925600134001210001100003001010000100001079903573627661257460120027012005112005111192531124485001030020200001000060020100001000012005112005111500211091040010100001000001010000011000000100001103140899991196694000210109100001000040010120052120052120052120052120052
6002412006889900011011001200361194841094592560013400121000110000300101000010000107986757360846125746012002701200511200511119193112449500103002020000100006002010000100001200511200511150021109104001010000100000101000001100000234100001103140799691196694000210109100001000040010120052120052120052120052120052

Test 4: throughput

Count: 8

Code:

  ldp d0, d1, [x6]
  ldp d0, d1, [x6]
  ldp d0, d1, [x6]
  ldp d0, d1, [x6]
  ldp d0, d1, [x6]
  ldp d0, d1, [x6]
  ldp d0, d1, [x6]
  ldp d0, d1, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01)cycle (02)0307080a0e0f18191e1f22243a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a5a6a7a8a9acafb5b6bbdcache load miss (bf)dtlb miss (c1)c2c5cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16020526731201000010054010126712012019258010010080000100800005001174628026821267272674566543668580100200160000200800002673126727118020210099100100800008000001008000004308003800038800386139430015110216222672801410780000800001002673226728267322673226708
16020426727200000010044010126712212116258010010080000100800005001169085026706267112673966303668980100200160000200800002673126727118020110099100100800008000001008000004308003800038800386139440005110216222672801414780000800001002673226732267322673226708
160204267312000000100000012671221211625801001008000010080000500116908502671526731267076650366858010020016000020080000267312672711802011009910010080000800000100800000430800380003880038610440005110216222672801410780000800001002672826728267082673226728
16020426934200000010045000126712212121625801001008000010080000500116875412671526727267276650366658010020016000020080000267272672711802011009910010080000800000100800000430800390003980038613944000511021622267280140780000800001002673226708267322673226732
16020426731201000010044010126716201219258010010080000100800005001169085026716267312672766303666580100200160000200800002673126727118020110099100100800008000001008000004308003900038800386138430005110216222672801414780000800001002673226732267322673226708
16020426736207000010034400012671621201925801001008000010080000500116908502671426731267316654366658010020016000020080000267312670711802011009910010080000800000100800000008000000038800386136440005110216222672801414780000800001002673226732267322673126732
16020426727201000010044000126716212002580100100800001008000050011686270267022672726731665436685801002001600002008000026727267271180201100991001008000080000010080000043080039000188800386139430005110216222670401410780000800001002672826728267322673226732
160204267272000010100670101267162112192580100100800001008000050011748870267022674926741666036689801002001600002008000026731267071180201100991001008000080000010080000043080054000398000001394400051102162226728000780000800001002670826732267322673226732
16020426727200000010044010126813211192580100100800001008000050011748870267132672726731665036689801002001600002008000026731267271180201100991001008000080000110080000043080000000080038613944000511021622267280010780000800001002673226732267322673226728
1602042673120100001004401012669221119258010010080000100800005001169085026713267312673166503668980100200160000200800002673126727118020110099100100800008000001008000004308000000038800386038440005110216222672401414480000800001002670826732267282673226732

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire (01)cycle (02)03040508090b0e0f18191e1f22233a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5b6bbdcache load miss (bf)dtlb miss (c1)c2cfitlb miss (d4)d5d6dbddinst fetch restart (de)e0eaebecld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160025267452000001001004501002671201211925800101080000108000050116875412670602672726735668236707800102016000020800002672726727118002110910108000080000010800000438003900039800006019431915021114160171726733131358000080000102674126741267192674126719
160024267362011112101004501012671220121825800101080000108000050117462802670202682826709667236707800102016000020800002672726727118002110910108000080000010800000438004000039800006158431915021116160171726711131358000080000102673726737267192671926741
16002426736200110210100000002671221212192580010108000010800005011751621268530267402673666843669480010201600002080000267142674211800211091010800008000001080152200801510012180000613943005021117160171726724141008000080000102672826916267282670826734
16002426727204000000100278800226859001220258001010800001080000501168754126702026738267356672367118001020160000208000026727267311180021109101080000800000108000004380000000080000603943005021116160171726724141048000080000102673226728267322670826708
1600242672720010010010491000326699300192580010108000010800005011677221268680267312673066723670780010201600002080000267272672711800211091010800008000001080000043800390104280039015943190502111616013162672801048000080000102670826728267282670826728
16002426731200100100000450101267122120202580010108000010800005011687541267060267272670766733671180010201600002080000267312670711800211091010800008000001080000043800380003980039603943005021117160171726704101048000080000102672826732268962674226728
16002426731200100100100210103267212771925800101080000108000050116929502670702670726727667236707800102016000020800002672726727118002210910108000080000010800000438000001045800396159441905021117160171726733131358000080000102671526737267372671526738
1600242671420011011010000001267122121202580010108000010800005011746280267070267272672766533670780010201600002080000267272672711800211091010800008000001080000043800390000800006104300502111716016172670401008000080000102672826708267282672826728
16002426707200100100100670103266993071925800101080000108000050116929512671602670726727665336707800102016000020800002672726727118002110910108000080000010800000438000000042800396100005021117160171726724101008000080000102670826732267322672826728
160024267272001001001009300022669937720258001010800001080000501170179026702026801267316672367078001020160000208000026727267271180021109101080000800000108000004380000000080039613943005021117160101626724101048000080000102672826728267282672826728