Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm: Overview | Base Instructions | SIMD and FP Instructions

LDP (signed offset, Q)

Test 1: uops

Code:

  ldp q0, q1, [x6, #0x10]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 0.000

retire (01)cycle (02)030e0f1e22233a3f4346494f51inst issue (52)~issue ld/st (55)~dispatch ld/st (58)huge thing ld/st (5a)6061696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op ld/st (7d)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst fp/simd load (98)inst ldst (9b)9da0a3a6a8acafb5b6bbdcache load miss (bf)cfd0d2d5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)f5f6f7f8fd
2005727611450016930121220252000200020002959710702731727405346020002000200072870811100110001000020000203903920396104373002161172410042000709709728728728
200472760045101712012121625200020002000295970070272770842534602000200020007277271110011000100002000020390402039613943730011611773101042000728728729728728
200470860045001713212121625200020002000296240070272870840534602000200020007087301110011000100002000432039002000613943730011611724101042000728728709709728
20047086004500171201212162520002000200028591007027497344083441200020002000708708111001100010000200043200003820396139437300116117250002000728728729728709
2004727500300069301212162520002000200028591007027317274053460200020002000727727111001100010000200002039042203900390730011611724101042000709709728728728
200472760045000712212121625200020002000295970070472773242534412000200020007277271110011000100002000020390392039613943730011611725101002000709709728709711
2004727500000171221212162520002000200029597006837277274253442200020002000728728111001100010000200043203903920006104373001161172401042000728728728709728
20047275000101712212121625200020002000295970068373671442534602000200020007087271110011000100002000020390392040613943730011611724101042000729728709709729
200472850045000712212002520002000200029597007027277274053460200020002000727728111001100010000200002039002000613943730011611705101042000728728728729709
2004708500451006932121216252000200020002957400702734727405346020002000200072772711100110001000020004320390020396104373001161172410042000709728709709709

Test 2: Latency 1->3 roundtrip (operand 1, q0, fed back into operand 3, the base register x6)

Chain cycles: 3

Code:

  ldp q0, q1, [x6, #0x10]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0053

retire (01)cycle (02)0305080b0e0f181e1f22243a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8a9acafb5bbdcache load miss (bf)dtlb miss (c1)c2c5branch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
60205120053899102000401011200389671810973025701034010410004200003010010000200001042694657335323465684012002901200421200531121413112511601003020020000100006020020062100001203841200572150201100991004010010000100000100200033020003022200002202200321011611119826400020052000040100120054120054120054120042120042
60204120053899101000200011200389671810974225701034010210002200023010010000200001042587357337723465768012002901200531200411121413112511601003020020000100636020020000100001200531200531150201100991004010010000100000100200024220002038200002222100321011611119826400040052000040100120054120054120054120042120042
60204120041899101000200001200269671810974634701064010410001200003010010000200001042691757335323465684112002901200531200411121293112518601003020020000100006020020000100001200411200411150201100991004010010000100001100200022220002002200002222000321011611119826400046052000040100120054120042120054120054120054
60204120041899100000201001200269671810974225701034010410002200003010010000200001042691757335323465336012001701200531200531121293112499601003020020000100636020020000100001200411200531150201100991004010010000100001100200022220003002200002222200321011611119814400046052000040100120143120054120042120054120054
602041200418991111102300011200389671810974225701064010210002200003010010000200001042691757335803465712012001701200591200531121413112511601003020020000100006020020000100001200531200531150201100991004010010000100000100200032220002015200002222000321011611119877400026052000040100120054120056120054120042120042
602041200539021021023133521011200389671810973025701034010410002200003010010000200001042691757335323465713012001701200411200411121413112511601003020020000100006020020000100001200531200531150201100991004010010000100000100200022220003102200002202100321011642119814400046652000040100120042120042120042120054120042
60204120053899111000401011200389670610973025701034010410002200003010010000200001042691757335323465336012002901200411200531121413112499601003020020000100006020020000100001200531200531150201100991004010010000100000100200043220003002200002222000321011611119826400046052000040100120054120054120054120042120054
60204120041899110000200011200439671810974225701064010410002200003010010000200001042587357335323467454012002901200531200411121293112511601003020020000100006020020000100001200531200531150201100991004010010000100000100200032020002012200000222000321011611119826400040052000040100120042120042120054120058120054
60204120053899100000401011200439670610973025701064010410001200003010010000200001042691757335323465684012002901200531200531121293112499601003020020000100006020020000100001200411200531150201100991004010010000100001100200022220002002200002202000321011611119826400046002000040100120054120042120054120054120042
60204120053899110000201001200619671810973025701064010410002200003010010000200001042691757329563465336012001701200531200531121413112511601003020020000100006020020000100001200411200531150201100991004010010000100000100200022220003002200000222200321011611119826400046652000040100120042120042120054120054120054

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0065

retire (01)cycle (02)03050708090a0b0e0f18191e1f2223243a3f43494d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a8a9acafb5bbdcache load miss (bf)dtlb miss (c1)c2c5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
600251200598990001000000200000120044209664910981225700164001210002200003001010000200001042467857330523465118012003512004312005911217031125396001030020200001000060020200001000012004312005911500211091040010100001000011020000042000200220002222000003140616871198344000401092000040010120044120060120060120060120060
6002412005989900000010004000001200442296649109748257001940016100032000030010100002000010426592573334034652920120025120065120049112176311252960010300202000010000600202000010000120065120065115002110910400101000010000110200024420005017200022422000031408168911984040004101092000040010120070120050120070120066120066
600241200659001100010000600000120050229665510975425700164001610003200003001010000200001042659257341083465756012004112006512006511217631125456001030020200001000060020200001000012006512006511500211091040010100001000001020003242000600420002042200003140816781198244000401002000040010120066120066120066120066120066
600241200659001010010000400000120050229665510975425700194001610003200003001010000200001042659257341083465756012004112006512006511217631125456001030020200001000060020200001000012004912006511500211091040010100001000001020006402000500420002242200003140816781198244000601092000040010120066120066120068120066120066
6002412006589910100101016000001200342296655109754257001940016100032000030010100002000010426592573410834657560120041120049120065112176311254560010300202000010000600202000010000120049120049115002110910400101000010000110200023020006024200022422100031408167811984040006101092000040010120050120069120066120066120067
60024120049899101000000040000012005022966391097382570019400141000320000300101000020000104252005734108346529201200251200661200651121603112529600103002020000100006002020000100001200491200651150021109104001010000100000102000224200041042000224220000314081610711984040006101092000040010120066120066120066120066120066
6002412006589910000100004000001200502296655109754257001940016100032000030010100312000010426592573410834652921120041120065120065112176311254560010300202000010000600202000010000120065120049115002110910400101000010000010200022020006024200022402100031407167611984040004101092000040010120066120066120050120066120066
60024120065899100001000060100012005022966551097382570016400161000320000300101000020000104265925733340346575601200251200651200651121763112545600103002020000100006002020000100001200651200491150021109104001010000100000102000224200050142000224220000314081610811984040006101092000040010120050120066120050120066120066
60024120065900110000000032000001200502296655109754257001940016100032000030010100002000010426592573410834652920120025120065120065112160311252960010300202000010000600202000010000120065120049115002110910400101000010000010200032420005104200022422000031407167811984040006101092000040010120069120066120066120050120050
600241200498991110000000400000120050229663910975425700194001610003200003001010000200001042520057341083465756012004112006512006511217631125296001030020200001000060020200001003312006512006511500211091040010100001000001020003242000400420002042220003140816761198404000610092000040010120066120050120066120066120050

Test 3: Latency 2->3 roundtrip (operand 2, q1, fed back into operand 3, the base register x6)

Chain cycles: 3

Code:

  ldp q0, q1, [x6, #0x10]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0056

retire (01)cycle (02)03080b0e0f18191e1f2223243a3f43494d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a6a7a8a9acafb5bbdcache load miss (bf)c2cfd5d6ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
6020512005289900000020100112002800967171097542570103401021000120000301001000020000104268305733052346565511200191200581200561121463112504601003020020000100006095620000100001200591200521150201100991004010010000100001100200000220002000020000202032104161111982940002101092000040100120053120053120044120057120053
602041200568990000002000011200410096721109741257016640102100012000030100100002000010426830573367634658871120036120060120043112131311251460100302002000010000602002000010000120052120052115020110099100401001000010000010020000022000001002000022203210116221198294000201092000040100120053120053120053120053120057
6020412005289900000020000112004120967171097452570103401021000120000301001000020000104275265733052346577111200191200561200431121313112514601003020020000100006020020000100001200521200521150201100991004010010000100000100200000220002000220000022032101161211982940002014132000040100120044120057120057120063120057
6020412005689900000020010012002800967211097452570103401021000120000301001000020000104260475733676346539411200301200521200521121443112501601003020020000100006020020000100001200521200521150201100991004010010000100000100200000220000000220000202032101162111981640002141402000040100120435120045120057120044120044
60204120057902000000400101120028209670810974525701064010210001200003010010000200001042604757334843465394112003212006012005611214431125016010030200200001000060200200001000012005212005211502011009910040100100001000001002000004200000300200002020321011612119816400021410132000040100120044120057120057120057120057
60204120043899000000200001120028209671710974525701034010210001200003010010000200001042752657336763465394112003212005212004311214431125016010030200200001000060200200001000012005212005211502011009910040100100001000001002000002200000000200022020321024922119829400121010132000040100120058120057120046120057120053
602041200569000000002001001200410096717109732257010340102100012000030100100002020010426217573367634656551120032120056120056112144311251460100302002000010000602002000010000120052120052115020110099100401001000010000110020000022000000032000022003210116121198334000201402000040100120057120053120057120057120053
60204120046899000000200001120037009672110973225701034010210001200003010010000200001042717857336763465655112002812005912005611214431125146010030200200001000060200200001000012005912004311502011009910040100100001000001002000002200000700200000220321011622119829400021014132000040100120044120044120044120053120057
60204120056899000000200100120045209672110974525701034010210002200003010010000200001042683057330523465771112002812006012005611214431125146010030200200001000060200200001000012005212005211502011009910040100100001000001002000002200000002200002000321011612119829400021010132000040100120057120053120057120057120057
60204120052899001100200001120041009672510974525701034010210001200003010010000200001042717857330523465655112003212005212004311213131125016010030200200001000060382200001000012005612004311502011009910040100100001000001002000002200020000200002000321011612119829400021010132000040100120057120061120045120044120057

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0048

retire (01)cycle (02)03080e181e1f22233a3f4d4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a3a6a8acafb5dcache load miss (bf)branch mispredict (cb)cfd0d2icache miss (d3)d5d6d9ddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
60025120048899000140001120020966421097242570013400121000120000300101000020000104251135733292346526301200111200521200351121633112515600103002020000100006002020000100001200481200481150021109104001010000100000102000022000000200002003140000416232119823400020052000040010120053120053120053120036120053
60024120035899000000001200209664210974125700134001010001200003001010000200001042546157332923465263012001112009712003511216331125286001030020200001000060020200001000012005212003511500211091040010100001000001020000220000002000022031400002160221198274000010002000040010120053120053120053120053120036
60024120035899000200001200379664210973725700134001210001200003001010000200001042511357326663465263012002812005212005211216331125326001030020200001000060020200001000012003512004811500211091040010100001000001020000020000002000000031400002160221198274000201002000040010120053120049120049120036120036
60024120035899000201011200209664210974125700134001210001200003001010000200001042397457343743466479012002812003512005211214631125326001030020200001000060020200001000012004812005211500211091040010100001000001020000020000002000022031400002160331198234000210092000040010120049120036120036120049120053
600241200528990001401011200209664210973725700134001010001200003001010000200001042546157326663465263012002812003512003511216331125156001030020200001000060020200001000012005212004811500211091040010100001000001020000220000002000022031401400216033119827400006602000040010120036120036120036120036120036
6002412003589900020000120037966381097372570010400101000020000300101003120000104239745733484346488001200111200521200351121633112532600103002020000100006002020000100001200521200481150021109104001010000100000102000022000000200000203140000216032119810400000652000040010120036120053120053120053120049
6002412003589900020001120037966421097412570013400121000020000300101003320000104239745732666346537901200281200521200561121513112532600103002020000100006002020000100001200351200481150021109104001010000100000102000002000003200002203140000216122119810400020052000040010120049120053120053120036120036
600241200528990009010012003793975109737257001340012100002000030010100002000010423974573348434648800120028120035120052112163311251560010300202000010000600202000010000120048120048115002110910400101000010000010200002200000020000020314000021612211982740000101052000040010120053120053120053120049120049
6002412004889900017000112003793975109741257001040012100012000030010100002000010425461573348434652630120011120052120035112163311251560010300202000010000600202000010000120052120035115002110910400101000010000010200002200000020000200314000021602211981040000101002000040010120036120036120036120053120049
6002412005289900020001120037939751097372570010400121000120000300101000020000104239745732666346488001200241200351200351121593112532600103002020000100006002020000100001200481200481150021109104001010000100001102000022000000200002203140000216123119827400020692000040010120053120036120036120053120053

Test 4: throughput

Count: 8

Code:

  ldp q0, q1, [x6, #0x10]
  ldp q0, q1, [x6, #0x10]
  ldp q0, q1, [x6, #0x10]
  ldp q0, q1, [x6, #0x10]
  ldp q0, q1, [x6, #0x10]
  ldp q0, q1, [x6, #0x10]
  ldp q0, q1, [x6, #0x10]
  ldp q0, q1, [x6, #0x10]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6674

retire (01)cycle (02)0307090e0f18191e2223243a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a3a6a8acafb5b6bbdcache load miss (bf)c2cfd5d6ddinst fetch restart (de)e0eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
160205533964000000004400015338321212162516010010016000010016000050023345351533845339453374332963333521601002001600002001600005339853394118020110099100100800008000011001600004316003803916003961394305110116115337101041600001005339553375533955339553395
160204533984000000004500005337921212025160100100160000100160000500233396215350853395533943331733335216010020016000020016000053374533741180201100991001008000080000010016000043160038038160000613943051101161153391101001600001005337553395533955337553375
160204533744000000004500005335921211625160100100160000100160000500233453515347853410534043329633333216010020016000020016000053374533741180201100991001008000080000010016000043160039042160039613943051101161153391101041600001005339553399533755337553395
1602045339840000000045000153379212121925160100100160000100160000500233396215343853394533953331733333216010020016000020016000053398533741180201100991001008000080000010016000043160000039160039613943051101161153391101041600001005337853399533755339553569
160204533944000000004500015337921121625160100100160000100160000500233396215343353394533943331733333216010020016000020016000053394533941180201100991001008000080000010016000001600390411600396139005110116115339110041600001005339553375533755339553375
160204533944000000004500015337921212162516010010016000010016000050023339621535395340353383333283333521601002001600002001600005339453394118020110099100100800008000001001600004316000003916003960394305110116115339101041600001005339553395533755339953396
160204533944000000000000153379201202516010010016000010016000050023339620533825337453394332963333321601002001600002001600005339453394118020110099100100800008000011001600004316003903916003960390051101161153391141001600001005339553375533955340053395
1602045339440000000045100153383012121625160100100160000100160000500233289705351553394533943329633333216010020016000020016000053374533941180201100991001008000080000010016000043160039039160039613943051101161153391101001600001005339553375533755339953395
1602045339440000000045100053379312016251601001001600001001600005002333962153509533745342133320333332160100200160000200160000533945339411802011009910010080000800000100160000016000003916003961390051101161153397101041600001005339553403535485354553399
160204533943990001009000015335931201625160100100160000100160000500233396215338053394533743332333336016010020016000020016000053394533941180201100991001008000080000110016000043160039082716003961394305110116115354301001600001005356353379533955339953395

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6675

retire (01)cycle (02)030508090a0b0e0f18191e1f22233a3f4346494f51inst issue (52)~issue int (53)~issue ld/st (55)~dispatch int (56)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map lookup int (7f)~map lookup ld/st (80)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst fp/simd load (98)inst ldst (9b)9d9fa0a1a3a5a6a7a8a9acafb5b6bbdcache load miss (bf)dtlb miss (c1)c2cfd5d6dbddinst fetch restart (de)e0? int output thing (e9)eaebecld/st retires (ed)gpr retires (ef)f5f6f7f8fd
160025533994001110100006501035338401818162516001010160000101600005023339261533560533995339833344333379160010201600002016000053399533811180021109101080000800000101600192142016005920121160038615801905020171607135339602962160000105339953400533995340053400
16002453398400110010000650101533842180162516001010160000101600005023339261533560533995338133344333361160010201600002016000053381533811180021109101080000800000101600192000160057101211600386157401925020131601214533960092160000105340053382534005339953400
160024533993991100000006501035338321818162516001010160000101600005023339261533560533995339833344333378160010201600002016000053398533981180021109101080000800000101600202042016001900059160038615501905089131601312533960092160000105340053400534005340053400
16002453381400101010000210102533842181812516001010160000101600005023329971533560533985338133344333378160010201600002016000053398533991180021109101080000800000101600202142016005610221160038015742190502081601211533960990160000105338253382534005338253400
1600245338140011101000021000153384200025160010101600001016000050234981715337405338153399333443333791600102016000020160000533995339811800211091010800008000001016002121420160058102591600000119421915020121601010533960090160000105339953383534005339953399
160024533994001100000002101005338301818162516001010160000101600005023329971533740533815339833344333378160010201600002016000053398533981180021109101080000800000101600192042016005700059160038015742191502012160910533780990160000105338553400533825340053400
1600245339940010100000021000253366201812516001010160000101600005023553671533740533985339933343333379160010201600002016000053399533981180021109101080000800000101600192042016001900059160038605701915020121601212533780002160000105339953382533825340053382
1600245338140011001000065000353383018181725160010101600001016000050233299715337405338153398333263333781600102016000020160000534015339911800211091010800008000001016002019420160057101591600380157421915052121601112533960092160000105338253400534005339953399
16002453381400111010000210101533660018025160010101600001016000050233999915336005339953382333433333981600102016000020160000533995338111800211091010800008000001016001920420160058000241600380118421815037122501212533960090160000105339953382535565355053542
160024535504001110110007700035338701818184916014010160000101600005023452851533740536955355233344333378160010201600002016000053398533811180021109101080000800001101600201900160019100211600386157401915020131601113533810990160000105339953400534005339953382