Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

TBX (two register table, 8B)

Test 1: uops

Code:

  tbx v0.8b, { v1.16b, v2.16b }, v3.8b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 2.000

retire (01) | cycle (02) | 03 | 08 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
200440373100015636872520002000200055168004018403740373447337702000200060004037403711100110000073316223787200040384038403840384038
20044037310006136872520002000200055168004018403740373447337702000200060004037403711100110000073216223787200040384038403840384038
20044037300006136872520002000200055168004018403740373447337702000200060004037403711100110000073216223787200040384038403840384038
200440373100025136872520002000200055168004018403740373447337702000200060004037403711100110000073216223787200040384038403840384038
20044037300006136872520002000200055168004018403740373447337702000200060004037403711100110000073216223787200040384038403840384038
20044037300006136872520002000200055168014018403740373447337702000200060004037403711100110000073216223787200040384038403840384038
20044037300006136872520002000200055168004018403740373447337702000200060004037403711100110000073216223787200040384038403840384038
20044037300006136872520002000200055168004018403740373447337702000200060004037403711100110000073216223787200040384038403840384038
20044037310006136872520002000200055168014018403740373447337702000200060004037403711100110000073216223787200040384038403840384038
20044037300006136872520002000200055168014018403740373447337702000200060004037403711100110000073216223787200040384038403840384038

Test 2: Latency 1->1

Code:

  tbx v0.8b, { v1.16b, v2.16b }, v3.8b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a8 | ac | branch mispredict (cb) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2020440037300840613968725201001002000010020000500571768004001804003740037371720337495201002002000020060000400374003711102011009910010010000100000710021622397870200001004003840038400384003840038
202044003730000613968725201001002000010020000500571768004001804003740037371720337495201002002000020060000400374003711102011009910010010000100000710021622398250200001004003840038400384003840038
202044003730000613968725201001002000010020000500571768014001804003740037371720337495201002002000020060000400374003711102011009910010010000100000710121622397870200001004003840038400384003840038
202044003730000613968725201001002000010020000500571768004001804003740037371720337495201002002000020060000400374003711102011009910010010000100000710121622397870200001004003840038400384003840038
2020440037300007263968725201001002000010020000500571768004001804003740037371720337495201002002000020060000400374003711102011009910010010000100000712121622397870200001004003840038400384003840038
202044003729900613968725201001002000010020000500571768004001804003740037371720337495201002002000020060000400374003711102011009910010010000100000710121622397870200001004003840038400384003840038
202044003729900613968725201001002000010020000500571768004001804003740037371720337495201002002000020060000400374003711102011009910010010000100000710121622397870200001004003840038400384003840038
202044003730000893968725201001002000010020000500571768014001804003740037371720337495201002002000020260000400374003711102011009910010010000100000710121622397870200001004003840038400384003840038
202044003729960823968725201001002000010020000500571768004001804003740037371720337495201002002000020060504400374003711102011009910010010000100000710121622397870200001004003840038400384003840038
202044003730000613968725201001002000010020000500571768004001804003740037371720337495201002002000020060000400374003711102011009910010010000100000710121622397870200001004003840038400384003840038

1000 unrolls and 10 iterations

Result (median cycles for code): 4.0037

retire (01) | cycle (02) | 03 | 0b | 1e | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a8 | ac | c5 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2002440037300006139687252001010200001020000505717680400180400374003737194337517200102020000206000040037400371110021109101010000100000640216223978720000104003840038400384003840038
2002440037300006139687252001010200001020000505717680400180400374003737205337517200102020000206000040037400371110021109101010000100000640216223978720000104003840038400384003840038
2002440037300006139687462001010200001020000505717680400180400374003737194737517200102020000206000040037400371110021109101010000100000640216223978720000104003840038400384003840038
2002440037300006139687252001010200001020000505717680400180400374003737194337517200102020000206000040037400371110021109101010000100001640216223978720000104003840038400384003840038
2002440037300006139687252001010200001020000505717680400180400374003737194337517200102020000206000040037400371110021109101010000100000640216223978720000104003840038400384003840038
2002440037300006139687252001010200001020000505717680400180400374003737194337517200102020000206000040037400371110021109101010000100000640216223978720000104003840038400384003840038
2002440037299006139687252001010200001020000505717680400180400374003737194337517200102020000206000040037400371110021109101010000100000640216223978720000104003840038400384003840038
2002440037300006139687252001010200001020000505717680400180400374003737194337517200102020000206000040037400371110021109101010000100000640216223978720000104003840038400384003840038
2002440037299006139687252001010200001020000505717680400180400374003737194337517200102020000206000040037400371110021109101010000100000640316223978720000104003840038400384003840038
2002440037300006139687252001010200001020000505717680400180400374003737194337517200102020000206000040037400371110021109101010000100000640216223978720000104003840038400384003840038

Test 3: Latency 1->2

Chain cycles: 2

Code:

  movi v0.16b, 0
  tbx v0.8b, { v1.16b, v2.16b }, v3.8b
  add v1.16b, v0.16b, v0.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 2 chain cycles): 4.0038

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
40204600904501000141176010565968772301251253000012530000625858762716009106014060038546680354995301252003016220080000600386003811302011009910010030000100400000000001939225225980125400001006003960039600396009160141
4020460038449002000012459687263012512530000125300006258588904160019060448600905464803549953012520230331200817786045460503101302011009910010030000100000010000001910216225980125400001006003960039600396003960039
402046003844900000004415968726301251253000012530000625858762706001906003860038546710354995310472003000020080000600386003811302011009910010030000100000000000001910216225980125400001006003960039600396003960039
402046003845000000006159687263012512530000125300006258587627160019060038600385467102055025301252003000020080000600386003811302011009910010030000100002010418500001910216225980125400001006003960039600396003960039
40204600384500000000615968726301251253000012530000625858762716001906003860038546710354995301252003000020080000600386009011302011009910010030000100000000300001916216225983225400001006003960039600396003960091
402046003845001005071040945968726301251253000012530000625858762716001906003860038546710354995301252003000020080000600386003811302011009910010030000100000000000001910216225980125400001006003960039600396009560039
40204600384500000000615968726301251253000012530000625858762716001906003860038546710354995301252003000020080000600386003811302011009910010030000100000000000001910216225980125400001006003960039600396003960039
40204600384500000000825968726301251253000012530000625858762716001906003860038546710354995301252003000020080000600386003811302011009910010030000100000000300001910216225980125400001006003960039600396003960039
40204600384490000000615968726301251253000012530000625858762716001906003860038546710354995301252003000020080000600386003811302011009910010030000100000000000001910216225980125400001006003960039600396003960039
40204600384490000000615968726301251253000012530000625858762716001906003860038546710354995301252003000020080000600386003811302011009910010030000100000000000001910216225980125400001006003960039600396003960039

1000 unrolls and 10 iterations

Result (median cycles for code, minus 2 chain cycles): 4.0038

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4002460038450000000000615968748300131330012133060865858762700601646003860038546903550173001320300002080000602426014111300211091010300001044004000189007173359801340000106003960191600906003960090
40024600384500000000002515968726300131330000133000065858762700600196003860038546933550173001320300002080000600386003811300211091010300001000000000189003173359801340000106003960039600396003960141
4002460398452000011000615968726300131330000133000065858890400600916003860038546933550173001320301642080000600386003811300211091010300001022000000189003173359801340000106003960039600396003960039
40024600384500000000006159687263001313300001330000658587627006009360089600385469111550173016520300002080000600386003811300211091010300001020010000189007173459801340000106003960039600396003960039
40024600384490000000001035968726300131330000133013365858762701600196003860038546933550323001320300002080000600386003811300211091010300001000000000189003173359801340000106003960039600396003960039
4002460038450000000000615968726300131330000133000065858762700600196003860038546883550173001320300002080000600386003811300211091010300001000032000189003173359801340000106003960039600396003960039
4002460038450000000600615968726300131330000133000065858762700600196003860038546933550173001320300002080000600386003811300211091010300001000000000189003173459801340000106003960039600396003960039
4002460038450000000000615968726300131330000133000065858762701600196003860038546933550173001320300002080000600386003811300211091010300001000000000189003173359801340000106003960039600396003960039
4002460038450000000000615968726300131330000133000065858762700600196003860038546933550173001320300002080000600386003811300211091010300001000000000189003177359801340000106003960039600396003960039
4002460038450000000000615968726300131330000133000065858762700600196003860038547003550173001320300002080000600386003811300211091010300001000000000189003173759801340000106003960039600396003960039

Test 4: Latency 1->3

Chain cycles: 2

Code:

  movi v0.16b, 0
  tbx v0.8b, { v1.16b, v2.16b }, v3.8b
  add v2.16b, v0.16b, v0.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 2 chain cycles): 2.0043

retire (01) | cycle (02) | 03 | 08 | 1e | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4020440040299012061396822630135125300021253000062557009330400244004340040346893349973012520030000200800004004040043113020110099100100300001000000000001910116113984425400001004004440041400444004140044
402044007031003172939706263012512530001125300006255704752040021400434004034692634996301252003000620080016400404004021302011009910010030000100000002161111917016003985425400001004004140041400414004140041
40204400463000006139722263012512530000125300006255701226040021400404004334695734995301252003000620080016400404004011302011009910010030000100000001291111918016003985225400001004004740041400414004140041
402044004330000261397072630125125300001253000062557041380400514004040040347387349963012520030006200800164008540085113020110099100100300001000000039950001910116113984725400001004004140044400414004140041
4020440040300002613973226301261253000112530000625570783904005140043400433468533499730125200300002008000040070400401130201100991001003000010000000210001910116113987425400001004004140044400444004140047
40204400433000011613973226301611253003612530000625570093304002440043400403469133499730125200300002008000040043400401130201100991001003000010000010210001910116113989725400001004004140041400714007140086
4020440043315000613970726301611253000112530000625570783904006640040400403468933500330125200300002008000040040400401130201100991001003000010000000360001910116113984725400001004007140041400444004440041
40204400403000026139732263012512530002125300006255701151040021400404004034691335042301252003000020080000400434004311302011009910010030000100000001290001910116113984425400001004004740071400414004440041
40204400432990036613972226301251253000212530000625570475204006640040400403471633504230125200300002008000040043400431130201100991001003000010000010240001910116113984725400001004004440044400414004140044
4020440043300003672639732263013512530000125300006255700933040021400434004034685334997301252003000020080000400404008511302011009910010030000100000001260001910116113984725400001004004140044400414004140047

1000 unrolls and 10 iterations

Result (median cycles for code, minus 2 chain cycles): 2.0040

retire (01) | cycle (02) | 03 | 0b | 19 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a8 | ac | bc | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4002440043299000016139707263004913300001330000655700933140021400404004034708335019300132030000208000040046400431130021109101030000100000018903173339843340000104004140044400864004140041
4002440040300000026139707263001413300361330000655704752040021400404010134695335019300132030000208000040040400401130021109101030000100000018905175339843340000104004440041400474004440041
4002440040300000006139707263004913300001330000655701300040021400434004034708335052300132030000208000040040400851130021109101030000100000018902175339888340000104004440041400864004740047
4002440040300000006139707263001313300011330000655704752040066400404004034708335019300132030000208000040040400401130021109101030000100000018904174439888340000104004440044400864004140044
4002440085300000006139707263001413300011330000655701548040066400434004334708335022300132030000208000040040400401130021109101030000100000018904175439843340000104004140041400414004140041
4002440040300000006139732263001313300361330000655700933040021400404004334708335019300132030243208000040043400401130021109101030000100000018903173339846340000104004140086400444008640041
400244004031100210030039732263004913300011330000655700305040027400854004034708335019300132030000208000040040400401130021109101030000100000018904173339888340000104008640044400414008640041
40024400402990000172639707263001313300361330000655705152040021400404004034753335022300132030000208000040040400401130021109101030000100010018906174439843340000104004140044400414004140041
40024400433001000366139710263001313300001330000655700305140021400404004034708335019300132030000208000040085400461130021109101030000100000018904174539843340000104004140044400414004140086
4002440040300000016139707263001313300021330000655700760040021400734004034738335019300132030000208000040046400431130021109101030000100000018902174339846340000104008640041400444004140041

Test 5: Latency 1->4

Chain cycles: 2

Code:

  movi v0.16b, 0
  tbx v0.8b, { v1.16b, v2.16b }, v3.8b
  add v3.16b, v0.16b, v0.16b
  movi v0.16b, 1
  movi v1.16b, 2
  movi v2.16b, 3
  movi v3.16b, 4

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 2 chain cycles): 4.0038

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
40204602454500000021935968749301251273000012530000625858762760019600386003854671365499530125200300002008000060038600381130201100991001003000010000001910116115980125400001006003960039600396003960039
40204600384500000061596872630125125300001253000062585876276001960038600385467135499530125200300002008000060038600381130201100991001003000010000001910116115980125400001006003960039600396003960039
40204600384500000098596872630125125300001253000062585876276001960038600385467135499530125200300002008000060038600381130201100991001003000010002001910116115980125400001006003960039600396003960039
4020460038450000121103596872630125125300001253000062585876276001960038600385467135499530125200300002008000060038600381130201100991001003000010000331910116115980125400001006003960039600396003960039
402046003844900054061596872630125125300001253000062585876276001960038600385467135499530125200300002008000060038600381130201100991001003000010000301910116115980125400001006003960039600396003960039
402046003845000000615968726301661253000012530000625858762760019600386003854671354995301252003000020080000600386003811302011009910010030000100003001910151115980125400001006003960095600396003960039
40204600384501000061596872630125125300001253000062585876276001960038600385467135499530125200300002008000060038600381130201100991001003000010000301910116115980125400001006003960039600396003960039
402046003845000000103596872630125125300001253000062585876276001960038600385467135499530125200301662008000060038600381130201100991001003000010000531910116115980125400001006003960039600396003960039
40204600384500000061596872630125125300001253000062585876276001960038600385467135499530125200300002008000060038600381130201100991001003000010000191910116115980125400001006003960039600396003960039
4020460038450000003057596872630125125300001253000062585876276001960038600385467135499530125200300002008000060038600381130201100991001003000010000301910116115994925400001006003960039600396003960039

1000 unrolls and 10 iterations

Result (median cycles for code, minus 2 chain cycles): 4.0038

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | ac | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
4002460038450000012010359687263001313300001330000658587627160019060038600385469335501730013203000020800006003860038113002110910103000010000001080018903262259877340000106014360039601456003960039
4002460086450010012010359687263001313300001330000658587627160019060038600385469335501730013203000020800006003860038113002110910103000010000001290018902172259801340000106003960039600396003960039
40024600384500000001035968726300131330000133000065858762716001906003860038546933550173001320300002080000600386003811300211091010300001000008130018903172259801340000106003960039600396003960039
4002460038450000000615968726300131330000133000065858762716001906003860038546933550173001320300002080000600386003811300211091010300001000005000018902172259801340000106003960039600396003960039
400246003844900000061596872630013133000013300006585876270600190600386003854693355033300132030000208000060038600381130021109101030000100000000018902172259801340000106003960039600396003960039
400246003844900000061596872630013133000013300006585876271600190600386003854693355017300132030000208000060038600381130021109101030000100000000018902172259801340000106003960039600396003960039
4002460038450000000615968726300131330000133000065858762716001936003860038546933550173001320300002080000600386003811300211091010300001000009290018902172259801340000106003960039600396003960039
400246003845000000061596872630013133000013300006585876271600190600386003854693355017300132030000208000060038600381130021109101030000100000000018902172259801340000106003960039600396003960039
400246003845000000061596872630013133000013300006585876270600190600386003854693355017300132030000208000060038600381130021109101030000100000000018902172259801340000106003960039600396003960039
4002460038449000000726596872630013133000013300006585876270600190600386003854693355017300132030000208000060038600381130021109101030000100000060018902172259801340000106003960039600396003960039

Test 6: throughput

Count: 8

Code:

  movi v0.16b, 0
  tbx v0.8b, { v8.16b, v9.16b }, v10.8b
  movi v1.16b, 0
  tbx v1.8b, { v8.16b, v9.16b }, v10.8b
  movi v2.16b, 0
  tbx v2.8b, { v8.16b, v9.16b }, v10.8b
  movi v3.16b, 0
  tbx v3.8b, { v8.16b, v9.16b }, v10.8b
  movi v4.16b, 0
  tbx v4.8b, { v8.16b, v9.16b }, v10.8b
  movi v5.16b, 0
  tbx v5.8b, { v8.16b, v9.16b }, v10.8b
  movi v6.16b, 0
  tbx v6.8b, { v8.16b, v9.16b }, v10.8b
  movi v7.16b, 0
  tbx v7.8b, { v8.16b, v9.16b }, v10.8b
  movi v8.16b, 9
  movi v9.16b, 10
  movi v10.16b, 11

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 08 | 0a | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a7 | a8 | ac | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2402044006530000001145803700026160125125160000125160000650159996014002404004340043997303100001601252001600002004800004004340043111602011009910010016000010000300101101161140040252400001004004440044400448639140044
24020440043300000011572361026160125125275723125160000650159996014002404004340043997303100001601252001600002004800004004340276111602011009910010016000010000010101101161140040252400001004004440044400444004440044
240204400433000000044026160125125160000125160000650159996014002404004386390997303100001601252001600002004800008643387877111602011009910010016000010000000101101161140040252400001004004440044400444004440044
2402048787730000000677809026160125125160000125160000650159996014002404004386390997303100001601252001600002004800004004340043111602011009910010016000010000000101101161240144162400001004016140158401604016840102
24020440158301100002820671602121251600001251600006501186578718785604020840106997303100001601252001602262004800004004340043111602011009910010016000010001300101101161140040252400001004004440044400444004440044
240204878773000000044026275848125160000125160000650159996014002408787740043997303100001601252001600002004800004004340043111602011009910010016000010000000101101161186268252400001004004440044400444004440044
240204400433000000044026160125125160000125160000650159996014002404004340043997703100001601252001600002004800004004340043111602011009910010016000010001000101101161140040252400001004004440044863914004486391
240204400433000000065026160125125160000125160000650159996014002404004340043997303100001601252001600002004800004004340043111602011009910010016000010000001101101161140040252400001004004440044400444004460230
240204858513000000044026160125125160000125160000650159996004002404004340043997303103291601252001600002004800004004340043111602011009910010016000010000000101101161185501252400001004004440111856874004440044
240204727403000000115763440261601251251600001251600006501599960140024087147400434821403100001601252001600002004800004004340043111602011009910010016000010000000101101161140040252400001004004440044400448613940044

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 07 | 08 | 0b | 18 | 19 | 1e | 1f | 37 | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a8 | a9 | ac | c2 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2400244004364200000000497558626160013131600001316000068159996011540024040043400439996031002216001320160000204813204004340043111600211091010160000100000010022812818172111117400403152240000104004440044400444004440044
2400244004364600000000490261600131316000013160000681599960115400240400434004399960310022160013201600002048000040043400431116002110910101600001000000100218322161721115174004031514240000104004440044862494004440044
24002440043300000000007140261600131316000013160000681599960115400240400434004399960310022160013201600002048000040043400431116002110910101600001000000100221141816174221515400403304240000104004440044400444004440044
2400244004330000000000550261600131316000013160000681599960115400240400434004399960310022160013201600002048000040043400431116002110910101600001000000100248518917412816400403302240000104004440044400444004440044
240024400433000000000049026160013131600001316000068159996011586216040043400439996031002216001320160000204800004004340043111600211091010160000100009010021852213174221414400403152240000104004485545400444004440044
240024400433000000000049026160013131600001316000068117076970154002404004340043999603100221600132016000020480000885604004311160021109101016000010000001002411516151721116174004031514240000104004440044400444004440044
2400244004330000000001152034902616001313160000131600006815999601154002434004340043485587656310022160013201600002048000040043862481116002110910101600001000000100241152314172111715400403152240000104004440044400444004440044
2400244004330000000001140734902616001313160000131600006715999601154002408658040043999603100221600132016000020480000400438510811160021109101016000010000001002184189172111315400403152240000104004486249400448623940044
2400244004329900000001152034826026275216131600001316000066159996011540024040043400439996031002216001320160000204800004004340043111600211091010160000100000010022841915172221516400403304240000104004440044400448624940044
2400244004329900000000490261600131327614813160000681180174211540024040043400439996035622716001320160000204800008510840043111600211091010160000100000010024841715172111714400403152240000104004440044400444004440044