Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

DUP (general, 2S)

Test 1: uops

Code:

  dup v0.2s, w0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 3a | 3f | 4f | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst neon or fp (9a) | a6 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
2004375311023600252000100010001000100014075228200357375375723108200010001000100010003753751110011000100000755165537210001000376376376376376
2004375311023600252000100010001000100014075228201356375375723108200010001000100010003754931110011000100000755165537210001000376383381376381
2004375311023600252000100010001000100014075228201358375375723108200010001000100010003753751110011000100000755165537210001000376381376379376
2004375311023600252000100010001000100014075228200356375375723108200010001000100010003753751110011000100000755165537210001000376379376379376
2004375311023600252000100010001000100014075228201356375375723108200010001000100010003753751110011000100000755165537210001000376381376379376
2004375311023600252000103310001000100014075228201356375375723108200010001000100010003753751110011000100000755165537210001000376376376376376
2004375311023600252000100010001000100014075228200363375375723108200010001000100010003753751110011000100003755165537210001000376376378376376
2004375211023600252000100010001000100014075230220356375375723108200010001000100010003753751110011000100000755165537210001000376381376376376
2004375311023600252000100010001000100014075228201357375375723108200010001000100010003753751110011000100000755165537210001000376376376376376
2004375311023600252000100010001000100014075228200365375375723108200010001000100010003753751110011000100000755165537210001000376376376376376

Test 2: Latency 1->2 roundtrip

Code:

  dup v0.2s, w0
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 12.0032

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3020412003489900000001200171094562540100101002000010000100200001000050057361761367205311200131200321200321155253116298301002001000020000200100002000012003212003211202011009910010100100001000010000000000131013162311957410000100001000010100120033120033120033120033120033
3020412003289900000001200171094562540100101002000010000100200001000050057356721367216411200131200321200321155253116302301002001000020000200100002000012003212003221202011009910010100100001000010000000000131013163311957410000100001000010100120033120033120033120033120033
3020412003289900000001200171094562540100101002000010000100200001000050057356721367205311200131200321200321155253116240301002001000020000200100002000012003212003211202011009910010100100001000010000000000131013163311957410000100001000010100120033120033120033120033120033
3020412003289900000001200181094562540100101002000010000100200001000050057356721367205311200131200321200321155253116294301002001000020000200100002000012003212003211202011009910010100100001000010000000000131013163311957410000100001000010100120033120033120033120033120033
3020412003289900000001200171094562540100101002000010000100200001000050057356721367205311200131200321200321155253116249301002021000020000200100002000012003212003221202011009910010100100001000010000010600131013253511957410000100001000010100120033120033120130120036120034
3020412003289900000001200171094562540100101002000010000100200001000050057356721367205311200131200321200321155253116312301002001000020000200100002000012003212003211202011009910010100100001000010000000000131013163311957410000100001000010100120033120033120033120033120033
3020412003289900000001200171094562540100101002000010000100200001000050057356721367205311200131200791200351155253116304301002001000020000200100002000012003212003211202011009910010100100001000010000000000131014163311957410000100001000010100120033120033120033120033120033
3020412003289900000001200171094562540100101002000010000100200001000050057356721367205311200131200691200321155253116240301002001000020000200100002000012003212003211202011009910010100100001000010000000000131013163311957410000100001000010100120033120033120033120033120033
3020412003289900000001200171094562540100101002000010000100200001000050057356721367238611200181200321200321155253116240301002001000020000200100002000012003212003211202011009910010100100001000010000000000131013163311957410000100001000010100120033120033120033120033120033
3020412003289900000001200171094562540100101002000010000100200001000050057356721367205311200131200321200321155253116312301252001000020000200100002000012003212003211202011009910010100100001000010000000000131013163311957410000100001000010100120035120298120197120033120033

1000 unrolls and 10 iterations

Result (median cycles for code): 12.0032

retire (01) | cycle (02) | 03 | 18 | 19 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30024120032899001051200171094562540010100102000010000102000010000505735672136700290120016012003212003211554831162623001020100002000020100002000012003212003211200211091010010100001010000000012701161111957410000100001000010010120033120033120033120033120033
30024120032899008611200171094562540010100102000010000102000010000505735672136700290120013012003212003211554831162623001020100002000020100002000012003212003211200211091010010100001010000000012702161111963710000100001000010010120033120033120033120033120033
30024120032899007861200181094562540010100102000010000102000010000505735672136700291120013012003212003211554831163163001020100002000020100002000012003212003211200211091010010100001010000000012701161211957410000100001000010010120033120033120033120033120033
30024120032899008851200171094562540010100102000010000102000010000505735672136700290120013012003212003211554831162623001020100002000020100002000012003212003211200211091010010100001010000100012701161111957410000100001000010010120033120033120033120033120033
30024120032899008821200171094562540010100102000010000102000010000505735672136700290120013012003212003211554891162693001020100002000020100002000012003212003211200211091010010100001010000000012701161111957410000100001000010010120033120033120033120033120033
30024120032899008221200171094562540010100102000010000102000010000505735672136700290120013012003212003211554831162623001020100002000020100002000012011412003811200211091010010100001010000000012701161111957410000100001000010010120033120033120033120033120038
30024120032899003901200171094562540010100102000010000102000010000505735672136700290120013012003212003211554831162623001020100002000020100002000012003212003211200211091010010100001010000000012701161111957410000100001000010010120033120041120033120033120033
30024120032900005551200171094562540010100102000010000102000010000505735672136700291120013012003212003211554831162623001020100002000020100002000012003212003211200211091010010100001010000000012701161111957410000100001000010010120033120033120033120033120033
30024120032899009541200171094562540010100102000010000102000010000505735672136700290120013012003212003211554831162623001020100002000020100002000012003212003211200211091010010100001010000000012701161111957410000100001000010010120033120033120033120033120033
30024120032899008311200171094562540010100102000010000102000010000505735672136700290120013012003212003211554831164633001020100002000020100002000012003212003211200211091010010100001010000000012701161111957410000100001000010010120033120033120033120033120033

Test 3: throughput

Count: 8

Code:

  dup v0.2s, w8
  dup v1.2s, w8
  dup v2.2s, w8
  dup v3.2s, w8
  dup v4.2s, w8
  dup v5.2s, w8
  dup v6.2s, w8
  dup v7.2s, w8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01) | cycle (02) | 03 | 07 | 0a | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204267102000172266932516010010080000800001008002080015500116895118841632682226724270946642666731601352008002080020200800208002026708267081180201100991001008000010080000001115117116112670580000800001002671626709267092670926709
16020426708200110266932516010010080000800001008002080015500116895118841632681126717267086632666571601352008002080020200800208002026708267081180201100991001008000010080000001115117116112670580000800001002670926709267092670926709
16020426708200110266932516010010080000800001008002080015500116895118841632678626719267176648666571601352008002080020200800208002026708267081180201100991001008000010080000001115117116112670580000800001002670926709267092670926709
160204267082001115266932516010010080000800001008002080015500116895118841632680526708267356652666571601352008002080020200800208002026708267081180201100991001008000010080000001115117116112670580000800001002670926709267092670926709
16020426708200110266932516010010080000800001008002080015500116895118841632668926708268716721666571601352008002080020200800208002026708267081180201100991001008000010080000001115117116112670580000800001002670926709267092670926709
16020426708200110266932516010010080000800001008002080015500116895118841632682727352267086632666571601352008002080020200800208002026708267081180201100991001008000010080000001115117116112670580000800001002670926709267092670926709
16020426708200110266932516010010080000800001008002080015500116895118841632681826717267136632666571601352008002080020200800208002026708267081180201100991001008000010080000001115117116112670580000800001002670926709267092670926709
16020426708206110266932516010010080000800001008002080015500116895118841632680826721267146632666571601352008002080020200800208002026708267081180201100991001008000010080000001115117116112670580000800001002670926709267092670926709
16020426708200110266932516010010080000800001008002080015500116895118841632680526716267186634666571601352008002080020200800208002026708267111180201100991001008000010080000001115117116112670580000800001002670926709267092670926709
16020426708200110266932516010010080000800001008002080015500116895118841632675726714267176647666571601352008002080020200800208002026708267081180201100991001008000010080000001115117116112670580000800001002670926709267092670926709

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3338

retire (01) | cycle (02) | 03 | 1e | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 72 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a5 | a6 | ac | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002426713200026693025160010108000080000108000080000501168880188403212675826800267106653036688160010208000080000208000080000267082670811800211091010800001008000000502031611267058000080000102670926709267092670926709
16002426708200026693025160010108000080000108000080000501168880188403212675826710267126657036695160010208000080000208000080000267082670811800211091010800001008000000502011612267058000080000102670926709267092670926709
16002426708200026693025160010108000080000108000080000501168880188403202677826782267086658036688160010208000080000208000080000267082670811800211091010800001008000000502011611267058000080000102670926709267092670926709
16002426708200026693025160010108000080000108000080000501168880188403202684726717267086653036688160010208000080000208000080000267082670811800211091010800001008000000502011611267058000080000102670926709267092670926709
16002426708200026693025160010108000080000108000080000501168880188403202687726716267086653036688160010208000080000208000080000267082670811800211091010800001008000000502011611267058000080000102670926709267092670926709
160024267082000266930251600101080000800001080000800005011688801884032026757267142671766670366881600102080000800002080000800002670826708118002110910108000010080000121502011611267058000080000102670926709267092670926709
160024267082000266930251600101080000800001080000800005011688801884032026689267082689366598936691160010208000080000208000080000267082670811800211091010800001008000000502011611267058000080000102670926709267092670926709
160024267082001226693025160010108000080000108000080000501168880188856512675426708267226672036688160010208000080000208000080000267082670811800211091010800001008000000502011611267058000080000102670926709267092670926709
16002426708200026693025160010108000080000108000080000501168880188403202675126715267176666036688160010208000080000208000080000267082670811800211091010800001008000000502011611267058000080000102670926709267092670926709
16002426708200026693025160010108000080000108000080000501168880188403212676226715267156663036688160010208000080000208000080000267082670811800211091010800001008000000502011611267058000080000102670926709267092670926709