Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FCVTMS (scalar, S to X)

Test 1: uops

Code:

  fcvtms x0, s0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 2.000

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst integer (97) | a0 | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
200454140043253000100020002000180000522541541248327420002000200054154111100110000007311611538100010001000542542542542542
200454140085253000100020002000180001522541541248327420002000200054154111100110000067311611538100010001000542542542542542
200454150043253000100020002000180001522541541248327420002000200054154111100110000007311611538100010001000542542542542542
200454143043253000100020002000180001522541541248327420002000200054154111100110000007311611538100010001000542542542542542
200454140085253000100020002000180001522541541248327420002000200054154111100110000007311611538100010001000542542542542542
200454140043253000100020002000180001522541541248327420002000200054154111100110000007311611538100010001000542542542542542
2004541400432530001000200020001800015225415412483274200020002000541541111001100000127311611538100010001000542542542542542
200454140043253000100020002000180001522541541248327420002000200054154111100110000007311611538100010001000542542542542542
200454140043253000100020002000180001522541541248327420002000200054154111100110000007311611538100010001000542542542542542
200454140043253000100020002000180001522541541248327420002000200054154111100110000037311611538100010001000542542542542542

Test 2: Latency 1->2 roundtrip

Code:

  fcvtms x0, s0
  fmov d0, x0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0038

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
302041300389740000981013002311941725401001010020000100001002000010000500621497914801034113001313003813003812547629126259301002021000020000200100002000013003813003811202011009910010100100001000010000000000131012162212952510000100001000010100130041130041130039130039130039
302041300389740031115201300231194172540100101312000010000100200001000050062149791480114601300131300381300381254763126274301002001000020000200100002000013003813003811202011009910010100100001000010000203000131012162212952510000100001000010100130039130039130039130039130039
30204130038973000095101300231194172540100101002000010000100200001000050062149791480103401300131300381300381254763126246301002001000020000200100002000013003813003851202011009910010100100001000010000000000131012162212952510000100001000010100130039130039130039130039130039
302041300389740000001300231194172540100101002000010000100200001000050062151711482197511300151300381300391256183126246301002001000020000200100002000013035713003811202011009910010100100001000010010000400131014162212952510000100001000010100130039130271130039130039130039
302041300389740000132901300231194172540100101002000010000103200001000050062149791480103401300131300381300381254763126246301002001000020483200100002000013003813003811202011009910010100100001000010000000000131012162212952510000100001000010100130039130039130039130039130039
302041300389740000582013002311941725401001010020000100001002000010196500621497914801034013001313003813003812547631262473076620010000200002001000020000130038130038112020110099100101001000010002100000017127000131012162212952510000100001000010100130071130039130039130039130039
302041303719740000001300231194172540100101002000010000100200001000050062149791480103401300131300381300381254763126246301002001000020000200100002000013003813003811202011009910010100100001000010000420000131012162212982210000100001000010100130039130357130041130039130039
302041300389740004001300231194172540100101002000010000100200001000050062149791480103401300131300381300381254763126252301002001024620000200100002000013003813003811202011009910010100100001000010000000000131012162312952510000100001000010100130039130039130039130039130039
302041300389740000001300231194172540100101002000010000100200001000050062149791480103401300131300381300381254763126269301002001000020000200100002000013003813003811202011009910010100100001000010000000000131012162212952510000100001000010100130039130039130039130039130039
30204130038974000196901300231194172540100101002000010000100200001000050062149791480103401300131300381300381254763126246301002001000020000200100002000013003813003811202011009910010100100001000010000000000131015162212952510000100001000010100130361130039130039130039130039

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0038

retire (01) | cycle (02) | 03 | 07 | 08 | 0a | 0b | 18 | 19 | 1e | 1f | 3a | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3002413003810080000000001300231194172540019100102000010000102000010000506214979148000250130013013003813003812549831262683001020100002000020100002000013003813003811200211091010010100001000100000000012706165412952510000100001000010010130039130039130039130039130039
300241300389740000000001300231194172540036100102000010000102000010000506214979148000250130013013050513014012549831262683001020100002000020100002013113003813003811200211091010010100001000100000000012705165412952510000100001000010010130039130039130039130039130039
300241300389740000000001300231194172540010100102000010000102000010000506214979148000251130013013003813003812549831262683001020100002000020100002000013003813003811200211091010010100001000100000000012705165312952510000100001000010010130039130039130039130039130039
300241300419740000000001300231194173340010100102000010000102000010000506214979148000250130013013003813003812549831262683001020100002000020100002000013003813003811200211091010010100001000100000000012704164512952510000100001000010010130039130039130039130039130039
300241300389740000000001300231194172540010100102000010002102000010000506214979148000250130013013003813003812549831262683001020100002000020100002000013003813003911200211091010010100001000100000000012705163512952510000100001000010010130039130039130039130096130039
300241300389740000000001300231194662540010100102000010000102000010000506214979148000250130013013003813003812549831262683001020100002000020100002000013004113003811200211091010010100001000100000000012705165412996510000100001000010010130604130630130479130530130548
300241306389781021000001300231194522540010100132000010000102000010000506214979148000250130023013003813003812549831262683001020100002000020100002000013003813014111200211091010010100001000100000000012703164512952510000100001000010010130039130039130039130039130039
300241300389740000000001300231194552540010100102000010000102000010000556215123148000250130013013003813003812549831262683001020100002000020100002000013003813003811200211091010010100001000100000000012704164412952510000100001000010010130039130039130039130086130039
30024130038974000000672001300961194332540010100102000010000102000010000506214979148000250130013013003813003812549831262683001020100002000020100002000013003813003811200211091010010100001000100000000012704164512952510000100001000010010130039130039130039130118130039
300241300389740000000001300231194182540010100102000010000102000010000506215444148000250130013013003813003812549831262683001020100002000020100002000013003813003811200211091010010100001000100000000012704164512952510000100001000010010130039130039130108130040130039

Test 3: throughput

Count: 8

Code:

  fcvtms x0, s8
  fcvtms x1, s8
  fcvtms x2, s8
  fcvtms x3, s8
  fcvtms x4, s8
  fcvtms x5, s8
  fcvtms x6, s8
  fcvtms x7, s8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 18 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a0 | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ec | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602044006230000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000001115117017004003880000080000801004004240042400424004240042
1602044004130000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000001115117016004003880000080000801004004240042400424004240042
1602044004130000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000001115117016004003880000080000801004004240042400424004240042
16020440041300006972524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000001115117016004003880000080000801004004240042400424004240042
1602044004130000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000001115117016004003880000080000801004004240042400424004240042
1602044004130000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000031115117016004003880000080000801004004240042400424004240042
1602044004130000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000001115117016004003880000080000801004004240042400424004240042
1602044004130000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000101115117016004003880000080000801004004240042400424004240042
1602044004130000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000001115117016004003880000080000801004004240042400424004240042
1602044004130009742524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000001115117016004003880000080000801004004240042400424004240042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 0a | 18 | 19 | 1e | 1f | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024400423000000004225240010800101600001016000050144000004002240041400411999603200211600102016000020160000400414004111800211091080010100000030000502081669400388000080000800104004240042400424004240042
1600244004130000000042252400108001016000010160000501440000040022400414004119996032002116001020160000201600004004140041118002110910800101000000144000502081677400388000080000800104004240042400424004240042
16002440041300000000422524001080010160000101600005014400000400224004140041199960320021160010201600002016000040041400411180021109108001010000002310005020101688400388000080000800104004240042401284004240042
1600244004130000000042252400108001016000010160000501440000040022400414004119996032002116001020160000201600004004140041118002110910800101000000147000502091657400388000080000800104004240042400424004240042
16002440041300000000422524001080010160000101600005014400000400224004140041199960320021160010201600002016000040041400411180021109108001010000009600050205161111400388000080000800104004240042400424004240042
16002440123301000000422524001080010160000101600005014400000400224004140041199960320021160010201600002016000040041400411180021109108001010000001560005020101688400388000080000800104004240042400424004240042
16002440041300000000422524001080010160000101600005014400000400224004140041199960320021160010201600002016000040041400411180021109108001010000001680005020716109400388000080000800104004240042400424004240042
160024400413000000007072524001080010160000101600005014400000400224004140041199960320021160010201600002016000040041400411180021109108001010000001620005020101668400388000080000800104004240042400424004240042
16002440041300000000422524001080010160000101600005014400000400224004140041199960320021160010201600002016000040041400411180021109108001010000002280005020101668400388000080000800104004240042400424004240042
16002440041300000000422524001080010160000101600005014400000400224004140041199960320021160010201600002016000040041400411180021109108001010000003000502071668400388000080000800104004240042400424004240042