Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FCVTZU (scalar, fixed-point, S to X)

Test 1: uops

Code:

  fcvtzu x0, s0, #3
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 2.000

retire (01) | cycle (02) | 03 | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst integer (97) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
200454144325300010002000200018000052254154124832742000200020005415411110011000007311611538100010001000542542542542542
200454144325300010002000200018000152254154124832742000200020005415411110011000007311611538100010001000542542542542542
200454144325300010002000200018000052254154124832742000200020005415411110011000007311611538100010001000542542542542542
200454144325300010002000200018000052254154124832742000200020005415411110011000037311611538100010001000542542542542542
200454144325300010002000200018000052254154124832742000200020005415411110011000007311611538100010001000542542542542542
200454144325300010002000200018000052254154124832742000200020005415411110011000007311611538100010001000542542542542542
200454144325300010002000200018000052254154124832742000200020005415411110011000207311611538100010001000542542542542542
200454144325300010002000200018000052254154124832742000200020005415411110011000107311611538100010001000542542542542542
200454144325300010002000200018000052254154124832742000200020005415411110011000007311611538100010001000542542542542542
200454144325300010002000200018000052254154124832742000200020005415411110011000007311611538100010001000542542542542542

Test 2: Latency 1->2 roundtrip

Code:

  fcvtzu x0, s0, #3
  fmov d0, x0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0038

retire (01) | cycle (02) | 03 | 09 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a7 | a8 | a9 | ac | c5 | c9 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130038974000000130023119456254010010100200001000010020000100005006214979148010341130013130038130038125476312624630100200100002000020010000201291300381300381120201100991001010010000100010000000000001310111622129525100000100001000010100130039130039130094130059130039
30204130038974000000130023119417254010010100200001000010020000100005006214979148010340130013130038130038125476312624630100200100002000020010000200001300381300381120201100991001010010000100010000000000001310121622129525100000100001000010100130039130039130112130042130039
30204130038974000000130023119418254010010116200001000010020000100005006214979148010340130013130038130042125476312624630100200100002000020010000200001300381300381120201100991001010010000100010000000000001310121622129525100000100001000010100130039130039130044130090130039
30204130038974000000130023119422254010010100200101000010020000100005006214979148010341130013130038130038125476312624730100200100002000020010000200001300381300381120201100991001010010000100010000000000001310121622129528100000100001000010100130039130039130090130053130039
30204130038974000000130023119417254010010100200001000010020000100005006214979148010340130013130038130038125476312624630100200100002000020010000200001300381300381120201100991001010010000100010000000000001310121622129525100000100001000010100130039130039130097130042130039
30204130038974000000130023119417254010010100200001000010020000100005006214979148010341130013130038130038125476312624630100200100002000020010000200001300381300381120201100991001010010000100010000000000001310111622129525100000100001000010100130039130039130039130113130039
30204130038973000000130023119417254010010100200001000010020000100005006214979148010341130013130038130038125476312624630100200100002000020010000200001300381300381120201100991001010010000100010000000000001310121622129525100000100001000010100130039130039130079130040130039
302041301299740000270130023119417254010010100200001000010020000100005006214979148010341130016130038130038125476312624630100200100002000020010000200001300381300381120201100991001010010000100010000000000101310121622129525100000100001000010100130039130039130114130047130039
30204130038974000000130023119471254010010100200001000010020000100005006214979148010341130013130038130038125476312624630100200100002000020010000200001300381300381120201100991001010010000100010000000000001310121622129548100000100001000010100130039130092130066130041130039
30204130038974000000130023119417254010010100200001000010020000100005006214979148010340130013130038130038125476312624630100200100002000020010000200001300381300381120201100991001010010000100010000000000001310121622129525100000100001000010100130039130039130102130040130039

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0038

retire (01) | cycle (02) | 03 | 08 | 09 | 0b | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3002413003897400000013002311941725400101001020000100001020000100005062150271480047201300131300381300381254983126268300102010000200002010000200001300381300381120021109101001010000100010000000000012702161112984810004100001000010010130128130039130464130039130039
3002413003897411000013002311941725400101001020000100001020000100005062149791480002501300131300381300381254983126268300102010000200002010000200001300381300381120021109101001010000100010000000000012701161112952810000100001000010010130039130039130039130039130039
3002413003897400000013002311941725400101001020000100001020000100005062149791480002501300131300381300381254983126268300102010000200002010000200001300381300381120021109101001010000100010000000000012701161112952510000100001000010010130039130039130039130039130039
3002413003897400000013002311941725400101001020000100001020000100005062149791480002501300131300381300381254983126268300102010000200002010000200001300411300391120021109101001010000100010000000000012701161112952510000100001000010010130039130039130039130039130039
3002413003897400000013002311941725400101001020000100001020000100005062149791480002501300131300381300381254983126268300102010000200002010000200001300381300381120021109101001010000100010000000000012701161112952510000100001000010010130039130039130039130039130039
3002413003897400000013002511941725400101001020000100001020000100005062149791480002501300131300381300381254983126268300102010000200002010000200001300381300381120021109101001010000100010000000000012701161112952510000100001000010010130039130039130039130039130039
3002413003897400000013002311941725400101001020000100001020000100005062149791480002501300131300381300381254983126268300102010000200002010000200001300381300381120021109101001010000100010000000000012701162112952510000100001000010010130039130039130039130039130039
3002413003897300000013002311941725400101001020000100001020000100005062149791480014001300131300381300381254983126268300102010000200002010000200001300381300381120021109101001010000100010000000000012701161112952510000100001000010010130039130039130039130039130039
3002413003897400000013002311941725400101001020000100001020000100005062149791480002501300131300381300381254983126268300102010000200002010000200001300381300381120021109101001010000100010000000000012701162112952510000100001000010010130045130040130376130039130039
3002413003897400000013002311941725400101001020000100001020000100005062149791480002501300131300381300381254983126268300102010000200002010000200001300411300381120021109101001010000100410004010300012701161112952510000100001000010010130039130039130040130039130039

Test 3: throughput

Count: 8

Code:

  fcvtzu x0, s8, #3
  fcvtzu x1, s8, #3
  fcvtzu x2, s8, #3
  fcvtzu x3, s8, #3
  fcvtzu x4, s8, #3
  fcvtzu x5, s8, #3
  fcvtzu x6, s8, #3
  fcvtzu x7, s8, #3
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d2 | d5 | d6 | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602044007730000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000011151170016400388000080000801004004240042400424004240042
16020440041300009122524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000011151170016400388000080000801004004240042400424004240042
16020440041300603225240104801001600041001600205001440132040022040041400411997712200351601202001600322001600324004140041118020110099100801001000011151170016400388000080000801004004240042400424004240042
1602044004130000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000011151170016400388000080000801004004240042400424004240042
1602044004130000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000011151170016400388000080000801004004240042400424004240042
1602044004130000322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000011151170016400388000080000801004004240042400424004240042
1602044004129900322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000011151170016400388000080000801004004240042400424004240042
1602044004130090322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000011151170016400388000080000801004004240042400424004240042
1602044004129900322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000011151170016400388000080000801004004240042400424004240042
1602044004129900322524010480100160004100160020500144013204002204004140041199776199921601202001600322001600324004140041118020110099100801001000011151170016400388000080000801004004240042400424004240042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a0 | a6 | a8 | ac | cf | d2 | icache miss (d3) | d5 | d6 | d9 | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1600244005529902372524001080010160000101600005014400001400224004140041199963200211600102016000020160000400414004111800211091080010100000502000141603515400388000080000800104004240042400424004240042
160024400413000422524026680010160000101600005014400001400224004140041199963200211600102016000020160000400414004111800211091080010100000502000616001615400388000080000800104004240042400424004240042
160024400413000422524001080010160000101600005014400001400224004140041199963200211600102016000020160000400414004111800211091080010100000502000161600165400388000080000800104004240042400424004240042
1600244004130004225240010800101600001016000050144000014002240041400411999632002116001020160000201600004004140041118002110910800101000005020001616001615400388000080000800104004240042400424004240042
1600244004130004225240010800101600001016000050144000014002240041400411999632002116001020160000201600004004140041118002110910800101000035020001616001615400388000080000800104004240042400424004240042
160024400412999842524001080010160000101600005014400001400224004140041199963200211600102016000020160000400414004111800211091080010100003502000616001615400388000080000800104004240042400424004240042
1600244004130012654924031480010160000101600005014436141400224012240041199963200211600102016020820160000402044004111800211091080010100010502000616001616400388000080000800104004240042400424004240042
160024400413000422524001080010160000101600005014400001400224004140041199963200211600102016000020160000400414004111800211091080010100010502000161600515400388000080000800104004240042400424004240042
1600244004130005172524001080010160000101600005014400001400224004140041199963200211600102016000020160000400414004111800211091080010100000502000161600515400388000080000800104004240042400424004240042
160024400413001242252400108001016000010160000501440000140281400414004120092320021160010201600002016000040041400411180021109108001010000050200061600615400388000080000800104004240042400424004240042