Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FCVTPU (scalar, S to W)

Test 1: uops

Code:

  fcvtpu w0, s0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 2.000

retire (01)cycle (02)030b181e1f3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch fp/simd (57)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op fp/simd (7e)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst integer (97)a1a6a8a9accfd5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
2004541400004325300010002000200018000052254154124832742000200020005415411110011000000007311611538100010001000542542542542542
2004541400004325300010002000200018000052254154124832742000200020005415411110011000000007311611538100010001000542542542542542
2004541400004325300010002000200018000052254154124832742000200020005415411110011000000007311611538100010001000542542542542542
2004541500004325300010002000200018000052254154124832742000200020005415411110011000000007311611538100010001000542542542542542
2004541400004325300010002000200018000052254154124832742000200020005415411110011000000007311611538100010001000542542542542542
2004541400004325300010002000200018000152254154124832742000200020005415411110011000000007311611538100010001000542542542542542
2004541400004325300010002000200018000152254154124832742000200020005415411110011000000007311611538100010001000542542542542542
2004541400004325300010002000200018000052254154124832742000200020005415411110011000000007311611538100010001000542542542542542
2004541400004325300010002000200018000052254154124832742000200020005415411110011000000007311611538100010001000542542542542542
2004541400004325300010002000200018000052254154124832742000200020005415411110011000000007311611538100010001000542542542542542

Test 2: Latency 1->2 roundtrip

Code:

  fcvtpu w0, s0
  fmov d0, x0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0038

retire (01)cycle (02)030b18191e1f3a3f4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc2c5branch mispredict (cb)cdcfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)ld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
3020413003897400000013002311941725401001010020000100001002000010000500621497914801034113001313003813006412547631262463010020010000200002001000020000130038130038212020110099100101001000010000100000000000131012162212952510000100001000010100130039130039130039130077130077
3020413004797400000013002311941725401001010020000100001002000010000500621497914801034113001313003813003812547631262463010020010000200002001000020000130038130038112020110099100101001000010000100000000000131012162212952510000100001000010100130039130039130039130039130039
3020413003897400000013002711941725401001010020000100001002000010000500621497914801034113001313003813003812547631262463010020010000200002001000020000130038130038112020110099100101001000010000100000000000131012162212952510000100001000010100130039130039130039130039130039
3020413003897400000013002311941725401001010020000100001002000010000500621497914801034013001313003813003812547631262463010020010000200002001000020000130038130038112020110099100101001000010000100000000000131012162212952510000100001000010100130039130039130039130039130039
3020413004097400000013002311941725401001010020000100001002000010000500621497914801034113001313003813004312547631262463010020010000200002001000020000130038130038112020110099100101001000010000100000000000131012162212952510000100001000010100130039130111130053130039130039
3020413003897400000013002311941725401001010020000100001002000010000500621497914801034113001313003813003812547631262463010020010060200002001000020000130038130041112020110099100101001000010000100000000000131012162212952510000100001000010100130039130039130039130039130039
3020413003897400000013002311941725401001010020000100001002000010000500621497914801034013001313003813003812547631262463010020210000200002001000020000130038130038112020110099100101001000010000100000000000131012162212952510000100001000010100130039130039130039130039130039
3020413003897400000013002311941725401001010020000100001002000010000500621497914801034013001313003813003812547631262463010020010000200002001000020000130038130038112020110099100101001000010000100000030000131012162212952510000100001000010100130039130039130039130039130039
3020513009697400000013002311941725401001010020004100001002000010000500621497914801266113001313003813003812547631262463010020010000200002001000020000130038130039112020110099100101001000010000100000000000131012162212952510000100001000010100130072130039130039130039130039
3020413003897500000013002311941725401001010020000100001002000010000500621497914801034013001313003813003812547631262463010020010000200002001000020000130038130038112020110099100101001000010000100000000000131012162212952510000100001000010100130039130039130039130039130039

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0038

retire (01)cycle (02)0308090b181e1f3f4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa6a8a9acc5branch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)ld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
30024130038974000021601301081194172540010100102000010000102000010000506214979148000251130023130038130041125637312626830182201000020000201000020000130038130043112002110910100101000010100000030012701161112952610000100001000010010130039130039130039130040130040
30024130038974000017701300231207808594024410059201201007313244251186265630339715014119113001313004013003812555110012623630010201000020000201000020000130038130038112002110910100101000010100000030012701161112952510000100001000010010130039130039130039130046130039
300241300389740000483521300231194172540010100102000010000102000010000506214979148000251130013130038130040125500312626830010201000020000201000020491130040130038112002110910100101000010100000000012701161112952510000100001000010010130039130039130039130039130039
30024130038974000060301300231194172540010100172000010000102000010000506223695148000250130013130038130038125498312626830010201000020241201000020000130041130039512002110910100101000010100004000012701481112952510000100001000010010130039130039130039130039130039
300241300389740000001300231194172540010100162000010000102000010000506215123148000250130013130038130038125498312632830010201000020000201000020485130038130038112002110910100101000010100001000012701161112952510000100001000010010130039130039130039130039130039
300241300389740000001300231194172540010100102000010000102000010000506214979148000250130015130038130038125498312647230010201000020000201000020000130038130038112002110910100101000010100000000012701161112952510000100001000010010130039130039130039130039130039
300241300389750000001300231194172540010100102000010000102000010000506214979148000250130013130038130038125498312626830010201000020000201000020000130040130039512002110910100101000010100000030012701161112952510000100001000010010130039130039130039130039130039
300241300389740000001300231194172540010100102000010000102000010000506214979148000250130013130038130038125498312626830010201000020000201000020481130038130038112002110910100101000010100003000012702241112952710000100001000010010130039130039130039130039130039
300241300389740000001300231194172540010100102000010000102000010000506214979148000250130013130038130038125498312626830010201000020000201000020000130038130038112002110910100101000010100000000012701161112952510000100001000010010130039130039130039130039130039
300241300389740000001300231194172540010100102000010000102000010000506214979148000250130013130038130038125498312626830010201000020000201000020000130038130038112002110910100101000010100000000012701161112952510000100001000010010130040130039130039130039130039

Test 3: throughput

Count: 8

Code:

  fcvtpu w0, s8
  fcvtpu w1, s8
  fcvtpu w2, s8
  fcvtpu w3, s8
  fcvtpu w4, s8
  fcvtpu w5, s8
  fcvtpu w6, s8
  fcvtpu w7, s8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01)cycle (02)0304070a0b18191e1f3a3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)9fa0a1a8a9acc5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)ea? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
160204400413100110000003225240104801001600041001600205001440132040022400414004119977619992160120200160032200160032400414004111802011009910080100100001001115117116114003880000080000801004012440042400424004240042
160204400413110110000003225240104801001600041001600205001440132040022400414004119977619992160120200160032200160032400414004111802011009910080100100000001115117170114003880000080000801004004240042400424004240042
160204400413100110000003225240104801001600041001600205001440132040022400414004119977619992160120200160032200160032400414004111802011009910080100100000001115117116114003880000080000801004004240042400424004240042
160204400413100110000006025240104801001600041001600205001440132040022400414004119977619992160120200160032200160032400414004111802011009910080100100001031115117116114003880000080000801004004240042400424004240042
16020440041310011000120069725240104801001600041001600205001440132040022400414004119977619992160120200160032200160032400414004111802011009910080100100000001115117116114003880000080000801004004240042400424004240042
160204400413100110000003225240104801001600041001600205001440132040022400414004119977619992160120200160032200160032400414004111802011009910080100100000101115117116114003880000080000801004004240042400424004240042
1602044004131101100000032252401048010016000410016002050014401320408654105141058202701032057716283220416270220216292641081411591518020110099100801001002010875311153431142214094781418080000801004004240205411504116541147
160204412393180110019001077791243310811501613761041618685001461696040785411284091520284562067016316820216290620216166041151411671418020110099100801001000207945311153771151114101980000080000801004004240042400424004240042
16020440123311011013913322640738826124419281370162940106163104596146954604110641285413102035512920400163386200163380206162286412424138818180201100991008010010000001133511151172106114075281250080000801004114541140411714075641071
1602044106631901109910689680592141243304805851621281021614865001460270040405407424004119977619992160120200160032200160032400414004111802011009910080100100001001115231279114049681258080000801004027840827410644076340853

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01)cycle (02)031e3f51inst issue (52)~issue int (53)~issue fp/simd (54)~dispatch int (56)~dispatch fp/simd (57)huge thing int (59)huge thing fp/simd (5b)60696d6edispatch stall (70)74scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op fp/simd (7e)~map lookup int (7f)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)9fa0a1a8acc2cdcfd2d5d6ddinst fetch restart (de)e0? int output thing (e9)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
16002440055300154272240010800101600001016000050144000004002240041400411999603202061600102016000020160000400414004111800211091080010100000005020011161311400388000080000800104004240042400424004240042
16002440041300070725240010800101600001016000050144000004002240041400411999603200211600102016000020160000400414004111800211091080010100000005020013161111400388000080000800104004240042400424004240042
1600244004129906425240010800101600001016000050144000004002240041400411999603200211600102016000020160000400414004111800211091080010100000005020012161111400388000080000800104004240042400424004240042
16002440041299080225240010800101600001016000050144000004002240041400411999603200211600102016000020160000400414004111800211091080010100000005020010161113400388000080000800104004240042400424004240042
160024400413000422524001080010160000101600005014400000400224004140041199960320021160010201600002016000040041400411180021109108001010000000502001216914400388000080000800104004240042400424004240042
1600244004131004225240010800101600001016000050144000004002240041400411999603200211600102016000020160000400414004111800211091080010100010005020010161110400388000080000800104004240042400424004240042
1600244004130004225240010800101600001016000050144000014002240041400411999603200211600102016000020160000400414004111800211091080010100000005020010161311400388000080000800104004240042400424004240042
1600244004129904225240010800101600001016000050144000004002240041400411999603200211600102016000020160000400414004111800211091080010100000005020011161310400388000080000800104004240042400424004240042
1600244004130004225240010800101600001016000050144000004002240041400411999603200211600102016000020160000400414004111800211091080010100000005020016161011400388000080000800104004240042400424004240042
1600244004130004225240010800101600001016000050144000004002240041400411999603200211600102016000020160000400414004111800211091080010100000005020013161311400388000080000800104004240042400424004240042