Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FCVTPU (scalar, D to W)

Test 1: uops

Code:

  fcvtpu w0, d0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 2.000

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst integer (97) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
20045414043253000100020002000180000522541541248327420002000200054154111100110007311611538100010001000542542542542542
20045415043253000100020002000180000522541541248327420002000200054154111100110007311611538100010001000542542542542542
20045415643253000100020002000180000522541541248327420002000200054154111100110007311611538100010001000542542542542542
20045414043253000100020002000180000522541541248327420002000200054154111100110007311611538100010001000542542542542542
20045414043253000100020002000180000522541541248327420002000200054154111100110007311611538100010001000542542542542542
20045414043253000100020002000180000522541541248327420002000200054154111100110007311611538100010001000542542542542542
20045414043253000100020002000180000522541541248327420002000200054154111100110007311611538100010001000542542542542542
20045414043253000100020002000180000522541541248327420002000200054154111100110007311611538100010001000542542542542542
20045414043253000100020002000180000522541541248327420002000200054154111100110007311611538100010001000542542542542542
20045414043253000100020002000180000522541541248327420002000200054154111100110007311611538100010001000542542542542542

Test 2: Latency 1->2 roundtrip

Code:

  fcvtpu w0, d0
  fmov d0, x0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0038

retire (01) | cycle (02) | 03 | 04 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | 77 | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a1 | a6 | a8 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30204130038974000000013002311945525401001010020000100001002000010000526621497914801034013001301300381300381254836126241030100200100022000620010002200061300381300381120201100991001010010000100010000002111131801161112953410000100001000010100130039130039130039130042130039
3020413003897400121156013002311941725401001010020000100001002000010000500621502714801034013001301300381300381254763126246030100200100002000020010000200001300381300381120201100991001010010000100010000000000131013162212952510000100001000010100130039130363130039130039130042
30204130038974000000013002311941725401001010020000100001002000010000500621497914801034013001301300381300381254763126249030100200100002061120610186200001301181300381120201100991001010010000100010007042830000131012163312952510000100001000010100130039130039130039130039130039
30204130038974000000013002311941725401001010020000100001002000010000500621497914801034013001701300381300381254763126246030100200100002000020010000200001305191300382120201100991001010010000100010000000000131012163212952510000100001000010100130039130039130039130039130039
302041300389740000012013002311941732401001010020000100001002000010000500621497914801034013001301300381300381254763126246030100200100002000020010000200001300381300381120201100991001010010000100010000000000131012162312952510000100001000010100130039130039130039130039130039
30204130038974000000013002311941825401001010020000100001002000010000500621497914801034013001301300381300381254763126246030100200100002000020010000200001300381300381120201100991001010010000100010000000000131012162312952510000100001000010100130039130039130039130039130039
30204130038974000000013002311941725401001010020000100001002000010000500621497914801034013001301300381300381254763126246030100200100002000020010000200001300381300381120201100991001010010000100010000300000131013163212952510000100001000010100130039130039130039130039130039
30204130038974000000013002311943625401001010020000100001002000010000500621497914801034013001301300381300381254763126246030100200100002000020010000200001300381300381120201100991001010010000100010000000000131013162212952510000100001000010100130039130070130039130039130039
30204130038974000000013002311941725401001010020000100001002000010000500621497914801034013001301300381300381254763126246030100200100002000020010000200001300381300381120201100991001010010000100010000000000131013162212959710000100001000010100130039130039130039130039130039
30204130038974000000013027811941725401001010020000100001002000010000500621502714801034013008701300381300391254763126246030100200100002000020010000200001300381300381120201100991001010010000100010000000000131012162212958610000100001000010100131396131028130696132279132386

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0038

retire (01) | cycle (02) | 03 | 08 | 0b | 18 | 19 | 1e | 1f | 3f | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a1 | a6 | a8 | a9 | ac | c2 | c5 | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3002413003897400000013002311941725400101001020000100001020000100005062149791480002501130013130038130043125498312626830010201000020000201000020000130038130038112002110910100101000010001000010000012700002160401112952510000100001000010010130039130040130039130039130039
300241300389740000001300231194172540010100102000010000102000010000506214979148003670113008513003813003812550031262683001020100002000020100002000013003813003811200211091010010100001000100000000012700001160001112952510000100001000010010130039130039130039130039130039
300241300389740000001300231194172540010100102000010000102000010000506214979148000250013001413003813003812549831262683001020100002000020100002000013003813003811200211091010010100001000100000000012700001160001112952510000100001000010010130041130039130039130137130039
300241300389740000001300231194172540010100102000010000102000010000506214979148000250013001313003813003812549831262713001020100002000020100002000013003813009311200211091010010100001000100000000012700001160001112961010000100001000010010130039130039130039130039130039
300241300389740000001300231194172540010100102000010000102000010000506214979148000250013001313003813003812549831262683001020100002000020100652000013003813003811200211091010010100001000100000000012700001160001112952510000100001000010010130039130039130039130039130041
300241300389740000001300231194172540010100102000010000102000010000506214979148000250013001313003813003812549831262683001020100002024620100622000013008313009311200211091010010100001000100000000012700001160002112952510000100001000010010130039130039130039130039130039
300241300389740000001300231194172540010100102000010000102000010000506214979148000250113001313003813012912549931262683001020100002000020100002000013003813007811200211091010010100001000100001042880012700001160001112952510000100001000010010130066130042130039130039130039
300241300389740000001300231194172540010100102000010000102000010000506214979148000250013001313003813003812549831262683001020100002000020100002000013006213003911200211091010010100001000100000000012700001160001212952510000100001000010010130039130039130039130039130039
3002413003897400000013002311941725400101001020000100001020000100005062149791480002501130013130038130038125536312626830010201000020131201000020000130038130084112002110910100101000010221001312187480012700001160001112952510000100001000010010130039130039130039130039130039
3002413003897400000013002311941725400101001020000100001020000100005062156991480658900130015130038130038125522912626830180201000020000201000020240130373130144212002110910100101000010001000040912012700001160011112952510000100001000010010130039130039130039130039130039

Test 3: throughput

Count: 8

Code:

  fcvtpu w0, d8
  fcvtpu w1, d8
  fcvtpu w2, d8
  fcvtpu w3, d8
  fcvtpu w4, d8
  fcvtpu w5, d8
  fcvtpu w6, d8
  fcvtpu w7, d8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst ldst (9b) | 9f | a0 | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | eb | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
1602044006430005072524010480100160004100160020500144013214002240041400411997761999216012020016003220016003240041400411180201100991008010001000001115117016004003880000080000801004004240042400424004240042
160204400413000322524010480100160004100160020500144013214002240041400411997761999216012020016003220016003240041400411180201100991008010001000001115117016004003880000080000801004004240042400424004240042
160204400413000322524010480100160004100160020500144013214002240041400411997761999216012020016003220016003240041400411180201100991008010001000001115117016004003880000080000801004004240042400424004240042
160204400413000322524010480100160004100160020500144013214002240041400411997761999216012020016003220016003240041400411180201100991008010001000001115117016004003880000080000801004004240042400424004240042
160204400413000322524010480100160004100160020500144013214002240041400411997761999216012020016003220016003240041400411180201100991008010001000001115117016004003880000080000801004004240042400424004240042
160204400413000322524010480100160004100160020500144013214002240041400411997761999216012020016003220016003240041400411180201100991008010001000001115117016004003880000080000801004004240042400424004240042
160204400413000322524010480100160004100160020500144013214002240041400411997761999216012020016003220016003240041400411180201100991008010001000001115117016004003880000080000801004004240042400424004240042
160204400413000322524010480100160004100160020500144013214002240041400411997761999216012020016003220016003240041400411180201100991008010001000001115117016004003880000080000801004004240042400424004240042
160204400413000322524010480100160004100160020500144013214002240041403601997761999216012020016003220016003240041400411180201100991008010001000001115117016004003880000080000801004004240042400424004240042
160204400413000322524010480100160004100160020500144013214002240041400411997761999216012020016003220016003240041400411180201100991008010001000001115117016004003880000080000801004004240042400424004240042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.5005

retire (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a8 | ac | cf | d0 | d2 | icache miss (d3) | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
16002440055311000422524001080010160000101600005014400000040022400414004119996320021160010201600002016000040041400411180021109108001010005020000216011400388000080000800104004240042400424004240042
16002440041311000702524001080010160000101600005014400001140022400414004119996320021160010201600002016000040041401281180021109108001010005038000216011400388000080000800104004240042400424004240042
160024400413100301372524001080010160000101600005014400001040033400414004119996320021160010201600002016000040041400411180021109108001010005020000116011400388000080000800104004240042400424004240042
16002440041311000422524001080010160000101600005014400000040022400414004119996320021160010201600002016000040041400411180021109108001010005020000116011400388000080000800104004240042400424004240042
16002440041321000422524001080010160000101600005014400000040022400414004119996320021160010201600002016000040041400411180021109108001010005020000216011400388000080000800104004240042400424004240042
160024400413210002132524027280303160000101600005014400001140022400414004120019320021160010201600002016021440041400412180021109108001010005020300116011400388000080000800104004240042400424004240042
160024400413100001402524001080010160000101600005014400000040022400414004119996320021160010201600002016020040041400411180021109108001010005020000116011400388000080000800104004240042400424004240042
16002440041310000842524001080010160000101600005014400000040022400414004119996320021160010201600002016000040041400411180021109108001010005020000116011400388000080000800104004240042400424004240042
160024400413100120422524001080010160000101600005014400000040022400414004119996320021160010201600002016000040041400411180021109108001010005020000216011400388000080000800104004240042400424004240042
16002440041310000422524001080010160000101600005014400000040022400414004119996320021160010201600002016000040041400411180021109108001010065020000144011400388000080000800104004240042400424004240042