Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

FJCVTZS

Test 1: uops

Code:

  fjcvtzs w0, d0
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 3.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 2.000

retire (01) | cycle (02) | 03 | 1e | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch fp/simd (57) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op fp/simd (7e) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst integer (97) | a8 | ac | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
2004104180462530001000200020001800011022104110417453774200020002000104110411110011000007311611103810001000100010421042104210421042
2004104183462530001000200020001800011022104110417453774200020002000104110411110011000007311611103810001000100010421042104210421042
2004104180462530001000200020001800011022104110417453774200020002000104110411110011000007311611103810001000100010421042104210421042
2004104180882530001000200020001800011022104110417453774200020002000104110411110011000007311611103810001000100010421042104210421042
2004104180462530001000200020001800011022104110417453774200020002000104110411110011000007311611103810001000100010421042104210421042
2004104180462530001000200020001800011022104110417453774200020002000104110411110011000007311611103810001000100010421042104210421042
2004104180462530001000200020001800011022104110417453774200020002000104110411110011000037311611103810001000100010421042104210421042
2004104180672530001000200020001800011022104110417453774200020002000104110411110011000007311611103810001000100010421042104210421042
2004104180462530001000200020001800011022104110417453774200020002000104110411110011000007311611103810001000100010421042104210421042
2004104180462530001000200020001800011022104110417453774200020002000104110411110011000007311611103810001000100010421042104210421042

Test 2: Latency 1->2 roundtrip

Code:

  fjcvtzs w0, d0
  fmov d0, x0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0038

retire (01) | cycle (02) | 03 | 07 | 08 | 18 | 19 | 1e | 1f | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a0 | a6 | a7 | a8 | a9 | ac | c2 | c5 | branch mispredict (cb) | cd | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
3020413003897400000013002312950425401001010020000100001002000010000500528887193220980130013013003813003812568671262413010020010002200062001000220006130038130040112020110099100101001000010001000000000111131701161112960110000100001000010100130039130039130039130039130039
3020413003897400000013002312950425401001010020000100001002000010000500528887193220980130013013008513004012568661262413010020010002200062001000220006130038130038112020110099100101001000010001000000000111131801160112960010000100001000010100130039130039130039130039130039
3020413003897400000013002312950425401001010020000100001002000010000500528887193220980130013013003813003812567931262463010020010000200002001000020000130038130038112020110099100101001000010001000000000000131012162212959010000100001000010100130039130039130039130039130039
3020413003897400000013002312950425401001010020000100001002000010000500528887193220980130013013003813003812567931262553010020010000200002001000020000130038130041112020110099100101001000010001000000000000131012162212959010000100001000010100130039130039130039130039130039
3020413003897400000013002312950425401001010020000100001002000010000500528887193220980130013013003813003812567931262463010020010000200002001000020000130038130038112020110099100101001000010001000000000000131012162212959010000100001000010100130039130039130039130039130039
3020413003897400000013002312950425401001010020000100001002000010000500528887193220980130013013003813003812567931262463010020010000200002001000020000130040130038112020110099100101001000010001000000000000131012162212959010000100001000010100130039130039130039130039130039
3020413003897400000013002312950425401001010020000100001002000010000500528887193220980130013013003813003812567931262463010020010000200002001000020000130077130039112020110099100101001000010001000000000000131012162312959010000100001000010100130039130039130039130039130039
3020413003897400000013002312950425401001010020000100001002000010000500528887193220980130013013003813003812567931262463010020010000200002001000020000130040130038112020110099100101001000010001000000000000131012162212959010000100001000010100130039130039130039130077130039
3020413003897400000013002312950425401001010020000100001002000010000500528887193220980130013013003813003812567931262463010020010000200002001000020000130379130038112020110099100101001000010001000000000000131012172312959010000100001000010100130039130039130039130039130039
3020413003897400000013002312950425401001010020000100001002000010000500528887193220980130013013003813003812567931262463010020010000200002001000020000130052130038112020110099100101001000010001000000000000131012162212959010000100001000010100130039130039130039130039130039

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0038

retire (01) | cycle (02) | 03 | 0b | 18 | 19 | 1e | 3a | 3f | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst neon or fp (9a) | 9f | a6 | a7 | a8 | a9 | ac | c2 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
30024130038974000001300231295042540010100102000010000102000010000505287724932209813001313003813003812570131262683001020100002000020100002000013003813003811200211091010010100001010000030000012701161112959010000100001000010010130039130039130039130039130040
30024130083974000001300231295042540010100102000010000102000010000505287724932209813001313003813003812570131262683001020100002000020100002000013003813003811200211091010010100001010000070000012701161112959010000100001000010010130039130039130039130039130039
30024130038974000001300231295042540010100102000010000102000010000505287724932209813001313003813003812570131262683001020100002000020100002000013003813003811200211091010010100001010000000000012701161112959010000100001000010010130039130039130039130088130039
30024130038974000901300231295042540010100102000010000102000010000505287724932209813001313003813003812570131262683001020100002000020100002000013003813003811200211091010010100001010000000000012701161112959010000100001000010010130039130039130039130039130039
30024130038974000001300231295042540010100102000010000102000010000505287724932209813001313003813003812570131262683001020100472000020100002000013003813003811200211091010010100001010000020001012701161112959010000100001000010010130039130039130039130039130040
30024130038974000001300231295042540010100102000010000102000010000505287724932209813001313003813003812570131262683001020100002000020100002000013003813003811200211091010010100001010000000000012701161112959010000100001000010010130039130039130039130039130039
3002413003897400070201300231295052540010100102000010000102000010000505287724932209813001313003813003812570131262683012520100002000020100002000013003813003811200211091010010100001010000010040012701161112959010000100001000010010130039130039130039130039130039
30024130038974000001300231295042540010100102000010000102000010000505287724932209813001313003813003812570131262683001020100002000020100002000013003813003811200211091010010100001010000000000012701162112959010000100001000010010130039130039130039130039130058
30024130038974055011300231295082540010100112001610010112044410211555297334932927213001313054613020812570631262683001020100002000020100002000013004013003811200211091010010100001010000030000012701161112959010000100001000010010130039130039130039130039130039
30024130038974000001300231295042540010100102000010000102000010000505287724932209813001313003813003812570131262683001020100002000020100002000013003813003811200211091010010100001010000000000012701161112959010000100001000010010130039130039130039130039130039

Test 3: throughput

Count: 8

Code:

  fjcvtzs w0, d8
  fjcvtzs w1, d8
  fjcvtzs w2, d8
  fjcvtzs w3, d8
  fjcvtzs w4, d8
  fjcvtzs w5, d8
  fjcvtzs w6, d8
  fjcvtzs w7, d8
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 0b | 19 | 1e | 1f | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 73 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a8 | ac | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160204800576000012034252401008010016000010016000450014400241800220800418004159976075999316010420016001620016001680041800411180201100991008010010000111511701600800388000080000801008009280042800428004280042
16020480041599000034252401008010016000010016000450014400241800220800418004159976075999416010420016001620016001680041800411180201100991008010010000111511701600800388000080000801008004280042800428004280042
16020480041600000034252402498012416000010016000450014400241800220800418004159976075999416010420016001620016001680041800411180201100991008010010000111511601600800388000080000801008004280042800428004280042
16020480041600000035252401008010016000010016000450014400241800220800418004159976075999316010420016001620016001680041800411180201100991008010010000111511701600800388000080000801008004280042800428004280042
160204800415990000699252401008010016000010016000450014400241800220800418004159976075999416010420016001620016001680041800411180201100991008010010000111511701600800388000080000801008004280042800428004280042
160204800416000000224252401008010016000010016000450014400241800220800418004159976075999416010420016001620016001680041800411180201100991008010010000111511601600800388000080000801008004280042800428004280042
16020480041599000034252401008010016000010016000450014400241800220800418004159976075999316010420016001620016001680041800411180201100991008010010060111511711600800388000080000801008004280042800428004280042
16020480041600000035252401008010016005210016000450014400240800220800418004159976075999316010420016001620016001680041800411180201100991008010010000111511701600800388000080000801008004280042800428004280042
16020480041599000034252401008010016000010016000450014400240800220800418004159976075999416010420016001620016001680041800411180201100991008010010000111511601600800388000080000801008004280042800428004280042
16020480041600000034252401008010016000010016000450014400241800220800418004159976075999416010420016001620016001680041800411180201100991008010010009111511701610800388000080000801008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire (01) | cycle (02) | 03 | 08 | 18 | 19 | 1e | 1f | 3a | 3f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~dispatch int (56) | ~dispatch fp/simd (57) | huge thing int (59) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | 9f | a1 | a8 | ac | c2 | branch mispredict (cb) | cf | d0 | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
160024800415990000001042524001080010160000101600005014400000800220800418008959992360021160010201600002016000080041800411180021109108001010000005020041642800378000080000800108004280042800428004280042
16002480041599000000682524001080010160000101600005014400000800220800418004159992360021160010201600002016000080041800411180021109108001010000005020041624800378000080000800108004280042800428004280042
160024800415990000007412524001080010160000101600005014400000800220800418004159992360021160010201600002016000080041800411180021109108001010000005020021644800378000080000800108004280042800428004280042
160024800415990000006982524001080010160000101600005014400001800220800418004159992360021160010201600002016000080041800411180021109108001010000005020021624800378000080000800108004280042800428004280042
160024800415990000006052524001080010160000101600005014400000800220800418004159992360021160010201600002016000080041800411180021109108001010000005020041624800378000080000800108004280042800428004280042
160024800415990000001302524001080010160000101600005014400000800220800418004159992360021160010201600002016000080041800411180021109108001010000005020041642800378000080000800108004280042800428004280042
160024800415990000006472524001080010160000101600005014400000800220800418004159992360021160010201600002016000080041800411180021109108001010000005020041642800378000080000800108004280042800428004280042
160024800416000000004992524001080010160000101600005014400000800220800418004159992360021160010201600002016000080041800411180021109108001010000005020021624800378000080000800108004280042800428004280042
160024800416000000006682524001080010160000101600005014400000800220800418004159992360021160010201600002016000080041800411180021109108001010000005020041663800378000080000800108004280042800428004280042
1600248004160000000021402524001080010160150101600005014400000800220800418004159992360021160010201600002016000080041800411180021109108001010000005020021624800378000080000800108004280042800428004280042