Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

UCVTF (scalar, fixed-point, D from X)

Test 1: uops

Code:

  ucvtf d0, x0, #3
  mov x0, 1
  mov x1, 2

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 2.000

Issues: 2.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.000

retire (01)cycle (02)031e3f4f51inst issue (52)~issue fp/simd (54)~issue ld/st (55)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing ld/st (5a)huge thing fp/simd (5b)60696d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op ld/st (7d)~map op fp/simd (7e)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst neon or fp (9a)a6a8accfd5d6ddinst fetch restart (de)e0ld/st retires (ed)? fp/simd (ee)f5f6f7f8fd
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000378377377377378
2004376303610252000100010001000100014075228201357376380723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000380377377377377
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377382377
20043762123610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376203611252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000384387377377377
2004376303610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100003731161137310001000377377377377377
2004376303610252000100010001000100014075228201357379376723109200010001000100010003763761110011000100000731161137310001000377377377377377
2004376203610252000100010001000100014075228201357376376723109200010001000100010003763761110011000100000731161137310001000377377377377377

Test 2: Latency 1->2 roundtrip

Code:

  ucvtf d0, x0, #3
  fmov x0, d0
  mov x0, 1
  mov x1, 2

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 13.0032

retire (01)cycle (02)03080b18191e1f3a3f464f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a6a8a9acc2c5branch mispredict (cb)cdcfd0d5d6ddinst fetch restart (de)e0? int output thing (e9)ld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
3020413003297400000001300170119408254010010100200001000010020000100005006214497148027091130065013003213003212546631262403010020010000200002001000020000130032130032112020210099100101001000010000100000000000131013162212951910000100001000010100130033130033130033130033130033
3020413003297400000001300170119408254010010100200001000010020000100005006214497148027091130013013003613003212546631262403010020010000200002001000020000130032130032112020110099100101001000010000100000000000131012162212951910000100001000010100130033130071130064130033130033
3020413003297400000001300170119408254010010100200001000010020000100005006214497148027091130147013003213003212546631262483010020010000200002001000020000130032130032112020110099100101001000010000100561416999000001949143183212951910092100001000010100133534132920133572132973133124
3020413359499601384156762552113339401208306584040810206201261007416224318120098426303837150383651132857013003213003212546624812857837116248123252524525612567253391330441336393812020110099100101001000010000100000030000131012162212951910000100001000010100130033130033130033130033130033
3020413003297400000021300170119408254010010100200001000010020000100005006214497148027091130013013003213003212546631262403010020010000200002001000020000130032130069112020110099100101001000010000100000000000131012162212951910000100001000010100130033130033130122130033130033
30204130032974000012001300171119408714012910103200031000010420117100005006214641148032851130013013003213003212546631262423010020010000200002021000020000130032130032112020110099100101001000010000100001000000190512162212951910000100001000010100130033130033133994130828130033
302041300351008000030013001701196611094022910148200271000611921863107845166236263148935581130711013003213134212603494126438322692081122320733200108592000013139413003517120201100991001010010000100221000602424011000149913682413029910041100001000010100130620131706130847131004130641
30204130922988000000013001701194082540100101002000010000100200001014750062144971480270901300130130034130032125466201262403010020010000200002021055220000131375130383112020110099100101001000010000100000000000131012162212951910000100001000010100130033130033130033130033130033
30204130032974000039001300180119408254010010100200001000010020000100005006214497148027090130013013003213003212546631262403010020010000200002001000020000130032130032112020110099100101001000010000100001000000131012162212952110000100001000010100130033130033130038130033130033
302041300329740000000130017011940825401001013020000100001002000010000500621449714802709113001301300321300331254663126240301002001000020000200100002000013003513003411202011009910010100100001000010000001140000132912262312952010000100001000010100130033130117130034130033130033

1000 unrolls and 10 iterations

Result (median cycles for code): 13.0032

retire (01)cycle (02)03080a0b18191e1f3f4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)60696b6d6edispatch stall (70)scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a6a8acbranch mispredict (cb)cfd5d6ddinst fetch restart (de)e0? int output thing (e9)ld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
30024130032100800000001300171194082540010100102000010000102000010000506214497148005281130013013003213003212548931262623001020100002000020100002000013003213006811200211091010010100001001000000012704162212951910003100001000010010130033130033130033130033130033
30024130032100800000001300171194082540010100122000010000102000010000506214497148005281130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001001000000012702162312951910000100001000010010130033130033130033130033130033
3002413003210081000000130017119408254001010010200001000010200001000050621449714800528113001301300321300321254891011262623001020100002013020100002000013011613003211200211091010010100001001000000012702162212952510000100001000010010130033130033130033130033130033
30024130032100800000001300171194082540010100102000010002102000010000506214497148005281130019013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001001000000012702162312951910000100001000010010130033130033130033130033130033
30024130034100800000901300171194082540010100102000010000102000010000506214497148005281130013013003213003212549231262623001020100002000020100002000013003213003211200211091010010100001001000000012702162212951910000100001000010010130033130033130033130033130033
30024130033100800000001300171194082540017100102000010000102000010000506214497147696351130013013003213003212548931262673001020100002000020100002000013003213003211200211091010010100001001000000012702162212952110000100001000010010130033130033130033130377130385
30024131848102711027293840220013256212050967740213100482009310062152337911519826278550149724791131914013272113264112666415812785934143261188323013221183923026132683132646271200211091010010100001021004411239850204354124813276810065100001000010010133745133370133784133652133318
30024133271103710100001300171194082540010100102000010000102000010000506214497148005281130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001001000000012702162212952010000100001000010010130033130069130033130033130033
30024130032100800000001319981194082540010100102000010000102000010000506214497148005281130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001001000000012702162412956110000100001000010010130033130033130033130033130033
30024130032100800000001300171194082540010100102000010000102000010000506214497148005281130013013003213003212548931262623001020100002000020100002000013003213003211200211091010010100001001000020012702162312951910000100001000010010130033130033130033130033130033

Test 3: throughput

Count: 8

Code:

  ucvtf d0, x8, #3
  ucvtf d1, x8, #3
  ucvtf d2, x8, #3
  ucvtf d3, x8, #3
  ucvtf d4, x8, #3
  ucvtf d5, x8, #3
  ucvtf d6, x8, #3
  ucvtf d7, x8, #3
  mov x8, 9

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3339

retire (01)cycle (02)0308090b18191e1f3a3f4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a5a6a7a8a9acc2c5branch mispredict (cb)cdcfd5d6ddinst fetch restart (de)e0? int output thing (e9)ld/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1602042671220700000000267010251601001008000080000100800208001550011689511884163266902670926709663606665816013520080020800202008002080020267092670911802011009910010080000100000800000000011151170160026709080000800001002671026710267102671026710
1602042671120800000000267040251601001008000080000100800208001550011689511884163266902670926709663206665816013520080020800202008002080020267132670911802011009910010080000100000800000000011151170160026706080000800001002671026710267102671026714
1602042670920700000000266940251601001008000080000100800208001550011689511884163266902670926709663206665816013520080020800202008002080020267132670911802011009910010080000100000800000003011151170160026710080000800001002671026710267102671026710
1602042670920700000000266940251601001008000080000100800208001550011689511883887266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100000800000000011151170160026706080000800001002671026710267102671026710
1602042671320700000000266942251601001008000080000100800208001550011689511884163266922670926709663206665816013520080020800202008002080020267092670911802011009910010080000100000800000000011151170160026706080000800001002671026710267102671026710
1602042670920600000000266990251601001008000080000100800208001550011689511884163266902670926709663906665816013520080020800202008002080020267092670911802011009910010080000100000800000003011151170160026706080000800001002671026710267102671026710
1602042670920700000000267090251601001008000080000100800208001550011689511884163266902671026709663206665816013420080020800202008002080020267092670911802011009910010080000100000800000000011151170160026706080000800001002671026710267102671026710
1602042670920700000000266980251601001008000080000100800208001550011689511896532268662671426713663206666216013520080020800202008002080024267092670911802011009910010080000100000800000000011151170160026706080000800001002671026710267102671026710
1602042670920600000000266940251601001008000080000100800208001550011689511884163266902670926709663206665816013520080020800202008002080020267132670911802011009910010080000100000800000100011151170160026706080000800001002671026710267102671026710
1602042670920600000000266940251601001008000080000100800208001550011689511884163266902670926709663206665816013520080020800202008002080020267092670911802011009910010080000100000800000000011151170160026706080000800001002671026710267102671026714

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3339

retire (01)cycle (02)0308090b18191e1f3f4f51inst issue (52)~issue int (53)~issue fp/simd (54)~issue ld/st (55)~dispatch int (56)~dispatch fp/simd (57)~dispatch ld/st (58)huge thing int (59)huge thing ld/st (5a)huge thing fp/simd (5b)5f60696d6edispatch stall (70)72scheduler rewind (75)scheduler stall (76)~dispatch op (78)~map op int (7c)~map op ld/st (7d)~map op fp/simd (7e)~map lookup int (7f)~map lookup ld/st (80)~map lookup fp/simd (81)8283pipeline redirect (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst b.cc (94)inst integer (97)inst neon or fp (9a)9fa0a1a5a6a7a8a9acc2cfd5d6dbddinst fetch restart (de)e0eald/st retires (ed)? fp/simd (ee)gpr retires (ef)f5f6f7f8fd
1600242670920700000002669402516001010800008000010800008000050116888018840320026690267092670966530366891600102080000800002080000800002670926709118002110910108000010000800000000050201116035267141508000080000102671026710267102671026710
16002426709207000000026694025160010108000080000108000080000501168880188403200266902670926709665303668916001020800008000020800008000026709267091180021109101080000100008000000002506551605326706708000080000102671026710267102671026710
16002426709207000000026695025160010108000080000108000080000501168880188403200266902670926709665303668916001020800008000020800008000026709267091180021109101080000100008000000000502051605326706498000080000102671026710267102671026710
16002426709207000000026697025160010108000080000108000080000501168880188403200266902670926709665303668916001020800008000020800008000026709267091180021109101080000100008000000000502031603526706488000080000102671026710267102671026710
16002426709207000000026694025160010108000080000108000080000501168880188403200266902670926709665303668916001020800008000020800008000026709267091180021109101080000100008000000000502031603526706498000080000102671026710267102671026710
16002426709208000000026694025160010108000080000108000080000501168880188403200266902670926709665303668916001020800008000020800008000026709267091180021109101080000100008000002000502041603626706498000080000102671326710267102671026710
16002427913214001881068704280675743331620901081170811701081489816026112146981942075002790128195280757191179152768016328520817048171620815178151828200280839180021109101080000100029781041002792025202716075278333518000080000102771126722267102671426879
1600242670920700000002819562233416235212810408117014807438142450121913519493870027719280752820472091721667737163289208170481713208189981710282092687810180021109101080000100036080260010793825245510705527982498000080000102820528203282112838327545
16002428326218101591188802281836473341626101080650811701081668816025012186801956823002794528194270407293209165767516292220817048151820817078171028378283271018002110910108000010003298130100078282524279806427978498000080000102820227372280892821028211
160024282012190000000266940251600101080000801301080000805345011688801884032002743427545272126719162133741616255720813328143120815218132927875278729180021109101080000104222480522012609805219512407428167508000080000102836228533285282869328554