Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ORR (vector, immediate, 8H)

Test 1: uops

Code:

  orr v0.8h, #1
  movi v0.16b, 1

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 1.000

retire uop (01) | cycle (02) | 03 | 09 | 18 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule simd uop (54) | dispatch simd uop (57) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map simd uop (7e) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd alu (9a) | l1d cache writeback (a8) | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
1004203715000342168625100010001000264521120182037203715713189510001000100020372037111001100000073116111786100020382038203820382038
100420371500099168625100010001000264521120182037203715713189510001000100020372037111001100000073116111786100020382038203820382038
1004203715000187168625100010001000264521120182037203715713189510001000100020372037111001100000073116111786100020382038203820382038
1004203716000505168625100010001000264521120182037203715713189510001000100020372037111001100000073116111786100020382038203820382038
1004203715000195168625100010001000264521120182037203715713189510001000100020372037111001100000073116111786100020382038203820382038
100420371600082168625100010001000264521120182037203715713189510001000100020372037111001100000073116111786100020382038203820382038
100420371500084168625100010001000264521120182037203715713189510001000100020372037111001100010073116111786100020382038203820382038
100420371500082168625100010001000264521120182037203715713189510001000100020372037111001100010073116111786100020382038203820382038
1004203716000143168625100010001000264521120182037203715713189510001000100020372037111001100060073116111786100020382038203820382038
100420371600061168625100010001000264521120182037203715713189510001000100020372037111001100050073116111820100020382038203820382038

Test 2: Latency 1->1

Code:

  orr v0.8h, #1
  movi v0.16b, 1

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 2.0037

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 1e | 1f | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204200371500006119686251010010010000100100005002847521020018200372003718428718741101002001000820010008200372003711102011009910010010000100000611171801600198000100001002003820038200382003820038
10204200371500006119686251010010010000100100005002847521020018200372003718428618741101002001000820010008200372003711102011009910010010000100000011171701600198000100001002003820038200382003820038
1020420037150042006119686251010010010000100100005002847521020018200372003718428618740101002001000820010008200372003711102011009910010010000100000011171701600198000100001002003820038200382003820038
10204200851500006119686251010010010000100100005002847521020018200372003718428618740101002001000820010008200372003711102011009910010010000100000011171701600198000100001002003820038200382003820038
10204200371500006119686251010010010000100100005002847521020018200372003718428618741101002001000820010008200372008611102011009910010010000100000011172222422197870100001002003820038200382003820038
102042003715001621089719686251011410210000100100005002849896020018200372003718409618733101002001000020010000200372003711102011009910010010000100000011172222422197870100001002003820038200382003820038
10204200371500009719686251010010010000100100005002847521020018200372003718409618733101002001000020010000200372003711102011009910010010000100050011172222422197870100001002003820038200382003820038
10204200371500009719686251010010010000100100005172847521020018200372003718428718740101002001000820010008200372003711102011009910010010000100000011171701600198010100001002003820038200382003820038
10204200371500006119686251010010010000100100005002847521020018200372003718428718741101002001000820010008200372003711102011009910010010000100010011171801600198010100001002003820038200382003820038
102042003715001206119686251010010010000100100005002847521020018200372003718428618740101002001000820010008200372003711102011009910010010000100000011171801600198010100001002003820038200382003820038

1000 unrolls and 10 iterations

Result (median cycles for code): 2.0037

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 1e | 3f | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024200371500006119686251001010100001010000502847521020018200372003718443318767100102010000201000020037200371110021109101010000100150640316331978610000102003820038200382003820038
1002420037150000611968625100101010000101000050284752102001820037200371844331876710010201000020100002003720037111002110910101000010380640316331978610000102003820038200382003820038
100242003715000061196862510010101000010100005028475210200182003720037184433187671001020100002010000200372003711100211091010100001000640316331978610000102003820038200382003820038
10024200371500006119686251001010100001010000502847521020018200372003718455318767100102010000201000020037200371110021109101010000100156640316331978610000102003820038200382003820038
1002420037150000103196862510010101000010100005028475210200182003720037184433187671001020100002010000200372003711100211091010100001000640316331978610000102003820038200382003820038
100242003715000061196862510010101000010100005028475210200182003720037184433187671001020100002010000200372003711100211091010100001000640316331978610000102003820038200382003820038
1002420037150002461196862510010101000010100005028475210200182003720037184433187671001020100002010000200372003711100211091010100001000640316331978610000102003820038200382003820038
1002420037150000611968625100101010000101000050284752102001820037200371844331876710010201000020100002003720037111002110910101000010072640316331978610000102003820038200382003820038
100242003715000061196862510010101000010100005028475210200182003720037184433187671001020100002010000200372003711100211091010100001000640316331978610000102003820038200382003820038
100242003715000061196862510010101000010100005028475210200182003720037184433187671001020100002010000200372003711100211091010100001009640316331978610000102003820038200382003820038

Test 3: throughput

Count: 8

Code:

  movi v0.16b, 0
  orr v0.8h, #1
  movi v1.16b, 0
  orr v1.8h, #1
  movi v2.16b, 0
  orr v2.8h, #1
  movi v3.16b, 0
  orr v3.8h, #1
  movi v4.16b, 0
  orr v4.8h, #1
  movi v5.16b, 0
  orr v5.8h, #1
  movi v6.16b, 0
  orr v6.8h, #1
  movi v7.16b, 0
  orr v7.8h, #1

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2508

retire uop (01) | cycle (02) | 03 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020420090151000382580100100800001008000050064000020044200632006332180100200800002008000020063200631116020110099100100160000100061011111611200601600001002006420064200642006420064
160204200631500003825801001008000010080000500640000200442006320063321801002008000020080000200632006311160201100991001001600001000991011111611200601600001002006420064200642006420064
160204200631500150382580100100800001008000050064000020044200632006332180228200800002008000020063200631116020110099100100160000100001011111611200601600001002006420064200642006420064
1602042006315000080258010010080000100800005006400002004420063200633218010020080000200800002006320063111602011009910010016000010001171011111611200601600001002006420064200642006420064
160204200631500360382580100100800001008000050064000020044200632006332180100200800002008000020063200631116020110099100100160000100001011111611200601600001002006420064200642006420064
16020420063150000382580214100800001008000050064000020044200632006332180100200800002008000020063200631116020110099100100160000100001011111611200601600001002006420064200642006420064
16020420063150000382580100100800001008000050064000020044200632006332180100200800002008000020063200631116020110099100100160000100001011111611200601600001002006420064200642006420064
16020420063150000382580100100800001008000050064000020044200632006332180100200800002008000020063200631116020110099100100160000100001011111611200601600001002006420064200642006420064
16020420063150000382580100100800001008000050064000020044200632006332180100200800002008000020063200631116020110099100100160000100091011111611200601600001002006420064200642006420064
160204200631500210382580100100800001008000050064000020044200632006332180100200800002008000020063200631116020110099100100160000100001011111611200601600001002006420064200642006420064

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2506

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache writeback (a8) | ac | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002420068151220007201572580012128000012800006264000011020026200452004532180012208000020800002004520045111600211091010160000100011928100478212724211232420042215160000102004620050200462004620046
1600242004515000000604425800121280000128000062640000115200302004920049321800122080000208000020049200491116002110910101600001000131004811312124422232220046230160000102004620046200462004620046
1600242004515101000018662580012128000012800006264000011520026200452004532180012208000020800002004520045111600211091010160000100412398100468212520211232420132215160000102004620046200502004620046
1600242004515011000017102580012128000012800006264000011520026200452004532180012208000020800002004520045111600211091010160000100000100468212320211222320042215160000102004620046200462004620046
160024200451502200000442580012128000012800006264000011520026200452004532180012208000020800002004520045111600211091010160000100000100488212320211232220042215160000102004620046200502004820046
160024200451501000000502580012128000012800006264000011520026200452004532180012208000020800002004520045111600211091010160000100003100468212220211172520042215160000102004620046200462004620046
160024200451503200000502580012128000012800006264000011520026200452004532180012208000020800002004520045111600211091010160000100020100468212320211232520042215160000102004620046200462004620046
160024200451501200000562580012128000012800006264000011520026200452004532180012208000020800002004520045111600211091010160000100020100488212220211212220042215160000102004620046200462004620046
160024200451500100000502580012128000012800006264000011520026200452004532180012208000020800002004520045111600211091010160000100000100468212320211232320042215160000102004620046200462004620046
160024200451502100000502580012128000012800006264000011520026200452004532180012208000020800002004520045111600211091010160000100000100478212420211232420042215160000102004620046200462004620046

Test 4: throughput

Count: 16

Code:

  orr v0.8h, #1
  orr v1.8h, #1
  orr v2.8h, #1
  orr v3.8h, #1
  orr v4.8h, #1
  orr v5.8h, #1
  orr v6.8h, #1
  orr v7.8h, #1
  orr v8.8h, #1
  orr v9.8h, #1
  orr v10.8h, #1
  orr v11.8h, #1
  orr v12.8h, #1
  orr v13.8h, #1
  orr v14.8h, #1
  orr v15.8h, #1

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020440057300040251601001001600001001600005001120016140019400384003819973319996160100200160000200160000400384008711160201100991001001600001000001011011611400351600001004003940039400394003940039
16020440038301040441602191001600001001600005001120016040019400384003819973319996160100200160000200160000400384003811160201100991001001600001000001011011611400351600001004003940039400394003940039
16020440038299040251601001001600001001600005001120016040019400384003819973319996160100200160000200160000400384003811160201100991001001600001000001011011611400351600001004003940039400394003940039
16020440038300040251601001001600001001600005001120016040019400384003819973319996160100200160000200160000400384003811160201100991001001600001000001011011611400351600001004003940039400394003940039
16020440038299040251601001001600001001600005001120016140019400384003819973319996160100200160000200160000400384003811160201100991001001600001000001011011611400351600001004003940039400394003940039
16020440038300040511601001001600001001600005001120016140019400384003819973319996160100200160000200160000400384003811160201100991001001600001000001011011611400351600001004003940039400394003940039
16020440038300040251601001001600001001600005001120016040019400384003819973319996160100200160000200160000400384003811160201100991001001600001000001011011611400351600001004003940039400394003940039
1602044003830041440251601001001600001001600005001120016140019400384003819973319996160100200160000200160000400384003811160201100991001001600001000001011011611400351600001004003940039400394003940039
16020440038300040251601001001600001001600005001120016140019400384003819973319996160100200160000200160000400384003811160201100991001001600001000001011011611400351600001004003940039400394003940039
160204400383000705251601001001600001001600005001120016040019400384003819973319996160100200160000200160000400384003811160201100991001001600001000001011011611400351600001004003940039400394003940039

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.2502

retire uop (01) | cycle (02) | 03 | 1e | 3f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | dispatch int uop (56) | dispatch simd uop (57) | int uops in schedulers (59) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map simd uop (7e) | map int uop inputs (7f) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd alu (9a) | 9f | l1d cache writeback (a8) | ac | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600244004830004525160010101600001016000050112001601040019400384003819996320018160010201600002016000040038400381116002110910101600001006100243113916211141640035208160000104003940039400394003940039
1600244003830005125160010101600001016000050112001610040019400384003819996320018160010201600002016000040038400381116002110910101600001000100228411916211131140035208160000104003940039400394003940039
1600244003830004525160010101600001016000050112001610540019400384003819996320018160010201600002016000040038400381116002110910101600001009100223111316211131740035208160000104003940039400394003940039
1600244003830004525160010101600001016000050112001611040019400384003819996320018160010201600002016000040038400381116002110910101600001000100228111316211181440035208160000104003940039400394003940039
1600244003830007825160010101600001016000050112001610540019400384003819996320018160010201600002016000040038400381116002110910101600001000100228411316211141240035208160000104003940039400394003940039
1600244003830005525160010101600001016000050112001611040019400384003819996320018160010201600002016000040038400381116002110910101600001000100223511316211131140035208160000104003940039400394003940039
1600244003830004525160010101600001016000050112001610540019400384003819996320018160010201600002016000040038400381116002110910101600001000100228411316211201540035208160000104003940039400394003940039
1600244003830004525160010101600001016000050112001611540019400384003819996320018160010201600002016000040038400381116002110910101600001000100228411116211131240035208160000104003940039400394003940039
1600244003830004525160010101600001016000050112001611540019400384008819996320018160010201600002016000040038400381116002110910101600001000100223411316211111340035208160000104003940039400394003940039
1600244003830004525160010101600001016000050112001611540019400384003819996320018160010201600002016000040038400381116002110910101600001000100228411616211112040035208160000104003940039400394003940039