Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PACIZB

Test 1: uops

Code:

  pacizb x0
  mov x0, 1

(requires arm64e binary, with arm64e_preview_abi boot arg)

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)18191e1f3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst int alu (97)l1d tlb miss (a1)ld unit uop (a6)l1d cache writeback (a8)a9acl1d tlb miss nonspec (c1)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaeb? int retires (ef)f5f6f7f8fd
100470297000001201255824251000100010001783300493949702970296623368181000100010007029870111001100000000073185216789100000100070307030703070307030
1004702970000000965824251000100010001783301493949702970296623368181000100010007029870111001100000000073185116789100000100070307030703070307030
10047029700000001915824251000100010001783301493949702970296623368181000100010007029870111001100000100073185116789100000100070307030703070307030
1004702963000000615824251000100010001783300493949702970296623368181000100010007029870111001100000003073185116789100000100070307030703070307030
1004702970000014101265824251000100010001783301493949702970296623368181000100010007029870111001100002000073185116789100000100070307030703070307030
1004702970000000615824251000100010001783301493949702970296623368181000100010007029870111001100000000073185116789100000100070307030703070307030
1004702970000000615824251000100010001783301493949702970296623368181000100010007029870111001100000003073185116789100000100070307030703070307030
1004702970000000615824251000100010001783300493949702970296623368181000100010007029870111001100000000073185116789100000100070307030703070307030
1004702970000000615824251000100010001783301493949702970296623368181000100010007029870111001100000000073185116789100000100070307030703070307030
1004702970000000615824251000100010001783300493949702970296623368181000100010007029870111001100000000073185216789100000100070307030703070307030

Test 2: Latency 1->1

Code:

  pacizb x0
  mov x0, 1

(requires arm64e binary, with arm64e_preview_abi boot arg)

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0029

retire uop (01)cycle (02)031e3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst int alu (97)l1d tlb access (a0)l1d cache writeback (a8)acbranch cond mispred nonspec (c5)cfd0d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
10204700296190615982425102001020010200180833004966949700297002968480368674102001020010200700299121110201100991010001007100179116979610100101007003070072700307003070030
10204700296220615982425102001020010200180833004966949700297002968480368674102001020010200700299121110201100991010000307100179116979610100101007003070030700307003070030
10204700296520665982425102001020010200180833004966949700297002968480368674102001024810200700299121110201100991010001007100179116979610100101007003070030700307003070030
10204700296229615982425102001020010200180833004966949700297002968480368674102001020010200700299121110201100991010000007100179116979610100101007003070030700307003070030
10204700296220615982425102001020010200180833004966949700297002968480368674102001020010200700299121110201100991010005007100179116979610100101007003070030700307003070030
10204700296180615982425102001020010200180833004966949700297002968480368674102001020010200700299121110201100991010000307100179116979610100101007003070030700307003070030
10204700296230615982425102001020010200180833004966949700297002968480368674102001020010200700299121110201100991010002007100179116979610100101007003070030700307003070030
10204700296220615982447102031020010200180833004966949700297002968480368674102001020010200700299121110201100991010000007100179116979610100101007003070030700307003070030
102047002964901035982425102001020010200180833004966949700297002968480368674102001020010200700299121110201100991010001007100279116979610100101007003070030700307003070030
1020470029618053659824251020010200102001808330049669497002970029684803686741020010200102007002991211102011009910100242007100179116979610100101007003070030700307003070030

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0029

retire uop (01)cycle (02)03mmu table walk instruction (07)mmu table walk data (08)l2 tlb miss instruction (0a)l2 tlb miss data (0b)18191e1f3a3f4d51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst int alu (97)l1d tlb access (a0)l1d tlb miss (a1)l1d cache writeback (a8)acbranch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
10024700296190000000008459824251002010020100201807430049669497002970029685023686961002010020100207002987011100211091001000130640479226980510010100107003070030700307003070030
10024700296520000000006159824251002010020100201807430049669497002970029685023686961002010020100207002987011100211091001000000640279226980510010100107003070030700307003070030
100247002961800000000015659824251002010020100201807430149669497002970029685028686221002010020100207002987011100211091001000000640379226980510010100107003070030700307003070030
100247002962200000000010359824251002010020100201807430049669497002970029685023686961002010020100207002987011100211091001000001640279226980510010100107003070030700307018570030
10024700296220000000006159824251002010020100201807430149669497002970029685023686961002010020100207002987011100211091001000000642279226980510010100107003070030700307003070030
100247002965100000012006159824251002010020100201807430049669497002970029685023686961002010020100207002987011100211091001000000640279226980510010100107003070030700307003070030
10024700296230000000006159824251002010020100201807430149669497002970029685023686961002010020100207002987011100211091001000000640279226980510010100107003070030700307003070030
10024700296230000000006159824251002010020100201807430049639037002970029685023686961002010020100207002987011100211091001000000640279226980510010100107003070030700307003070030
10024700296220000000006159824251002010020100201807430049669497002970029685023686961002010020100207002987011100211091001000000640279226980510010100107003070190700707003070030
10024700296220000000006159824251002010020100201807430149669497002970029685023686961002010020100207002987011100211091001000030640279226980510010100107003070030700307003070030

Test 3: throughput

Count: 8

Code:

  pacizb x0
  pacizb x1
  pacizb x2
  pacizb x3
  pacizb x4
  pacizb x5
  pacizb x6
  pacizb x7

(requires arm64e binary, with arm64e_preview_abi boot arg)

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0004

retire uop (01)cycle (02)03mmu table walk data (08)l2 tlb miss data (0b)18191e1f3a3f51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst int alu (97)ld unit uop (a6)l1d cache writeback (a8)a9acc2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)? int retires (ef)f5f6f7f8fd
802048004069900001500077258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000000005110225238002580100801008003680036800368003680036
80204800357000020000324258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000009005110225228002580100801008003680036800368003680036
8020480035700000000035478020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000006005110225228002580100801008012780036800368003680036
8020480035695000030035258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000000005110225228002580100801008003680036800368003680036
80204800356991000120035258020080200802004010001497695580035800356996636998480200802008023380035164118020110099801003000005110225228002580100801008008280036800368003680036
8020480035696000000035258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000000005110225228005880100801008003680036800368003680036
8020480035750000000077258020080200802004010000497695580035800356996636998480200802008020080035164118020110099801002000005110225228002580100801008003680082800368003680036
80204800357000000000700258020080200802004010001497695580035800356996636998480200802008020080081164118020110099801000000005110225228002580100801008003680036800368003680036
80204800357000100000520258020080200802004010000497695580035800356996636998480200802008020080035164118020110099801000003005110225228002580100801008003680036800368003680036
8020480035700000000098258020080200802004010001497700180035800356996636998480200802008020080035164118020110099801000000015110225228002580100801008003680036800368003680036

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0004

retire uop (01)cycle (02)03mmu table walk data (08)09l2 tlb miss data (0b)18191e1f3f51schedule uop (52)schedule int uop (53)dispatch int uop (56)int uops in schedulers (59)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map int uop inputs (7f)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)91inst int alu (97)l1d cache writeback (a8)accfd0d2l1i cache miss demand (d3)d5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)ea? int retires (ef)f5f6f7f8fd
8002480041699000000035258002080020800204001001497695580035800356998837000680020800208002080035164118002110908001014050200006256380024800100800108003680036800368003680036
8002480035701000000021725800208002080020400322149769558003580035699883700068002080020800208003516411800211090800101050200006256380024800100800108003680036800368003680036
800248003574600000006325800208002080020400100049769558003580035699883700068002080020800208003516411800211090800100050200003253680024800100800108003680036800368003680036
80024800357000000027030025800208002080020400100049769558003580035699883700068002080020800208003516411800211090800100050200003253680024800100800108003680036800368003680036
80024800357060000000612580020800208002040010014976955800358003569988370006800208002080020800351641180021109080010005467000731515881247807250800108162081575813478176081713
8002481579709000000020325800208002080020400100149769558003580035699883700068002080020800208003516411800211090800100050200006256380024800100800108003680036800368003680036
800248003570200000004025800208002080020400100149769558003580035699883700068002080020800208003516411800211090800100050200003253680024800100800108003680036800368003680036
8002480035702000001205625800208002080020400100149769558003580035699883700068002080020800208003516411800211090800100050200006257380024800100800108003680036800368003680036
800248003570200000120101525800208002080020400100149769558003580035699883700068002080020800208003516411800211090800101350200003253680024800100800108003680036800368003680036
8002480035703000002703525800208002080020400100049769558003580035699883700068002080020800208003516411800211090800100050200003253680024800102800108003680036800368003680036