Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PACDZB

Test 1: uops

Code:

  pacdzb x0

Initialization:

  mov x0, 1

(requires arm64e binary, with arm64e_preview_abi boot arg)

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 18 | 1e | 1f | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100470297003001875824251000100010001783301493949702970296623368181000100010007029870111001100000000731851167891000100070307030703070307030
100470297001530615824251000100010001783301493949702970296623368181000100010007029870111001100001000731851167891000100070307030703070307030
1004702970160615824251003100010001783300493949702970296623368181000100010007029870111001100000004731851167891000100070307030703070307030
1004702977000615824251000100010001783300493949702970296635368181000100010007029870111001100000000731851167891000100070307030703070707030
1004706870000615824251000100010001783300493949702970296623368181000100010007029870111001100000200731931167891000100070307030703070307030
10047029660150895824251003100010001783300493949702970296634368181000100010007069870111001100000000731851167891000100070307030703070307030
1004702966000615824251000100010001783300493949702970296623368181000100010007029870111001100000000862851267891000100070307030703070307030
10047029680210615824251000100010001783300493949702970686623368181000100010007029870111001100000000731851168171000100070307030703070307030
10047029670601035824251000100010001783300493949702970296623368181000100010007029870111001100000000731851167891000100070307030703070307030
1004702967000615824251000100010001783300493949702970296623368181000100010007029870111001100000000731851167891000100070307030703070307030

Test 2: Latency 1->1

Code:

  pacdzb x0

Initialization:

  mov x0, 1

(requires arm64e binary, with arm64e_preview_abi boot arg)

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0029

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10204700296760000000615982425102001020010200180833014966949070029700296848036867410200102001020070029912111020110099101000000000007101791169796101000101007003070030700307003070030
1020470029689000001203455982425102001020010200180833004966949070029700296848036867410200102001020070029912111020110099101000000030007101791169796101000101007003070030700307003070030
10204700296790000000615982425102001020010200180833014966949070029700296848036867410200102001020070029912111020110099101000000000007101791169796101004101007003070030700307003070030
1020470029679000001207265982425102001020010200180833004966949070029700296848036867410200102001020070029912111020110099101000001000007101791169796101002101007003070030700307003070030
102047002967900000001035982425102001020010200180833014966949070029700296848036867410200102001020070029912111020110099101000000000007101791169796101000101007003070030700307003070030
102047002967800000901175982425102001020010200180833004966949370029700296848036867410200102001020070029912111020110099101000001000007101791169796101000101007003070030700307003070030
10204700296780000030615982425102001020010200180833004966949070029700296848036867410200102001020070029912111020110099101000000030007101791169796101000101007003070030700307003070030
102047002967900000007265982425102001020010200180833004966949070029700296848036867410200102001020070029912111020110099101000000000007101791169796101000101007003070056700307003070030
1020470029678000002401045982425102001020010200180833004966949070029700296848036867410200102001020070029912111020110099101000000000007101791169796101000101007003070030700307003070030
102047002967700000120615982425102001020010200180833014966949070029700296848036867410200102001020070029912111020110099101000000030007101791169796101000101007003070030700307003070030

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0029

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 18 | 19 | 1e | 1f | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d tlb access (a0) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
10024700296750000098659824251002010020100201807430149669490700297002968502368696100201002010020700298701110021109100100000006400379226980510010100107003070030700307003070030
1002470029679000006159824251002010020100201807430149669490700297002968502368696100201002010020700298701110021109100100000006400279226980510010100107003070030700307003070030
1002470029690000006159824251002010020100201807376098669493700297002968502368696100201002010020700298701110021109100100003006400279226980510010100107003070030700307003070030
10024700296790000072659824251002010020100201807430049669490700297002968502368696100201002010020700298701110021109100100000006400279226980510010100107003070030700307003070030
1002470029679000006159824251002010020100201807430049669490700297002968502368696100201002010020700298701110021109100100000006400279226980510010100107003070030700307003070030
1002470029692000008959824251002010020100201807430149669490700297002968502368696100201002010020700298701110021109100100000006400279226980510010100107003070030700307003070030
1002470029679000006159824251002010020100201807430049669490700297002968502368696100201002010020700298701110021109100100000006400279226980510010100107003070030700307003070030
10024700296910000010359824251002010020100201807430149669490700297002968502368696100201002010020700298701110021109100100000006400279226980510010100107003070030700307003070030
1002470029689000006159824251002010020100201807430149669490700297002968502368696100201002010020700298701110021109100100000006400279226980510010100107003070030700307003070030
10024700296800000025059824251002010020100201807430049669490700297002968502368696100201002010020700298701110021109100100000006400279226980510010100107003070030700307003070030

Test 3: throughput

Count: 8

Code:

  pacdzb x0
  pacdzb x1
  pacdzb x2
  pacdzb x3
  pacdzb x4
  pacdzb x5
  pacdzb x6
  pacdzb x7

(requires arm64e binary, with arm64e_preview_abi boot arg)

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0004

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80204800636980001008835478020080200802004010000497700080035800356998336998480200802008020080035164118020110099801000000003000051102252380058801210801008003680036800368003680036
80204800817000000208835468020080200802004010000497700180035800356996637001780200802348020080081164118020110099801002000000000051102252280025801000801008003680036800368003680036
802048003575000000120718468020080221802664010000497695580035800356998536998480222802008023380035164118020110099801000201000000051362252280025801000801008003680036800368008280036
802048003575000000132035258020080200802004010000497695580035800356998486998480200802008020080080164118020110099801000000000000051102332280058801000801008003680036800818003680036
8020480035700000000078258022080200802004010000497695580035800356996676998480200802008023280035164218020110099801000220000000051102252280091801000801008003680036800828008180036
8020480035700100000035258020080200802444011070497695580035800806998536998480200802348020080035164118020110099801000000000000051102332280025801210801008003680036800368003680081
8020480035700101101320700258020080200802004011070497695580035800356998536998480200802008023280035164118020110099801000000000200051102252280025801000801008003680081800368003680036
80204800357491101008879258020080200802444010000497695580035800817000336998480200802358020080081164118020110099801000001000000051102252280025801000801008003680036800368003680127
80204800357480000000352580200802008020040100004977001800818008169966166998480200802008020080080164218020110099801000000000000051222332280057801000801008003680036800828003680036
80204800816991001000226258020080200802004010000497695580035800356996636998480222802008020080035164118020110099801002000000000051102332280058801000801008003680036800368008280081

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0004

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d tlb access (a0) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | eb | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002480040696000011201892580020800208002040010004977000800358003569988370006800208002080020800351641180021109800100000005020172510980024800100800108003680036800368003680036
80024800357000000000165258002080020800204001000497695580035800356998837000680020800208002080035164118002110980010000000502082591080024800100800108003680036800368003680036
8002480081699000100072825800208002080020400100049769558003580035699883700068002080020800208003516411800211098001000010050208259980024800300800108003680036800368003680036
80024800357000000000632580020800208004240010004976955800358003569988370006800208002080020800351641180021109800100001005020102591080024800100800108003680082800368003680036
800248003574900000007632580020800208002040010004976955800358003569988370006800208002080020800351641180021109800102000035020102588800248001020800108003680036800368003680036
800248003569900000120603425800208002080020400100049769558003580080699883700068002080054800208003516411800211098001000010350208259880059800100800108003680036800678003680036
80024800357490000012077478002080020800204001000497695580035800356998837000680020800208002080035164118002110980010000000502082511980024800100800108003680036800368003680036
800248003570000000002902580020800208002040010014976955800358003569988370006800208002080020800351641180021109800100000205020102511980024800100800108003680036800368003680036
8002480035699000001207725800208004180020400100049769558003580035699883700068002080020800208003516411800211098001000010050209259980024800100800108003680036800368003680036
8002480035695000001320352580020800208002040010004976955800358003569988370006800208002080020800351641180021109800100010049050208259980024800100800108003680036800368003680036