Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

PACIZA

Test 1: uops

Code:

  paciza x0
  mov x0, 1

(requires arm64e binary, with arm64e_preview_abi boot arg)

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 1.000

Load/store unit issues: 0.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 1e | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int alu (97) | ld unit uop (a6) | l1d cache writeback (a8) | ac | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100470297006158242510001000100017833014939497029702966233681810001000100070298701110011000003731851167891000100070307030703070307030
100470297006158242510001000100017833014939497029702966233681810001000100070298701110011000000731851167891000100070307030703070307030
1004702970126158242510001000100017833014939497029702966233681810001000100070298701110011000000731851167891000100070307030703070307030
100470297006158242510001000100017833004939497029702966233681810001000100070298701110011000000731851167891000100070307030703070307030
100470297006158242510001000100017833004939497029702966233681810001000100070298701110011000013731851167891000100070307030703070307030
1004702971010358242510001000100017833004939497029702966233681810001000100070298701110011000000731851167891000100070307030703070307030
100470297036158242510001000100017833004939497029702966233681810001000100070298701110011000000731851167891000100070307030703070307030
1004702970126158242510001000100017833004939497029702966233681810001000100070298701110011000000731851167891000100070307030703070307030
1004702970010958242510001000100017833014939497029702966233681810001000100070298701110011000000731851167891000100070307030703070307030
100470297006158242510001000100017833014939497029702966233681810001000100070298701110011000000731851167891000100070307030703070307030

Test 2: Latency 1->1

Code:

  paciza x0
  mov x0, 1

(requires arm64e binary, with arm64e_preview_abi boot arg)

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code): 7.0029

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
102047002967700000000615982425102001020010200180833014966949070029700296848036867410200102001020007002991211102011009910100000000710179116979610100101007003070030700307003070030
1020470054678000000001035982425102001020010200180833004966949070029700546848036867410200102001020007002991211102011009910100000000710179116979610100101007003070030700307003070030
102047002967900000012061598152510200102001020018083301490070029700296848036867410200102001020007002991211102011009910100000000710179116987510100101007003070030700307003070030
102047002967800000000615982425102001020010200180833004966949070029700296848036867410200102001020007002991211102011009910100010000710179116979610100101007003070030700307003070030
102047002967910000000615982425102001020010200180833004966949070029700296848036867410200102001020007002991211102011009910100010000710179116979610100101007003070030700307003070030
1020470029679000000003195982425102001020010200180833004966949070029700296848036867410200102001020007002991211102011009910100010000710179116979610100101007003070030700307003070030
102047002967800000030615982425102001020010200180833004966949070029700296848036867410200102001020007002991211102011009910100000000710179116979610100101007003070030700307003070030
102047002967900000002161035982425102001020010200180833004966949070029700296848036867410200102001020007002991211102011009910100000000770179116979610100101007003070030700307003070030
102047002967400000000615982425102001020010200180833004966949070029700296848036867410200102001020007002991211102011009910100000000710179116979610100101007003070030700307003070030
102047002968900000000615982425102001020010200180833004966949070029700296848036867410200102001020007002991211102011009910100000000710179116979610100101007003070030700307003070030

1000 unrolls and 10 iterations

Result (median cycles for code): 7.0029

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3a | 3f | 4d | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache writeback (a8) | a9 | ac | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
100247002961700000000117598242510020100201002018074301496694970029700296850236869610020100201002070029870111002110910010000006400379236980510010100107003070030700307003070030
100247002962100000000578598242510020100201002018074301496694970029700296850236869610020100201002070029870111002110910010000006400279226980510010100107003070030700307003070030
10024700296560000000061598242510020100201002018074300496694970029700296850236869610020100201002070029870111002110910010000006400279226980510010100107003070030700307003070030
10024700296200000000061598242510020100201002018074301496694970029700296850236869610020100201002070029870111002110910010000006400279226980510010100107003070030700307003070030
100247002962010000000104598242510020100201002018074301496694970029700296850236869610020100201002070029870111002110910010000006400279226980510010100107003070030700307003070030
10024700296190000000061598242510020100201002018074301496694970029700296850236869610020100201002070029870111002110910010000006400279226980510010100107003070030700307003070030
100247002961900000120061598242510020100201002018074301496694970029700296850236869610020100201002070029870111002110910010000036400279226980510010100107003070030700307003070030
10024700296200000000061598242510020100201002018074300496694970029700296850236869610020100801002070029870111002110910010001006400279226980510010100107003070030700307003070030
10024700296200000000061598242510020100201002018074301496694970029700296850236869610020100201002070029870111002110910010001006400279226980510010100107003070030700307003070030
10024700296211000000061598242510020100201002018074300496694970029700296850236869610020100201002070029870111002110910010000006400279226980510010100107003070030700307003070030

Test 3: throughput

Count: 8

Code:

  paciza x0
  paciza x1
  paciza x2
  paciza x3
  paciza x4
  paciza x5
  paciza x6
  paciza x7

(requires arm64e binary, with arm64e_preview_abi boot arg)

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0004

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d tlb access (a0) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020480040663000390192258020080200802004010000497695580035800356996636998480200802008020080035164118020110099801000000030005110225228002580100801008003680036800368003680036
8020480035643000120742258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000001030005110225228002580100801008003680036800368003680036
80204800356440001202280258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000001000005110225228002580100801008003680036800368003680036
802048003570200000770258020080200802004010000497695580035800356996636998480200802008020080035164118020110099801000000000005110225228002580100801008003680036800368003680036
802048003570300000467258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000000000005110225228002580100801008003680036800368003680036
8020480035698000750230258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000000000005110225228002580100801008003680036800368003680036
8020480035702000120337258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000000000005110225228002580100801008003680036800368003680036
8020480035702000001274258020080200802004010001497695580035800356996636998480229802008020080035164118020110099801000000000005110225228002580100801008003680036800368003680036
802048003570300000251258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000001000005110225228002580100801008003680036800368003680036
8020480035702000120637258020080200802004010001497695580035800356996636998480200802008020080035164118020110099801000000000005110225228002580100801008003680036800368003680036

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0004

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 3a | 3f | 51 | schedule uop (52) | schedule int uop (53) | dispatch int uop (56) | int uops in schedulers (59) | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map int uop inputs (7f) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst int alu (97) | l1d tlb access (a0) | l1d tlb miss (a1) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80024800416990000000016325800208002080020400100104976955800358003569988370006800208002080080800351641180021109800100000000050205112501180024800100800108003680036800368003680036
8002480035702000000003525800208002080020400100154976955800358003569988370006800208002080020800351641180021109800100000000050205112501180024800100800108008080081800368003680036
8002480035700000000003525800208002080020400100154976955800358003569988370006800208002080020800351641180021109800100000000050205112501180024800100800108003680036800368003680036
8002480035702000000003525800208002080020400100154976955800358003569988370006800208002080020800351641180021109800100000000050205122501180024800100800108003680036800368003680036
80024800357010000000051025800208002080020400100154976955800358003569988370006800208002080020800351641180021109800100000000050205112501180024800100800108003680036800368003680036
8002480035702000000003525800208002080020400100154976955800358003569988370006800208002080020800351641180021109800100000003050205012501180024800100800108003680036800368003680036
8002480035697000000003525800208002080020400100154976955800358003569988370006800208002080020800351641180021109800100000100050205012502180024800100800108003680036800368003680036
8002480035703000000006325800208002080020400100054976955800358003569988370006800208002080020800351641180021109800100000003050205012501180024800100800108003680036800368003680036
80024800357050000012007743800208002080020400100154976955800358003569988370006800208002080020800351641180021109800100000000050200012501180024800100800108003680036800368003680036
8002480035701000000003525800208002080020400100154976955800358003569988370006800208002080020800351641180021109800100000000050205122501180074800100800108003680036800368003680036