Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (16B)

Test 1: uops

Code:

  ld3r { v0.16b, v1.16b, v2.16b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 4.012

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.012

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
64005292782191181130101001045232884400117013401230121000300010005000357454228910290942928231040001000300010003000292112919511610011000100001002331002012110003020012922911668353162548202793101381814494828430163321379114889100030002927129285292752932629258
6400429209219014011000401045382875911017133400930121000300010005000357087228480291352919031040001000300010003000291922919111610011000100001002121003012610002131113009926068633073545202803061382017414428327163101394514976100030002921729307292642938529312
640042923722018113100301046242873610017142400930061000300010005000357732228470290762925931040001000300010003000291872910111610011000100001000031000000310022101112916915069033109643202813090382616434328392162021407314734100030002933029192292382922529269
6400429388218116116100401045192877300017017401230091000300010005000357498228580291042926731040001000300010003000292062921211610011000100011000001000000110002120012878939568433058440202933072381613373828437163091392814800100030002917929273292572936629299
6400429159219015013000100045822879300017139400330121000300010005000357496228380290612918131040001000300010003000292142921311610011000100001002221001001110003020013024917368593019844203613114381719383628372163581395815050100030002928429322292802926329307
6400429287219012015000101047022886600017046400330121000300010005000356341228170290792925331040001000300010003000291762923311610011000100001003321002001110003131012825912868163055842201933090382815424228356161911400015109100030002931929287292092932929283
640042922322011518100301045442870000017090400330151000300010005000357767228250291312926031040001000300010003000291202917911610011000100001003131004000310003131113253927068583094531202293103381613404428297162881409015058100030002927029273292982925729309
6400429300220115017000401045772883200017009401230091000300010005000356443228570291412925231040001000300010003000292182919411610011000100001004331003001310003121112982925168623070438202823092382318354128287163451385015090100030002927129267292902933429315
6400429178218116110100300046162879400017016400330031000300010005000357680229290290622927231040001000300010003000291322911811610011000100011003331003000410002131212701912468133044844203593138381214414228307166151379515129100030002923529290292622927229248
6400429227219111113000500046102872100016984400330121000300010005000357606229220291072922531040001000300010003000291202908011610011000100001002231001011410002131112761925768263016542202943093382217373328354163411421815222100030002924429345292942928129288

Test 2: throughput

Count: 8

Code:

  ld3r { v0.16b, v1.16b, v2.16b }, [x6]
  ld3r { v0.16b, v1.16b, v2.16b }, [x6]
  ld3r { v0.16b, v1.16b, v2.16b }, [x6]
  ld3r { v0.16b, v1.16b, v2.16b }, [x6]
  ld3r { v0.16b, v1.16b, v2.16b }, [x6]
  ld3r { v0.16b, v1.16b, v2.16b }, [x6]
  ld3r { v0.16b, v1.16b, v2.16b }, [x6]
  ld3r { v0.16b, v1.16b, v2.16b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320205800676001110035100800261600025320120100240018800001002400008000050040004628839811800228004180041323320100200800002400002008000024000080041800411180201100991001008000080000010080006728800290030800006129070510911711800381013800002400001008004280042800428004280042
32020480041599101006000800260600025320120100240020800001002400008000050040004628839811800228004180041323320100200800002400002008003324000080041800411180201100991001008000080000110080008828800320129800246170705109117118003801313800002400001008004280042800428004280042
320204800416001010061008002600600253201201002400868000010024000080000500400151288022008002280041800413233201002008000024000020080000240000800418004111802011009910010080000800000100800088278003100298002460728705109117118003811313800002400001008004280042800428004280042
320204800416001100136000800261600025320186100240086800001002400008000050040000128839810800228004180041323320100200800002400002008000024000080041800411180201100991001008000080000010080007608002900318002361728605109117118003801313800002400001008004280042800428004280042
320204800415991000037101800261660025320120100240018800001002400008000050040005428839810800228004180041323320100200800002400002008000024000080041800411180201100991001008000080000110080008827800311131800006129071510911711800381013800002400001008004280042800428004280042
3202048004159911001370008002616600253201201002400798000010024000080000500400000288022008002280041800413233201002008000024000020080000240000800418004111802011009910010080000800001100800077288000700318000060292771510911711800380013800002400001008004280042800428004280042
320204800416001010071008002616600253201831002400868000010024000080000500400046288022008002280041800413233201002008000024000020080000240000800418004111802011009910010080000800000100800077278004800348002361310605109117118003811313800002400001008004280042800428004280042
32020480041600111003510080026160002532018310024008380000100240000800005004000462883981180022800418004132332010020080000240000200800002400008004180041118020110099100100800008000001008000772880031007800230162771510911711800380130800002400001008004280042800428004280042
3202048004159911000360018002616600253201831002400208000010024000080000500400046288398108002280041800413233201002008000024000020080000240000800418004111802011009910010080000800000100800079278003259068000061627715109117118003801313800002400001008004280042800428004280042
320204800415991110070008002616600253201791002400798000010024000080000500400011288403308002280041800413233201002008000024000020080000240000800418004121802011009910010080000800000100800088278003101298002461300715109117118003801313800002400001008004280042800428004280042

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | e7 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320025800545990010000017010008002616600253200821024001880000102400008000050400038288288701800228004180041032332001020800002400002080000240000800418004111800211091010800008000011080000015800100011800006001500050190417000428003810980000240000108004280042800428004280042
3200248004159910011000320100080079066002532001010240000800001024000080000504000092880961018002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800077248002600268000060262372050190417000428003806680000240000108004280042800428004280042
3200248004159900000000170100080026106002532008210240075800001024000080000504000382882839018002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800000158000000118001160111500050190417000428003806680000240000108004280042800428004280042
3200248004159900000000160100080026160002532005210240042800001024000080000504000072880000018002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800000158000000148001100111500050190417000248003816080000240000108004280042800428004280042
320024800416000000000017010008002616000253200521024004280000102400008000050400000288096101800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000015800110010800110011000050190417000428003809980000240000108009380042800428004280042
320024800416001101000070000180026006002532005210240042800001024000080000504000082880964018002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800078248002701268000061262471050190417000428003806680000240000108004280042800428004280042
320024800415990000000017010008002616000253200811024007180000102400008000050400033288283901800228004180041032332001020800002400002080000240000800418004111800211091010800008000001080000015800000011800106011000050190217000428003816680000240000108004280042800428004280042
32002480041600000000001701000800261660025320052102400998000010240000800325040017328821920080022800418004103233200102080000240000208000024000080093800411180021109101080000800000108000001580014000800106010000050190417000428003800680000240000108004280042800428004280042
3200248004159900000000160100080026106002532007710240067800001024000080000504000392882774018002280041800410323320010208000024000020800002400008004180041118002110910108000080000010800076248000601268002001252462050190217000348003816680000240000108004280042800428004280042
3200248004159900000000001000800261660025320082102400678000010240000800005040003828827740180022800418004103233200102080000240000208000024000080041800411180021109101080000800000108000000800120010800006001500050190317000248003806680000240000108004280042800428004280042