Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (post-index, 8B)

Test 1: uops

Code:

  ld3r { v0.8b, v1.8b, v2.8b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.009

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 61 | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
64005287812241200111100300004778283621016519500910003003100010003000100050005000357040140228750285862869331050001000300020003000286242851611610011000100001002331003002410002121101323895036968321215819553319038202757552804610001489512808136311000300010002871528660287612870828743
64004286942221111101100301004694282850016555500910003003100010003000100050005000356280210228610284972864931050001000300020003000285192846011610011000100001002221003000110000121201324495076952318006019654321638172759632805410001547312394136511000300010002865128683287152858728580
64004286062231211011100300004681282610016509500910003012100010003000100050005000357640148228440285542870131050001000300020003000286352854811610011000100001002121002001110002101101330195846964315106019708319238172665602799410001554112566133641000300010002865228695286032873128669
64004287152321100111100300004757283940016532500910003009100010003000100050005000357050200228480285342872531050001000300020003000285862853311610011000100001003001002002110002121201314296476950326405519718320538282064562810210001543812634137741000300010002863528668287642877428699
64004287482231111211000100004984282550016586500910003009100010003000100050005000356340140228650286662875231050001000300020003000285392861811610011000100011003301002001010000121101328194517004319805919640322838372466642822710001527112716140201000300010002864528643286952888928623
64004286572231111210000130000485028241001640550031000300910001000300010005000500035713050228500285812859731050001000300020003000285322871611610011000100001002201002001110002131001337996906907318406119693316838282259642812910001542912507134961000300010002869828641286182862428616
64004286752221211211000300004798283960016528500310003003100010003000100050005000357090148228850283842869631050001000300020003000285022860311610011000100001002221003000110002101101340495596958329305819641317138261854612816010001536512661136351000300010002867028681286602871628591
6400428748223121101100010000481028422001653150091000300310001000300010005000500035632050228930286042869831050001000300020003000286552855211610011000100001002221002001110002101101331194717059320505619765318538292466592822410001523312703137761000300010002870528758287172870128750
64004286832221011101100150100474828310001647150091000300910001000300010005000500035706079228490286202867031050001000300020003000285532874311610011000100001001121002001410002101001340597327018312706119651317438241755552821010001539212750135891000300010002872128616286602869128686
6400428648223111101110010000485128422001659750031000300910001000300010005000500035710019228640285772873631050001000300020003000286552860511610011000100001000221003001110002101101353295157010317405819557318138251463652816010001551412768134711000300010002873728784285712876328733

Test 2: throughput

Count: 8

Code:

  ld3r { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3r { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3r { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3r { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3r { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3r { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3r { v0.8b, v1.8b, v2.8b }, [x6], x8
  ld3r { v0.8b, v1.8b, v2.8b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202058006962100000000001008002716609254001508010024005180000801002400008000048049947999831214938002380042800420324400100200800002400002001600002400008004280042118020110099100100800008000001008011101880014001680014611318005109117118003908000013080000240000801008004380043800438004380043
3202048004262000001100001008002716600254001598010024005180000801002400008000048049947999831214938002380042800420324400100200800002400002001600002400008004280042118020110099100100800008000001008000001880015001480014610180051091171180039080000101380000240000801008004380043800438004380043
320204800426211000000034010080027166002540015980100240051800008010024000080000480499480011312155580023800428004203244001002008004024000020016000024000080042800421180201100991001008000080000010080000018800130015800146114180051091171180039180000101080000240000801008004380043800438004380043
320204800426210000000029000080027166005940015180100240051800008010024000080000480499480007311999480023800428004203244001002008000024000020016000024000080042800421180201100991001008000080000010080000018800141013800146114200051091171180039180000101080000240000801008004380043800438004380043
32020480042620000000003000008002716600254001518010024005180000801002400008000048049948001131214938002380042800420324400100200800002400002001600002400008004280042118020110099100100800008000001008000001880018101480014610200051091171180039080000101080000240000801008004380043800438004380043
320204800426200000000019000080027106002540015180100240051800008010024000080000480499480011312149380023800428004203244001002008000024000020016000024000080042800421180201100991001008000080000010080000018800142016800136113180051091171180039080000101080000240000801008004380043800438004380043
32020480042620000000000000080027066002540015980140240051800008010024000080000480499480015311999480023800428004203244001002008000024000020016000024000080118800421180201100991001008000080000010080000018800131014800006016200051091171180039180000131080000240000801008004380043800438004380043
32020480042620000000000000080027166002540015180100240059800008010024000080000480499481632312149380023800428004203244001002008000024000020016000024000080042800421180201100991001008000080000010080037018800140017800130114180051211171180039180000131080000240000801008004380043800438004380043
320204800426210000111034000080027166002540015980100240059800008010024000080000480499480019312149380023800428004203524001002008000024000020016000024000080042800421180201100991001008000080000010080000018800520017800146116200051091170180038180000131380000240000801008004380043800438004380043
320204800426200000000020000080027166002540015980100240000800008010024000080000480499480007312154780023800428004203244001002008000024000020016000024000080042800421180201100991001008000080000010080000018800160014800006114200051091171180039080000131080000240000801008004380043800438004380043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32002580055621001010004301001800271066002540009780010240037800008001024000080000480287480011312076680023800428004203244000102080000240000201600002400008004280042118002110910108000080000110800066358002402137800146125270005019817136800390800009980000240000800108004380043800438004380043
32002480042621100000003100000800271061500254000958001024003880000800102400008000048004948006331209798002380342800420324400010208000024012020160000240000800428004211800211091010800008000001080000025800200005718001061250000501913171313800390800006680000240000800108004380043800438004380043
3200248004262010000000420000080027970002540004880010240084800408001024000080000480049480005312259280023801168004203244000102080000240000201600002400008004280042118002110910108000080000010800065368003800116800316123000050191417138800390800006080000240000800108004380043800438004380119
32002580042620000000003000000800271615002540005880010240038800008001024000080000480049481648312085180023800428004273524000102080000240000201600002400008004280042118002110910108000080000010800066258007202022800326110286005019917136800391800006680000240000800108004380043800438004380043
320024801166200000000020000008002716100025400074800102400728000080010240000800004800494800223120852800238004280042032440001020800002400002016000024000080042800421180021109101080000800000108000651780020000208002260102360050191317138800391800006680000240000800108004380043800438004380043
32002480042620000001001900000800271006002540005380050240015800008001024000080000480049480006312128680023800428004203244000102080000240000201600002400008004280042218002110910108000080000010800006238000501098002061202361150191417136800390800006680000240000800108004380043800438004380043
320024800426210000000050000080027161000254000978001024008780000800102400008000048028748002331223428005180042800420324400010208000024000020160000240000800428004211800211091010800008000001080037035800200002080021511327000501911171313800390800006080000240000800108004380043800438004380043
32002480042620100000102801000800271060025400080800102400758000080010240000800004800494800063122342800238004280042034940001020800002400002016000024000080042800421180021109101080000800001108000003680059010138002261240000501913171313800391800006680000240000800108004380043800438004380043
32002480042620100000002800000800275600025400082800102400388000080010240000800004800494800273122342800238004280042032440001020800002400002016000024000080042800421180022109101080000800000108000002680024000228001061522000501913171312800390800006680000240000800108004380043800438004380043
32002480042620100000002600000800271660025400058800102400708000080010240000800004800494799983122342800238004280042032440001020800002400002016000024000080042800421180021109101080000800000108000002680015000118000561523000501912171313800391800009680000240000800108004380043800438004380043