Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (post-index, 2S)

Test 1: uops

Code:

  ld3r { v0.2s, v1.2s, v2.2s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.006

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.006

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | df | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
64005294232373201101000130104615288130001725450061000300910001000300010005000500035691011002281629168295163105000100030002000300029254293301161001100010000100003100000010002030013238934469923197058203483283382216555802861310001607913187144751000300010002934129428295222942629302
640042939923601000000002000465528850000172575006100030001000100030001000500050003568200002287729204294103105000100030002000300029337292021161001100010000100000100000010002000013468940369153137059205353226381113596602859010001608313335145501000300010002948829386292982954329458
640042941123501000000002010482828945100172045009100030061000100030001000500050003562603002283729233293993105000100030002000300029411292631161001100010000100002100100310002020013106942469613118057204433351381416625102877310001613713383144661000300010002939629344293642948729523
640042957223611000000003010461829002000173075006100030001000100030001000500050003569503002279929217294393105000100030002000300029348293041161001100010001100002100020610002020013312931769403145059204903348381522605602860210001632013515143751000300010002942729447294712944129425
640042935323600001000002000467228938000172925000100030001000100030001000500050003560503002278929185294973105000100030002000300029197293601161001100010000100002100000010010000013112925569423121065206293243381417585902869710001602713435144991000300010002953329524296432950029441
6400429398236000010000011000470828916000169985006100030001000100030001000500050023569505002280129335296283105000100030002000300029290292581161001100010001100002100000010002000013217925869823171056203583244381517635202864910001618513456143851000300010002937329370293682936629440
640042949723601001000009000467128901000171585006100030061000100030001000500050003570606002286729202294163105000100030002000300029335293131161001100010000100000100000010002000013337927869773160169204763249381513545502872810001632113188144861000300010002946229477293982939929545
640042949023700001000003000455828859000173475006100030061000100030001000500050003568804002282529217295036105000100030002000300029152292451161001100010001100000100010110000000013320927969763190055203423206381415596402857310001629313395146031000300010002927529466294282942929427
640042950923700001000000000476128994000172095006100030061000100030001000500050003560600002276129191293913105000100030002000300029425291411161001100010000100002100000010002020013195947069243261058204213294381410615802865010001607813457145171000300010002941229457295252933929481
640042939423700000000002000476428990000171625006100030061000100030001000500050003568704002282929231294763105000100030002000300029277292701161001100010000100302100000010002000013092939969263160055203563260381816565602864510001620513435143761000300010002944029486290602946529391

Test 2: throughput

Count: 8

Code:

  ld3r { v0.2s, v1.2s, v2.2s }, [x6], x8
  ld3r { v0.2s, v1.2s, v2.2s }, [x6], x8
  ld3r { v0.2s, v1.2s, v2.2s }, [x6], x8
  ld3r { v0.2s, v1.2s, v2.2s }, [x6], x8
  ld3r { v0.2s, v1.2s, v2.2s }, [x6], x8
  ld3r { v0.2s, v1.2s, v2.2s }, [x6], x8
  ld3r { v0.2s, v1.2s, v2.2s }, [x6], x8
  ld3r { v0.2s, v1.2s, v2.2s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202058006962111101110003100018009716600254001638010024007580000801002400008000048049948003131328070800238004280042032440010020080000240000200160000240000800428004211802011009910010080000800000100800066228002600025800186125235005111525253800391800000080000240000801008004380043800438004380043
32020480042620110111010016200018002716003254001648010024007580000801002400008000048049948003131225990800238004280042032440010020080000240000200160000240120800428004211802011009910010080000800000100800066228002400025800196124226005111517154801021800009980000240000801008004380043800438011880043
3202048004262111001100002900028002716600254001758010024006480000801002400008000048049948164731328050800238004280042032440010020080000240000200160000240000800428004211802011009910010080000800000100800066238002400127800186125235005111317255800391800009980000240000801008004380043800438004380043
3202048004262111111110002900018010316600254001758010024006480040801002400008000048049948003431225360800238004280042032440010020080000240000200160000240000800428004211802011009910010080000800000100800056238002500128800186125235005111517153800391800009980000240000801008004380043800438004380043
320204800426201120100000161000180027106002540016480100240063800008010024000080000480499479998312253608002380042800420324400100200800002400002001600002400008004280042118020110099100100800008000001008000562380024000248001861252350051115171558003918000091080000240000801008004380043800438004380043
32020480042622111010110030000180027166002540016480100240075800008010024000080000480737480023312253608002380042800420324400100200800002400002001600002400008004280042118020110099100100800008000001008000562380007000248001801232360151114172358003908000010980000240000801008004380043800438004380043
320204800426201101100000500028002716600484001638010024008080000801002400008000048049948002331225360800238004280042032440010020080000240120200160000240000800428004211802011009910010080000800000100800067228002501127800206125235105111317154800391800000980000240000801008004380043800438004380043
32020480042621110010000016300018002716611025400163801002400758000080100240000800004804994800233122536080023800428004201024400100200800002401202001600002400008004280042118020110099100100800008000001008000762280024030584800196123236005111517155800390800009980000240000801008004380043800438004380043
3202048004262011001100002900018002716600254001698014024018980000801002400008000048049948002331225360800238004280042032440010020080000240000200160000240120800428004221802011009910010080000800000100800076228002400026800006124226005111417134800391800009980000240000801008004380043800438004380118
32020480042620111010000016300018002716600254001648010024001880000801002400008000048049948002331225910800238004280042035240010020080000240000200160000240000800428004211802011009910010080000800000100800076238002701125800200127226005111517155800391800009980000240000801008004380043800438004380118

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32002580056620110110040000080027166002540007380010240064800008001024000080000480049480073312274880078080042800420324400010208000024000020160000240000800428004211800211091010800008000001080007522800250002680019612422600501911711800391800009980000240000800108004380043800438004380043
320024800426201011100410001800271660025400085800102400648000080010240000800004800494800323122536800230800428004203244000102080000240000201600002400008004280042118002110910108000080000010800067238002400058580018612422601501911711801240800009080000240000800108004380043800438004380043
3200248004262010011003100028002716600254000738001024006380000800102400008000048028748002331225368002308004280042032440001020800002400002016000024000080042800421180021109101080000800000108000660800230002780018612523500501911711800391800009980000240000800108004380043800438004380043
32002480042620110110016200018002716611025400073800102400758000080010240000800004800494800463120140800230800428004203244000102080000240000201600002400008004280042118002110910108000080000010800055228002400123800186125235105019117118003918000010980000240000800108004380043800438004380043
32002480042620111110038010180027100002540007480010240018800008001024000080000480049480034312248180023080042800420350400010208000024000020160000240000800428004211800211091010800008000001080007523800250004280000002422600501911711800391800000980000240000800108004380043800438004380043
32002480042620100110029000180027160002540007480010240132800008001024000080000480049480023312253680023080117800420324400010208000024000020160000240000800428004211800211091010800008000001080006623800250012580000602422600501911711800390800009980000240000800108004380043800438011680043
3200248004262011011006000080027000002540008580010240080800008001024000080000480049480023312253680023080042800420324400010208000024000020160000240000800428004211800211091010800008000001080006522800250102580018612523510501911711800391800009980000240000800108004380043800438004380043
32002480042621101110029000180027166002540008580010240069800008001024000080000480049480034312259280023080042800420324400010208000024000020160000240000800428004211800211091010800008000001080007624800270002680018612323610501911711800391800009980000240000800108004380043800438004380043
3200248004262110010002900018002716600254000748001024006480000800102400008000048004948004231225368002308004280042032440001020800002400002016000024000080042800421180021109101080000800000108000560800230002480018612523510501911711800391800009980000240000800108004380043800438004380043
32002480042621100110018000180027166002540007480010240063800008001024000080000480049480006312030180023080042800420324400010208000024000020160000240000800428004211800211091010800008000001080005523800250002381073612306105019117118003918000016980000240000800108004380043800438004380043