Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3R (post-index, 4S)

Test 1: uops

Code:

  ld3r { v0.4s, v1.4s, v2.4s }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.009

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 91 | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
640052876222311101131000040000479128328010164385012100030031000100030001000500050003569362278428545287833105000100030000200030002859328562116100101000100011002331004002210010231001320894636914315864619951315238131744442819510001538812735138681000300010002868628530286732867328536
640042874122211911161000050000479328256010163525012100030121000100030001000500050003569382284628550286393305000100030000200030002844328655116100101000100001003431002001210022121101337495686993319755019682329938191044472822310001550012785135961000300010002872428696284672866228695
640042837622311511160000040000471228307011164235012100030031000100030001000500050413571852282628573286363105000100030000200030002854228469116100111000100001001231002001210002231101317594256956332354219705318738061245472806110001505412718136271000300010002866828760287342863028603
640042866722311810110000040000484428321001164475012100030121000100030001000500050003573552286428508286523105000100030000200230002850028565116100101000100001002221003001410002121101326295496959312174519600323938151143462817110001531212727136411000300010002861928744286282871728673
6400428605222112111310000270000476828295000164315009100030031000100030001000500050003566182287828617286953105000100030000200030002873728634116100101000100011002131002002110002221101314995226966315864919706320238191641422812210001562412668134661000300010002873128700287472872428652
6400428652222117111210000400004801283600001651750121000300310001000300010005000500035626102288228614286583105000100030000200030002855628610116100101000100001001131002071110010131001342493016986328173919547320738121642482825710001530112686135921000300010002859128571285392853228718
640042874922311411191100010000460628352011164475012100030121000100030001000500050003575202279328514285563105000100030000200030002858828540116100101000100001001201001000210012101101335797157016322575019587322238231347462808410001544412441136111000300010002864328473286222867728660
64004285572221141117100003000047552830200116551501210003012100010003000100050005005357152227982852928638310500010003000020003000285972857311610010100010001100210100400021000013101861359895686972315274919643313438121739472816510001548812505135861000300010002859828705286992871328618
640042874722111510190000040000483728401011164105012100030091000100030001000500050003571292278328459286113105005100030000200030002869528564116100101000100011003301003001210012231001338497196956316694119636318938131649452808510001547612682138431000300010002856328563287142863928650
640042865722211601161000010000473028301111164305012100030031000100030001000500050003573492286828405285583105000100030000200030002868228608116100101000100011002221001001210012231101318897006907318564019587319938081344432827610001554012596134771000300010002858928611286612852528595

Test 2: throughput

Count: 8

Code:

  ld3r { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3r { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3r { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3r { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3r { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3r { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3r { v0.4s, v1.4s, v2.4s }, [x6], x8
  ld3r { v0.4s, v1.4s, v2.4s }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320205800566200001000340018002716600254001758010624006580004801082400248000848053848005731216820800238004380042061340014020080008240024200160016240024800428004211802011009910010080000800000100800010188001601580001010200111511841644800391800060080000240000801008004380043800438004380043
3202048004262000010001900180027166002540010080100240050800008010024000080000480499480015312149308002380042800420324400100200800002400002001600002400008004280042118020110099100100800008000001008000001880016068180016611620000051115175580039080000131380000240000801008004380043800438004380043
3202048004264301000002200180027106002540014880100240048800008010024000080000480499480019312085208002380042800420324400100200800002400002001600002400008004280042118020110099100100800008000001008000001380013118800136113170000511151755800391800009680000240000801008004380043800438004380043
320204800426210000100180018002716000254001488010024004880000801002400008000048049948001431209130800238004280042032440010020080000240000200160000240000800428004211802011009910010080000800000100800000138001201380013610170000511151745800391800009080000240000801008004380043800438004380043
320204800426200000000000180027066002540014880100240048800008010024000080000480499480006312093108002380042800420324400100200800002400002001600002400008004280042118020110099100100800008000001008000001380012018800130013170000511141753800390800009080000240000801008004380043800438004380043
320204800426200000000180018002716000254001488010024004880000801002400008000048049948000631209090800238004280042032440010020080000240000200160000240000800428004211802011009910010080000800000100800000138001211580013619170000511161765800391800009680000240000801008004380043800438004380043
3202048004262000000001600180027106002540010080100240038800008010024000080000480499480015312085208002380042800420324400100200800002400002001600002400008004280042118020110099100100800008000001008000001780012112800130110170000511151735800391800009980000240000801008004380043800438004380043
32020480042620000000015001800271660025400148801002400488000080100240000800004804994800063120910080023800428004203244001002008000024000020016000024000080042800421180201100991001008000080000010080000013800001128001561000000511141755800390800000980000240000801008004380043800438004380043
320204800426210000000280018002716600254001488010024003880000801002400008000048049947999831199940800238004280042032440010020080000240000200160000240000800428004211802011009910010080000800000100800000138001201280013619170000511151765800390800009080000240000801008004380043800438004380043
320204800426210001000168001800271060325400138801402400488000080100240120800004804994800113131124080023800428004203244001002008000024000020016000024000080042801171180201100991001008000080000110080000008005011380013611002000511152545800391800409980000240000801008004380043800438011880043

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0005

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200258005562100000002200080027166002540006880010240051800008001024000080000480049480023312006708002380042800420324400010208000024000020160000240000800428004211800211091010800008000011080000018800160015800006114005019003170003380039180000101380000240000800108004380043800438004380043
32002480042620000000022000800271660025400061800102400008000080010240000800004800494800373121898080023800428004203244000102080000240000201600002400008004280042118002110910108000080000010800000188001600158001661142005019003170003380039180000131380000240000800108004380043801158004380043
32002480042620000000022000800271660025400069800102400598000080010240000800004800494800153121493080023800428004203244000102080000240000201600002400008004280042118002110910108000080000010800000188001510080000611400501900317000338003908000001080000240000800108004380043800438004380043
3200248004262000000003200080027166002540006980010240059800008001024000080000480049480265312149308002380042800420324400010208000024000020160000240000800428004211800211091010800008000001080000018800160038000061152005019003170003380039080000131380000240000800108004380043800438004380043
32002480042636000000022010800271660025400069800102400518000080010240000800004800494800153121493080023800428004203244000102080000240000201600002400008004280042118002110910108000080000010800000188001400138000001162005019033170004380039080000131380000240000800108004380043800438004380043
3200248004262000000002100080027066002540006980010240059800008001024000080000480049480021312189808002380042800420324400010208000024000020160000240000800428004211800211091010800008000001080000008000000188001660142005019002170003380039180000331380000240000800108004380043800438004380043
3200248004262000010002100080027160002540006980010240000800008001024000080000480049479998311999408002380042800420324400010208000024000020160000240000800428004211800211091010800008000001080000018800140013800156102005019003170002380039180000101380000240000800108004380043800438004380043
320024800426210000000000080027060002540006880010240059800008001024000080000480049480015312149308002380042800420324400010208000024000020160000240000800428011511800211091010800008000011080000020800001008001461142005019002170003380039180000101380000240000800108004380043800438004380043
3200248004262000000000000803311660025400061800102400518000080010240000800004800494800193121493080023800428004203244000102080000240000201600002400008004280042118002110910108000080000010800000208001500228001661141805019003170002380039180000131380000240000800108004380043800438004380043
320024800426200000000450008002710000254000698001024005880000800102400008000048004948001531215550800238004280042032440001020800002400002016000024000080042800421180021109101080000800000108000002080015103780016610005019004170003380039180000131080000240000800108004380043800438004380043