Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1R (16B)

Test 1: uops

Code:

  ld1r { v0.16b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.004

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.004

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
620052866021321012000003010517027818111603420041004100010001000500011933522630280342856031020001000100010001000284562853911610011000100011003431002002100020121113421995972903345043194493566381311414127795139781211912917100010002805828415279642793228181
620042864721101311101004000481128171101642520031003100010001000500011943222635284262840931020001000100010001000281552842411610011000100011001231002001100120231213403976572173446044200443379381412424227785149031249212900100010002811428660283942810528073
620042817021101010100004010520328330011630620031003100010001000500011946922667282852800031020001000100010001000281022855811610011000100001003231003022100120231113584983470853406035200173478380912383927816140891220813111100010002803628162284912862228182
620042801721201010100004010497228085111599920041004100010001000500011946922693279652810331020001000100010001000281702799611610011000100001003231004022100120231213300980272183405045199823457381213423827793152051304612949100010002820528056286212828228230
620042811321001011100004010473928226011663820041004100010001000500011927722676280812857431020001000100010001000280632852911610011000100001002231003011100120221213798966069913408044199493183382112363427821138701305912807100010002818228587280652808928005
62004282702110101010000301051702796811161382004100310001000100050001194213227102803328224310200010001000100010002812428060116100110001000010033310040221001202212133151003270873433241194833472381811434427877136811291712811100010002846828079280942793528383
6200428401210010111000030004756277571116162200410041000100010005000119451226722810928065310200010001000100010002855328503116100110001000010021310030121001202311135521023373493480039196793279380611403927966140791292414316100010002854428261285782863027933
620042814321101010100004000487028134101658420041003100010001000500011943322692281072860731020001000100010001000284242818611610011000100001002331004022100120231213447935969513346047195633444382018363927828150861230213198100010002855528441285512802928154
620042811120901211100004010494628267011593920051003100010001000500011928422648278562855031020001000100010001000282742847211610011000100001003221003012100120231213682966272673352043196093515381515423427783154991254112931100010002817028097280942846127982
62004281322100111110000400052232783310162292003100410001000100050001192842270028093285893102000100010001000100028187283491161001100010001100332100302110002023121319399437109325603719264345838139393728039139661240512582100010002839528701280672810228063

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1r { v0.16b }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602051400511048211100140036139411129363257010240100200021000030100200001000012640206693734143087011140027014005114005413179303132399601003020010000200006020010000200001400351400511150201100991004010010000100000100100000110000000110000110321021261113955940000101010100001000040100140052140052140052140052140052
60204140056104900010014003613941112936325701024010020000100003010020000100001264020669373414310939014002701400511400511317970313239960100302001000020000602001000020000140051140051115020110099100401001000010000010010000011000501243331000301032101126111395654000010100100001000040100140052140052140052140052140036
6020414005310490000101400361394111293632570102401002000010000301002000010000126402066937341431248601400270140035140051131797031323826010030200100002000060200100002000014005414005211502011009910040100100001000001001000601100030227856100001103210132111395594000001310100001000040100140052140052140036140052140055
602041400581049000131014003913941112934725701024010020002100003010020000100001264020669373414310939114002701400511400351317970313239960100303641000020000602001000020000140054140054115020110099100401001000010000010010000011000000001000101032101126111395594000010100100001000040100140036140052140052140052140036
602041400511049000110140036139411129363257010240100200021000030100200001000012639586693734143109391140011014003514005113179703132399601003020010000200006020010000200001400511400511150201100991004010010000100000100100000110000000010000110321011261113955940000101010100001000040100140036140052140052140036140055
60204140051105000016001400201394111293632570100401002000210000301002000010000126395866929471431093911400110140051140035131793031323826010030200100002000060200100002000014003514005111502011009910040100100001000001001000001100000002071000000032101139111395594000010010100001000040100140052140055140036140055140036
6020414005110490001011401551394111293632570102401002000010000301002000010000126380366937341431093911400300140035140051131839031323896010030200100002000060200100002000014005514003511502011009910040100100001000001001000001100000000100001103210112611139559400000100100001000040100140052140052140036140052140052
60204140035104900010114002013940612934725701004010020002100003010020000100001264020669373414310939114001101400511400511317970313239960100302001000020000602001000020000140054140035115020110099100401001000010000110010000001000001001000011032101171113954640000101010100001000040100140036140054140052140052140052
602041400351049000101140020139411129363257010240100200021000030100200001000012640206692947143109391140030014005114005113179703132399601003020010000200006020010000200001400511400511150201100991004010010000100000100100000110000010010000110321011261113955940000101010100001000040100140052140052140052140052140036
60204140051104900010114003613940612936325701024010020002100003010020000100001264020669294714310939114002701400351400541317970313238260100302001000020000602001000020000140035140051115020110099100401001000010000010010000011000000001000010032101126111395594000010010100001000040100140036140052140052140036140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6002514005810490000000060100140036139398129363257001240010200021000030010200001000012644296693734143261850140011140051140035131915313248360010300201000020000600201000020000140051140051115002110910400101000010000010100000010000010010000101000031401111307813955440000101010100001000040010140052140052140036140052140036
600241400571049111010001010014003613938812947625700144001020004100003001020000100001264488669373414326185114009614005314013813181931324346001030182102122000060020100002000014005114003611500211091040010100001000001010000001000610111000010100003140101130121713955440000101010100001000040010140052140052140052140052140058
6002414005110480000000014010014002013940412936925700124001020004100033001020000100001271524671071714321442114008814005114005113180362132434600103002010213200006002010000200001400511404822150021109104001010000100001101000001100000100100001010000316711113010713955440000101010100001000040010140052140052140052140052140053
60024140051104900100001601001400391394001293472570010400102000210000300102000010000126448866937341432144211400581400511400511318193132424600103002010000200006002010000200001400411400571150021109104001010000100000101000001100000003100001010400314012113012813957040000101010100001000040010140036140097140052140036140052
6002414003510490000000015000014002013939812936425700124001020000100003001020000100001264429669294714325254114004314005114005113189831324346001030020100002000061316100002000014005114003511500211091040010100001000001010000011000004001000010100003140121350121213957040000101010100001000040010140036140052140036140052140036
60024140035104900000011100001400361393981293632570012400102000210000300102000010000126448866937341432618501400581404181400521318193132435600103002010000200006002010000200001400351400511150021109104001010000100000101000001100000003100011010000314010111071213957040000101010100001000040010140052140036140036140052140052
600241400511049000000007010014003613939812936325700124001020000100063001020000100001264488669294714326185014006014003514005113181931324406001030020102162000060020100002000014005714010811500211091040010100001000011010002111000000001000011100003140101110971395704000010010100001000040010140052140052140052140052140052
60024140051105200000000143521001400201393981293632570012400102000210000300102000010000126448866937341432618511400541400511400511318253132434600103002010000200006002010000200001400351400511150021109104001010000100000101000001100000000100001010000314013111014121395704000001010100001000040010140052140036140036140036140052
6002414003510490000000010100140036139398129363257001240010200021000030010200001000012644886693734143261851140069140051140051131803313243460010300201000020000600201000020000140051140035515002110910400101000010000010100032110000000310000101000031401111301061395544000001010100001000040010140036140042140058140052140052
60024140051104900000000100001400201393941293632570012400102000210000300102000010000126448866929471432144211400311400511400511318203132440600103002010000200006002010000200001400511400351150021109104001010000100000101000001100010000100000000000314011113071213986840000101010100001000040010140052140052140052140052140052

Test 3: throughput

Count: 8

Code:

  ld1r { v0.16b }, [x6]
  ld1r { v0.16b }, [x6]
  ld1r { v0.16b }, [x6]
  ld1r { v0.16b }, [x6]
  ld1r { v0.16b }, [x6]
  ld1r { v0.16b }, [x6]
  ld1r { v0.16b }, [x6]
  ld1r { v0.16b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | interrupt pending (6c) | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602052673920010011100660003267222072025160165100800648000010080000800005001167722188640312672002674326737665903669516010020080000800002008000080000267372671511802011009901001008000080000100800212143800191006080040615843190511021622267341313280000800001002673826738267382673826738
160204267372001100110066010326722077125160165100800628000010080000800005001167371188384912672502673726739665903669516010020080000800002008000080000267372673711802011009901001008000080000100800191943800192026180040615843192511021622267341313080000800001002675626716267382673826738
16020426737200111111002100032670027002516011910080065800001008000080000500116737118866431267320267372673766590366731601002008000080000200800008000026715267151180201100990100100800008000010080020224580061102608000061594319051102162226734130180000800001002673826716267382673826738
16020426737200101010006701032672237012516016410080019800001008000080000500116812518794151267260267432673766590366951601002008000080000200800008000026737267171180201100990100100800008000010080020194380059000608000001584319151102162226713130180000800001002674326718267462674226749
16020426741200121110045488103267222772325160163100800658013010080000800005001167922188783412685502739226724666903669516010020080000800002008000080000267412674111802011009901001008000080000100800201943800190212580039615843190511021622267341313280000800001002675326756267382674926742
160204267552011111100011201032673300921251601651008006580000100800008000050011701791887284126728026721267166659233669816010020080000800002008000080191267452674511802011009901001008000080000100800191943800591002180040611943190511021622267121313180000800001002671626741267162674126738
1602042671520011111000670002267612702025160163100800198000010080000800005001173027187941512674402674126737666303670016010020080000800002008000080000267152673711802011009901001008000080000100800202143800600016380040615843190511021622267341313180000800001002675326748267462674226738
1602042673720011011000670002267242771825160118100800638000010080000800005001167722187829412671802673826737665903669516010020080000800002008000080000267372673711802011009901001008000080000100800192043800591116680000615943190511021622267381313280000800001002674026738267402673826740
16020426737200111010007501032672207702516016510080065800001008000080000500117032318853691267180267372673766590366951601002008000080000200800008000026737267151180201100990100100800008000010080020214380019002648004061584319051102162226734013180000800001002671626716267382673826738
16020426715200111110006700032672237702516016310080019800001008000080000500116829118776411268260267392674566660366951601002008019280000200800008000026737267371180201100990100100800008000010080019200800190012180040615943190511021622267341313280000800001002673826738267162671626738

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160025267292011011090101267172011925160010108004480000108000080000501174628188334300267262670826708667636712160010208000080000208000080000267282670811800211091010800008000011080000043080039013980039613943050201416101126705014008000080000102673326733267292672926733
1600242670820000110440012671721102516001010800448000010800008000050116908518801700026715267322673266763671216001020800008000020800008000026708267281180021109101080000800000108000004308003900398000060044050208161111267290101008000080000102673326733267292673326709
16002426732200001000001267170101625160010108000080000108000080000501169085188521400267102670826733667336713160010208000080000208000080000267322672811800211091010800008000001080000043080039010800380004405020101611926729001438000080000102673326709267092670926709
160024267082000010000012669320119251600541080000800001080000800005011748871884032002670926732267326677366881600102080000800002080000800002673226728118002110910108000080000010800000430800380138800396104405020816910267290141408000080000102673326709267092670926733
16002426728200001000001267172111925160054108000080000108000080000501174887188595200267212670826732667636688160010208000080000208000080000267082670811800211091010800008000001080000000800380038800386004405020816101026705001438000080000102673326709267362673326733
160024267282000011044001266932111925160010108004480000108000080000501174887188796100266892673226708667636712160010208000080000208000080000267322670811800211091010800008000001080000043080039003880000613944050201216111026725014008000080000102673326733267092670926733
160024267082000010044100267172010251600101080044800001080000800005011686271884032002668926732267086676366881600102080000800002080000800002673226728118002110910108000080000010800000430800380008003801390050201116101126729014008000080000102670926709267332670926733
160024267322000011044001268810111625160054108004480000108000080000501169085188403200267102670826732667536688160010208000080000208000080000267322673011800211091010800008000001080000000800380038800396139430502010161010267250141008000080000102673326709267332673326709
1600242673220000000440012669320119251600541080044800001080000800005011688801885952002671226708267286653367121600102080000800002080000800002673226708118002110910108000080000010800000430800000008003860043050201016910267290101008000080000102680726739267092673326737
1600242673220000100441012671701002516005410800008000010800008000050116888018879610026698267082673266533668816001020800008000020800008000026728267281180021109101080000800000108000004308003800388003861364305020816810267290141408000080000102673326709267092670926733