Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1R (4S)

Test 1: uops

Code:

  ld1r { v0.4s }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.002

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.002

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6200528428212510000121005046283591116452200210031000100010005000119281002267128108281203272000100010001000100028058281361161001100010000100001001001000113140921039170143457052199823462381212414127761149881290612830100010002863628110280562809928015
6200427953213200100020005180278740116664200010031000100010005003119224102261627922281093102000100010001000100028004280651161001100010000100021001001001102133221021573203556031198553228381111343227777151091242212795100010002805028520281962825728490
6200428064211000000021005317278880015933200210021000100010005010119225002265928055280583102000100010001000100028488280611161001100010001100021000011001102135151040270723521036197083486382110463527983149351208613733100010002863428046280682862328015
620042808421200000002000530327908111599120021002100010001000501211924210227242808628531310200010001000100010002811028041116100110001000010000100001100111213495106717218352203719943353038168383127807138841211112554100010002853727950284052797927995
6200427994212100000021005293279581116072200010001000100010005005119074082268328364280203102000100010001000100028077279991161001100010000100021001011002100139381038973003473039198933221381315394127873137951233713154100010002861428139279462808228516
6200428032212200101021005295278400116066200010021000100010005000119316082270727963280263102000100010001000100028122280321161001100010000100021002001000122138451025573463522033194603497381610333427747139551219312924100010002846628137284712818227979
620042842821211010103100521628250101599920021002100010001000500611928710226882832528110310200010001000100010002811227948116100110001000010002100100100021214160974172663442038194373423381613413827727137711222912842100010002806527999280132844628393
6200427975213310000020004855278611116300200210021000100010005004119315182268927988280003102000100010001000100027946278721161001100010000100021001011000112141751045473073458037193753557381512343827777137111216312737100010002794527983280182815428526
6200428098213200000020005308279361016308200210021000100010005010119271082261728026280803102000100010001000100027949281091161001100010000100001001041000012141461038872283455039193743212381211293827749139981192912656100010002797828449281962853927916
6200428543212200000101005309279601016416200210021000100010005006119002002269228012280883102000100010001000100028479283311161001100010001100021001001000012141161048671053535037198783520382011413627798156341268014366100010002844028064280862817727929

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1r { v0.4s }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602051400471049000000010000140035139410129347257010240100200001000030100200001000012638396693778143106330140026140047140047131795313238260100302001000020000602001000020000140035140047115020110099100401001000010000010010000001000000001000010000321021221113955240000009100001000040100140036140036140051140051140051
60204140047104901000004030000140020139410129362257010240100200021000030100200001000012640546692947143106331140026140050140050131793313238260100302001000020000602001000020000140050140047115020110099100401001000010000010010000001000000001000000000321011221113955240000960100001000040100140036140036140036140051140051
6020414004710500000000101001400351394101293622570102401002000210000302502000010000126405466935381431063311400231400501400501317953132402601003084210000200006020010211200001400361404251150201100991004010010000100000100100070110009034121000010100321021271113955740000669100001000040100140051140036140036140048140051
60204140105104900010005940100140037139407129348257010240100200141000030100200001000012641546692947143087011140026140050140047131797611324026010030200100002000060200100002000014043514004711502011009910040100100001000001001000000100000002911000000140321011271113955640000669100001000040100140051140038140051140048140051
60204140048104900000001010014003513941012934725701004010020002100063010020000100001264415669353814310633014032914005014003513179531324026010030200100002000060200100002000014003514004711502011009910040100100001000001001000001100000002731000000100321011221113955540000009100001000040100140058140051140048140036140052
602041400921049001000010000140020139406129347757011640100200021000030100200001000012641806701741143108370140025140050140050131792313239960100302001000020000602001000020426140054140035115020110099100401001000010000010010000011000100031000000000321011271113954640000906100001000040100140051140036140051140402140051
602041401191052000100060100140020139410129347257010240100200001000030100200001000012642986693733143088051140026140050140050131795313238260100302001000020000602001006520000140047140047115020110099100401001000010000010010000011000001001000010000321011221113955540000600100001000040100140051140036140051140051140051
602041401041049000110010100140035139407129500257010040100200021000030100200001000012640456694177143087011140026140035140047131795313240260100302001000020000602001000020000140050140047115020110099100401001000010000010010000011000000001000010000321011271113954640000666100001000040100140051140051140094140056140036
602041400501049000000010000140035139410129362257010040100200021000030662200001000012638036693685143111281140011140035140047131795313240260100302001000020000602001000020000140050140035115020110099100401001000010000010010000011000000091000000100321011271113954640000999100001000040100140054140036140051140036140036
6020414004710480000004520100140035139597129362257010240100200001000030100200001000012640456693634143298971140023140050140412131793313238260100302001000020000602001000020000140035140047115020110099100401001000010000110010004011000004201000010100321011221113955540000069100001000040100140064140050140048140439140051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60025140057104901001010100140036139398129363257001240010200021000030010200001000012645156693734143261850140027014005114003513180903132440600103002010000200006002010000200001400511400511150021109104001010000100001101000001100000000100001010031405113104513963040010101010100001000040010140405140052140052140052142770
600241401691071000002188102143163139398129363257001240010200001000030010200001000012645246693734143261850140027014005314005113181906132434603023002010000202136002010000200001406321400521150021109104001010000100000101000001100000000100001010031404113003713957040000101010100001000040010140168140263140036140036140052
60024140051104900000000100140036139398129363257001040010200021000030010200001000012644296692947143261850140027014005114005113181903132420600103002010000200006002010000200001400511400511150021109104001010000100000101000001100000000100001000031404111007713957040000101010100001000040010140036140036140052140052140052
60024140051104900000010100140036139398129347257001240010200021000030010200001000012644886693734143261850140027014003514005113181903132434600103002010000200006002010000200001400511400511150021109104001010000100000101000001100000000100001000031403111004413957040000101010100001000040010140052140052140052140052140052
600241400511048000000101001400361393981293632570012400102000210000300102000010000126448866929471432618501400270140051140051131819031324346001030020100002000060020100002000014005114005111500211091040010100001000001010000011000000001000010000314071130061013957040000101010100001000040010140052140052140052140052140036
60024140051104900000010100140036139398129363257001040010200001000030010200001000012644886693734143261850140011014005114003513181903132420600103002010000200006002010000200001400511400511150021109104001010000100000101000111100020011100001111231404113004413957040000000100001000040010140052140052140052140052140052
60024140051104900000025000014002013939812936325700124001020000100003001020000100001264488669373414326185114002701400351400511318190313243560010300201000020000600201000020000140051140035115002110910400101000010000010100000110000000010000101003140411300761395704000010010100001000040010140036140157140052140052140036
60024140051104900000000001140036139398129363257001040010200021000030010200001000012644886692947143214420140011014005114005113181903132434600103002010000200006002010000200001400511400511150021109104001010000100000101000001100000003100001010031404113005513957040000101010100001000040010140052140052140052140052140052
60024140051104900000010100140036139398129363257001240010200001000030010200001000012644296693734143261850140027014005114005113180303132420600103002010000200006002010000200001400511400511150021109104001010000100000101000000100000000100001000031407111005413955440000101010100001000040010140052140036140052140052140052
600241400511049000000100001400201393981293632570012400102000010000300102000010000126448866929471432144201400270140035140051131819031324206001030020100002000060020100002000014005114005111500211091040010100001000011010000011000000120110000101003140411100851395704000001010100001000040010140052140052140052140036140052

Test 3: throughput

Count: 8

Code:

  ld1r { v0.4s }, [x6]
  ld1r { v0.4s }, [x6]
  ld1r { v0.4s }, [x6]
  ld1r { v0.4s }, [x6]
  ld1r { v0.4s }, [x6]
  ld1r { v0.4s }, [x6]
  ld1r { v0.4s }, [x6]
  ld1r { v0.4s }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160205267412001010670022672227720251601641008006380000100800008000050011692951878294026721267372674166613669516010020080000800002008000080000267372673711802011009901001008000080000010080019190800601002180000613943190511031633267341313080000800001002673826738267382673826738
160204267372001010671032672200720251601631008006580000100800008000050011692951878294126699267422671666593669516010020080000800002008000080000267152673711802011009901001008000080000010080019194380058000218000060394319151103163326734013180000800001002673826738267382673826738
160204267372001110671002672227720251601631008001980000100800008000050011692951886401126721267472674466593670416010020080000800002008000080000267372673711802011009901001008000080000010080019194380058001618003901194319251102163326734130180000800001002673826738267162673826738
1602042690520010106710126722077202516011910080019800001008000080000500116929518782941266992674726737665936695160100200800008000020080000800002673726737118020110099010010080000800000100800211944800581012180000605843192511031623267341313180000800001002673826738267382673826738
1602042673720011102110326722077202516016410080065800001008000080000500116845018794151267182674526739665836695160100200800008000020080000800002673726737118020110099010010080000800000100800202043800591006180041613943191511031633267341313180000800001002673826738267382673826738
1602042673720011006600326700007192516016510080065800001008000080000500116988318794151267202674226737666136695160100200800008000020080000800002673726737118020110099010010080000800000100800192144800590006380039615843192511031633267341313080000800001002673826738267382673826716
1602042671520011107500326722377202516016510080063800001008000080000500116772218801901266962674326878666736695160100200800008000020080000800002671526737118020110099010010080000800000100800192044800580006380039015943190511031633267341313080000800001002673926716267382673826741
160204267372001000211032670030719251601651008006680000100800008000050011701791879415126724267412673766703669816010020080000800002008000080000267422673711802011009901001008000080000010080020190800590006180040603943192511031633267381313080000800001002673826738267382673826738
1602042673720011101031032672237719251601651008006580000100800008000050011738841879415126718267212674766593669516010020080000800002008000080000267372671911802011009901001008000080000010080152204380058100618004061604319151103163326734013180000800001002673826738267382673826738
16020426737201111121102267003771925160119100800658000010080000800005001167371188256212672126752267376659366951601002008000080000200800008000026737267371180201100990100100800008000001008002020080058000618000001594319151103163326712130180000800001002673826716267382671726738

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600252674220111100082000326722077192516007410800658000010800008000050116929518838010267182681326724666003671716001020800008000020800008000026741267371180021109010108000080000010800191943080058100648003961043191050209160782674101308000080000102671626716267162671626742
1600242673720111110088000326722377182516007310800638000010800008000050116772218782030267092671526739668803672216001020801898000020800008000026721267411180021109010108000080000010800192043080019101618003901190191050206160882671201308000080000102673826738267382671626738
1600242673720011100067000026722070125160073108001980000108000080000501168291187716002672726715267376682036695160010208000080000208000080000267372671511800211090101080000800000108001919430800591012280000010431900502051607626712131318000080000102673826738267382673826716
160024267372001000006600012672200719251600751080019800001080000800005011701791885460027247267462674866590366951600102080000800002080000800002673726715118002110901010800008000001080020190318005916060800400158431910502051607626712131308000080000102671626717267162673826738
1600242671520010101021000326722377125160075108006580000108000080000501169295188380112670726737267156682036717160010208000080000208000080000267152671511800211090101080000800000108002119430800580002180039615943190050206160562671201328000080000102673826738267382682026716
160024267372001010006710012672230719251600751080065800001080000800005011695441885456126707267152673766810367171600102080000800002080000800002673726737118002110901010800008000001080019194308005710061800400159431900502071606726712131318000080000102673826738267382673826738
160024267372001100008200002670007720251600751080065800001080000800005011701791883190026726267422673766810366951600102080000800002080000800002673726737118002110901010800008000001080019204308007200121800406159431900502051607726712131308000080000102671626738267382671626716
16002426737200100100660002267220771925160029108001980000108000080000501170179188546002672026720267376681036695160010208000080000208000080000267372671611800211090101080000800000108002019008005810021800396139431900502061606526712131318000080000102671926716267382673826716
16002426715200111100210003267222001251600291080065800001080000800005011701791883190026721267412673766830367171600102080000800002080000800002673726737118002110901010800008000001080020194308006001121800406019431900502051605626734131318000080000102673926738267382671626738
160024267372001101116600022672220720251600291080019800001080000800005011701791883190026822267502671566810367171600102080000800002080000800002673726737118002110901010800008000001080019190080074101608000061394313015020516088267170028000080000102673826716267382673826716