Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (single, S)

Test 1: uops

Code:

  ld1 { v0.s }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.001

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.001

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6200528619214120100001104923282530016166200110011000100010005000119173227202793328549310200010001000100020002843928015116100110001000110002100000110002000135061031272353529850196183418381920444327889138171316013110100010002844228110281172825528157
62004280362141401301021052722828111161302001100110001000100050001192592269127934284673102000100010001000200028154285551161001100010000100021001000100021201333210367729735051046193283433381213463927849152111229812938100010002822828308280472808028545
62004284402111401000020049112820100161252001100110001000100050001190422266528009280993102000100010001000200028265281151161001100010000100021000000100020201379910459733435241047199913434382220414327855137681237813103100010002802428056280332808628478
620042809521217015001210501827940001605920011001100010001000500011938122705279752803431020001000100010002000283652812911610011000100001000210000031001202013922985772223252648199333561381114494927992139051222714453100010002810428530281732821628219
6200428082214230140002005362278560015982200010011000100010005000119265227102835728267310200010001000100020002840028689116100110001000010002100100310000020141911057772703254743200313423381112414227907138161217313355100010002799528536284002859128038
6200428015209140170002005221278611116209200110011000100010005000119241122712280342840231020001000100010002000281542808911610011000100001000210010041001202014028983672773270438198703473381915433927996139351245312823100010002851228155283182818528011
6200428483212140140006105273277980016469200010011000100010005000119255226782798928117310200010001000100020002812428043116100110001000010000100000210001110141039828730134981039193893495382214363827803147151198313793100010002803328135281542803328022
6200428105211110150002105327278460016029200210021000100010005000119316227322787327960310200010011000100020002799428114216100110001000010002100000110011120140841064772993529736194273482381514444527753141881223212742100010002816828000279952802528030
6200428094210100110002005344278930016062200010011000100010005000119298227142823228104310200010001000100020002844227996116100110001000010002100110010002020140591047771563523544195413433381214444527838141791277814412100010002807528325284602810328079
6200428048211170130002005318279660016223200110001000100010005000119288227052836128043310200010001000100020002790728105116100110001000010002100000010002010135261040972313541636195313435381415434227812141461215312807100010002806928010281412808228079

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.s }[1], [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60205140051104900000101001400361394111293632570102401002000210000301002000010000126380366937341431093911400300140051140051131797313240560100302001000020000602001000030000140051140051115020110099100401001000010000010010000010000001000011032101126111395594000010100100001000040100140052140036140052140052140052
602041400511049000001010014003913941112936325701024010020002100003010020000100001264020669373414310939114001101400511400511317973132403601003020010000200006020010000300001400351400511150201100991004010010000100000100100001100000010000110321011261113955940000101010100001000040100140052140052140052140036140052
60204140035104901100001001400201394111293632570102401002000210000301002000010000126402066937341430870111400273140051140051131797313239960100302001000020000602001000030000140051140051115020110099100401001000010000010010000010000001000011032101171113955440000101010100001000040100140052140052140052140052140036
602041400511049000009010014002013941112936325701024010020002100003010020000100001264020669373414310939114002701400511400511317973132399601003020010000200006020010000300001400511400511150201100991004010010000100000100100001100000010000010321011271113955940000101010100001000040100140052140052140102140052140052
6020414003510490000040010014003613941112936325701004010020002100003010020000100001263803669373414310939114002701400511400511317973132580601003020010000200006051810159300001400511400511150201100991004010010000100000100100000100000010000110321011261113955940000101010100001000040100140052140052140052140052140036
602041400351049000001010014003613941112934725701024010020002100003010020000100001264020669373414310939114002701400511400511317973132399601003020010000200006020010000300001400511400351150201100991004010010000100000100100001100000010000110321011262113955940000101010100001000040100140517140139140152140052140052
6020414024810521000008810014003613941112936325701024010020002100003010020000100001264020669373414310939114002701400511400511317973132400601003020010000200006020010000300001400511400351150201100991004010010000100000100100001100000010000110321011261113955940000101010100001000040100140052140052140052140052140036
60204140051104901000101001400361394111293632570100401002000210000301002000010000126380366937341431093911400270140051140051131797313239960100302001000020000602001000030000140051140102115020110099100401001000010000010010000110000001000011032101126111395594000001010100001000040100140052140052140052140052140052
60204140051104900000101001400361394111293632570102401002000210000301002000010000126402066937341431093911400270140051140051131793313242360100302001000020000602001000030000140051140051115020110099100401001000010000010010000110000001000001032101126111395464000001010100001000040100140052140052140052140052140052
60204140051104900000101001400361394111293632570102401002000010000301002000010000126402066937341431093901400270140051140051131797313256060100302001000020000602001000030000140051140051115020110099100401001000010000010010002110000001000010032101126111395594000010100100001000040100140092140056140038140052140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6002514004710480001000101014003213939412935925700124001020002100003001020000100001264443669353814325829014002301400471400481318150313243060010300201000020000600201000030000140047140047115002110910400101000010000010100000110000000100001100314041132213956640000066100001000040010140048140048140048140048140048
6002414004710490000000100014003213939412935925700124003520002100003001020000100001264443669353814325829014031601400471400471318150313243060010306631000020000600201000030000140047140047115002110910400101000010000110100000110000120100001100314021132213956640000666100001000040010140048140048140048140048140048
6002414004710490001120101014003213939412935925700124001020002100053001020000100001264443669353814325829014002301400471400471318150313252160010300201000020000600201021530000140047140047115002110910400101000010000010100000110000000100001100314021112213956740000669100001000040010140051140337140051140048140051
6002414005010490001100101014003213939412935925700104001020002100003001020000100001264452669353814339627014002301400471400471318150313259860010303421010720428600201000030000140048140047115002110910400101000010000010100000110000220100001100314021132213956640000666100001000040010140048140048140048140036140051
6002414004710490000001101014003213939412940525700124001020002100003001020000100001264443669353814325829014002301400471400471318150313243060010300201000020000600201000030161140047140047115002110910400101000010000010100000110000000100001100316821132213956640000669100001000040010140048140048140048140051140048
6002414005410480001000101014003213939412934725700104001020002100003001020000100001264477669353814325829014001101400351400471318180313243060010300201000020000600201005430000140047140047115002110910400101000010000110100000110000100100001100314021132213962340000660100001000040010140048140048140048140048140048
6002414004710490001000101014003213939412935925700124001020002100003001020105100001264477669353814325829014002301400471400481318150313243060010300201000020000600201000030000140047140047115002110910400101000010000110100000010000000100001100314021132213961940000666100001000040010140048140048140048140048140048
60024140050104900011001301014003213939412935925700124001020002100003001020108100001264443669353814325829014002301400471400471318150313243060010300201000020000600201000030000140047140047115002110910400101000010000010100000110000100100001100314021132213956640000666100001000040010140048140036140048140051140048
6002414004710490000100001014003213939512936025700124001020002100003001020000100001264443669294714325829014002401400351400471318150313242060010300201000020000603441000030000140047140047115002110910400101000010000010100000110000000100001100314021132213956640000666100001000040010140036140051140048140048140048
6002414004710490100100100014003213939412935925700124001020002100003001020000100001264443669294714325829014002301400471400351318150313243060010300201000020000600201000030000140048140047115002110910400101000010000110100000110000000100000100314021132213956940000066100001000040010140048140048140051140036140048

Test 3: throughput

Count: 8

Code:

  ld1 { v0.s }[1], [x6]
  ld1 { v0.s }[1], [x6]
  ld1 { v0.s }[1], [x6]
  ld1 { v0.s }[1], [x6]
  ld1 { v0.s }[1], [x6]
  ld1 { v0.s }[1], [x6]
  ld1 { v0.s }[1], [x6]
  ld1 { v0.s }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602051600561199010200057010116004121212159684162516010210080002800001008000080000500421998229395591160037160056160056139691314001416010020080000800002008000016000016005616005611802011009910010080000800000100800000430800390039800396139430005110216221600530101048000080000100160057160057160057160057160057
1602041600561198000000045010116004121212159684162516010210080002800001008000080000500421998229395471160037160056160056139691314001416010020080000800002008000016000016005616005611802011009910010080000800000100801300430800390039800396139430005110216221600530101048000080000100160057160057160057160057160057
1602041600561200001000150401011600412129159684162516010210080002800001008000080000500421017229395471160037160056160056139691314001416010020080000800002008000016000016005616005611802011009910010080000800001100800000430800391039800396139430005110216221601332101048000080000100160426160181160289160120160057
16020416005611980000000450101160041212121596901625160100100800028000010080000800005004219982294108811600371600561600561396913140014160100200800008000020080000160000160056160056118020110099100100800008000001008000004329880039004280169613943000511021622160053001048000080000100160057160057160057160057160057
1602041600561199000000045010116004121212159684162516010210080002800001008000080000500422041229395751160037160056160056139691314001416010020080000800002008000016000016005616005611802011009910010080000800000100800000430800392046800396139430005110216221600530101048000080000100160057160057160057160057160057
1602041600561199000000045010116004121212159684162516010210080002800001008000080000500422028229395471160037160056160056139691314001416010020080000800002008000016000016005616005611802011009910010080000800000100800000430800390039800396139430005110216221600370101048000080000100160057160057160057160057160041
1602041600561198000000045000116004121212159684162516010210080002800001008000080000511422041229395751160037160056160056139691181400141601002008000080000200800001600001600561600561180201100991001008000080000010080000043080039003980000613943000511021622160053001008000080000100160057160041160057160041160057
1602041600561199000000045010116002521212159684162516010210080002800001008000080000500422028229395591160037160056160056139691314001416010020080000800002008000016000016005616005611802011009910010080000800000100800000430800390039800396139430005110216221600530101048000080000100160057160057160041160057160057
16020416005611990000000450000160025212121596841625160102100800028000010080000800005004220282293954711600371600561600561396911714001416010020080000800002008000016000016004016005611802011009910010080000800001100800000430800390039800396139430015110216221600530101048000080000100160057160057160057160057160057
1602041600561198000010045010116004121212159684162516010210080002800001008015080000500422041229395470160021160056160056139691314001416010020080000800002008000016000016005616005611802011009910010080000800000100800000430800391039800396139430005110216221600530101048000080000100160057160057160057160057160057

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160025160056119901000004401011600412112159690162516001210800028000010800008000050422394229395470160037160056160060139713314003616001020800008000020800001600001600561600561180021109101080000800001108000043080039040800396139430050200181681116005310104800008000010160057160057160057160041160057
160024160060119901001004500011600412121215968419251600121080002800001080000800005042284122939575116003716005616005613971331400361600102080000800002080000160000160056160056118002110910108000080000010800004308003903880039613943005020061611816005310104800008000010160061160057160057160057160057
16002416005611980000000450001160045211215968419251600121080002800001080000800005042241922946559116003716004016005613971331400361600102080000800002080000160000160056160056118002110910108000080000010800000080039242800396139430050200111611816005310104800008000010160041160057160057160057160061
16002416005611990100000450001160534212015968416251600121080002800001080000800005042259922939575016003716005616006013971331400361600102080000800002080000160000160040160056118002110910108000080000010800004308003803880039610430050200141610616005710100800008000010160057160057160057160057160057
160024160040119901000005700001600452121215968416251600121080002800001080000800005042211322939575016003716006016005613971331400361600102080000800002080000160000160040160060118002110910108000080000010800004308003903980039613943005020091691016005310104800008000010160061160057160057160057160041
1600241600561199010000044010116004121211596841625160012108000280000108000080000504225092293954701600371600561600561397133140036160010208000080000208000016000016005616005611800211091010800008000001080000008003903980039613943205020091681116013510104800008000010160057160057160246160057160119
160024160056119801000004568600116002521121594781625160012108000280000108000080000504224192293954701600371600561600561397173140036160010208000080000208000016000016005616005611800211091010800008000001080000430800390398003961394300502009169916005314144800008000010160057160057160061160057160057
1600241600561199010000045000116004121211596841925160012108000280000108000080000504221182294829701600371600561600561397133140036160010208000080000208000016000016005616005611800211091010800008000001080000430800380398004060394300502008168111600531004800008000010160057160057160057160057160061
160024160056119800000004500011600412121215968419251600121080002800001080000800005042261922939559116003716004016005613971331400401600102080000800002080000160000160056160056118002110910108000080000010800004308003803980039613843015020312167916005314144800008000010160057160057160057160057160061
16002416005611990000000600101160041212121596841625160012108000280000108000080000504223842293954711600371600561600561397173140036160010208000080000208000016000016005616005611800211091010800008000001080000430800380398003961394300504108169916003714104800008000010160107160057160061160057160057