Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1R (4H)

Test 1: uops

Code:

  ld1r { v0.4h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.003

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.003

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
6200528668214711111104105093282630116303200410031000100010005000119539226622824828273310200010001000100010002828728221116100110001000010042210030022100022311137471006371383211580199003324381911565728031145441312613887100010002845028472285192860828465
620042850021231112100800491628052011635220031004100010001000500011913822702281352831831020001000100010001000282302819911610011000100001002321002011210012121213735995771313430158196283376381821545827973149501298513882100010002839528235285092845328351
620042831921241110100410507328158001646420041003100010001000500011931322688282742852531020001000100010001000282242828211610011000100001001201003002110012131213764100397170335115919780334938084575828041150621274413203100010002834728525285052835628163
620042830421331201111410495128028011632320011001100010001000500011942922678282152830831020001000100010001000280072862711610011000100001003321004002210002231213398989370763212265197313302381114625927974147101256113507100010002849828544283412856628250
6200428371212312121003004966282180116449200110011000100010005000119134226162820928255310200010001000100010002820728454116100110001000110032210020021100121211139191005370993390464198043352380912585628001146871235713679100010002836328449285322834328380
620042833021231211100110505328252101646720031004100010001000500011940622648283162830231020001000100010001000282432825011610011000100001002221002000110012231113547986270173321065197393248380817636628102147641264113464100010002831228582283222830728332
6200428545212412121004105050281011116346200410031000100010005000119402226822818228545310200010001000100010002839128311116100110001000010022210020001100021211138741004970543254161196453150381817626327907152711296313655100010002839928647284502825728310
6200428333212412121004005090282641016362200410011000100010005000119428226262812128561310200010001000100010002828028305116100110001000010032210030011100021312137871013672133425260197263217380912645727897145901255413490100010002837628405282952834628503
62004282942123121310041049932815501165392004100110001000100050001193622268628204282963102000100010001000100028384284261161001100010001100223100200011000222111384299207175341735519739336738219645827974151921304013533100010002843628296282262841828379
6200428320211213121004105144281610116253200310031000100010005000119380227032825228294310200010001000100010002822828326116100110001000010033010011114100021311136511016571373402252196503276381118556627894148411268714022100010002830328262282182834528303

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1r { v0.4h }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602051400571049000000001010014004413942712936525701024010020002100003010020000100001263803669402214310939114002701400351400541318003132389601003038910000200006020010000200001400511400511150201100991004010010000100001100100021010002001100001111132101139111395684000001010100001000040100140052140052140055140052140036
6020414003510490000000010000140036139406129363257010240100200021000030100200001000012640206693734143109391140030014005414003513179731323996042530200100002000060200100002000014005414005121502011009910040100100001000001001000001100000031000000000321011391113956540000131010100001000040100140042140061140061140042140058
602041403001050110000002000014005013940612936325701024010020000100003010020000100001263803669373414310939114002701400351400541318003132389601003020010000200006020010000200001400541400511150201100991004010010000100000100100023010003001100000111132101139111395504000010010100001000040100140052140055140036140055140055
602041400541049000000001000014004213943012937152701044010020002100003010020000100001263814669371714312591114005101400601400601318063132395601003020010000200006020010000200001400941400831150201100991004010010000100000100100000010000000100000010032101139111395654000010100100001000040100140061140058140061140061140061
602041400411049111100002010014003613940612936525701024010020002100003010020000100001263958669387814312488114003001400541400351318003132399601003020010000200006020010000200001400541400511150201100991004010010000100001100100000110000000100001010032101126111395464000801313100001000040100140055140055140052140036140052
6020414005410490000000040000140039139427129365257010040100200021000030100200001000012639586693878143087011140027014005414005413179731323896010030200100002000060200100002000014005414005111502021009910040100100001000001001000001100000001000010100321011271113955940000131013100001000040100140055140055140036140055140036
6020414005410490000000010100140052139456129371257010440100200041000030100200001000012639856694022143125911140033014006014006013180631323926010030200100002000060200100002000014005714005711502011009910040100100001000001001000001100000001000010100321011271113957040000131013100001000040100140061140061140042140042140061
602041401611049100000001000014002013940612934725701004010020000100003010020000100001264020669373414308701114003001400511400541318003132389601003020010000200006020010000200001400541400511150201100991004010010000100000100100000110001000100000010032101126101395594000010100100001000040100140036140052140036140052140055
6020414005910490000000010010014003613942712936345701024010020002100003010020000100001263958669387814308806114003001400511400541317933132389601003020010000200006020010000200001400511400351150201100991004010010000100000100100000110000000100001010032101139111395594000013130100001000040100140058140042140058140058140061
60204140057104911011000130101140036139427129365257010240100200021000030100200001000012639586692947143109391140027014005414003513180031323996010030200100002000060200100002000014003514005111502011009910040100100001000001001000201100000031000010100321011271113954640000131313100001000040100140052140055140052140052140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
600251401261048101002100114003813940012936425700144001020004100003001020000100001264507669383014326437114006314005314005313182103132436600103002010000200006002010000200001400491400561150021109104001010000100000101000321100020111000011112314061132213957240000666100001000040010140054140054140054140054140054
600241400531049111002000114002613940012936425700144001020004100003001020000100001264507669383014326437114002914005314005313182103132436600103002010000200006002010000200001400531400531150021109104001010000100000101000210100010141000011110314041132213957240000666100001000040010140054140054140054140042140054
6002414004110491010011100114002613946012936425700144001020004100003001020000100001264525669383014326437114002914005314005313182103132436600103002010000200006002010000200001400541400531150021109104001010000100000101000221100020111000001012314041132213956040000666100001000040010140054140054140054140054140054
6002414005310491110021001140038139400129364257001440010200071000030010200001000012643706694070143264371140029140053140041131809031324366001030020100002000060020100002000014005814005311500211091040010100001000001010001211000101101000011110314041132213957240000660100001000040010140042140042140054140054140054
600241400531049101002100114003813940012936425700144001020004100003001020000100001264507669383014326437114001714005314005313182103132436600103002010000200006002010000200001400571400531150021109104001010000100000101000411100010011000001110314041132213957240000600100001000040010140054140054140054140054140054
600241400531049111001100114003813940012936425700144001020004100003001020000100001264507669383014326437114002914005314005313182103132436600103002010000200006002010000200001400561400531150021109104001010000100000101000221100030111000011111314041132213957240000666100001000040010140054140054140054140054140054
600241400531049111002100114003813938812936425700124001020004100003001020000100001264507669383014326437114002914005314005313180903132436600103002010000200006002010000200001400561400531150021109104001010000100000101000331100010111000001111314041132213957240000660100001000040010140054140054140054140042140054
600241400531049111001100114003813940012936425700144001020004100003001020000100001264507669383014326437114001714005314005313182103132436600103002010000200006002010000200001400571400531150021109104001010000100000101000311100020111000011110314041702213957240000606100001000040010140042140042140042140042140054
60024140053104910000134100114003813940012936425700144001020004100003001020000100001264507669383014326437114001714005314005313182103132436600103002010000200006002010000200001400541400531150021109104001010000100000101000220100020111000011110314041132213957240000066100001000040010140054140054140054140054140145
600241400531049101002100114002613940012936425700144001020004100003001020000100001264507669383014326437114001714005314005313182103132436600103002010000200006002010000200001400531400531150021109104001010000100000101000311100021111000011112314041132213957240000066100001000040010140054140054140054140054140054

Test 3: throughput

Count: 8

Code:

  ld1r { v0.4h }, [x6]
  ld1r { v0.4h }, [x6]
  ld1r { v0.4h }, [x6]
  ld1r { v0.4h }, [x6]
  ld1r { v0.4h }, [x6]
  ld1r { v0.4h }, [x6]
  ld1r { v0.4h }, [x6]
  ld1r { v0.4h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16020526739200100100780100267132121216251601451008004580000100800008000050011731831887343126709267082673767610367071601002008000080000200800008000026737267371180201100991001008000080000010080019194380058012618004061194319051101161126734130180000800001002671626738267162673826729
160204267282000000006000026713200025160145100800458000010080000800005001174628188733402670926708267376732036672160100200800008000020080000800002672826728118020110099100100800008000001008000004380039000398003961394300511011611267051010080000800001002672926729267092672926729
1602042672820000000163010126713012121625160100100800008000010080000800005001168754188357312670926728267376758036695160100200800008000020080000800002672826728118020110099100100800008000001008000004380000000398003961394300511011611267251010080000800001002672926729267292672926729
160204267282000000006301002671301212025160145100800458000010080000800005001173183188527102670926728267376766036694160100200800008000020080000800002672826728118020110099100100800008000001008026004380000000398003961394300511011611267051010080000800001002672926729267292670926729
16020426728200000100297010126713212121625160145100800458000010080000800005001168754188733402670926728267376750036692160100200800008000020080000800002672826708118020110099100100800008000001008000004380000000398003960394300511011611267251010080000800001002672926729267092670926729
16020426728200000000630100267130000251601001008004580000100800008000050011746281887387026709267282673767500366951601002008000080000200800008000026728267281180201100991001008000080000010080000008000000039800396139000511011611267251010080000800001002672926729267092672926709
1602042672820000000187000026713201202516014510080045800001008000080000500117462818840111266892672826737675003669616010020080000800002008000080000267282672811802011009910010080000800000100801300080039000398003961394300511011611267341313080000800001002673826716267382673826716
16020426737200101000258010326722277192516016510080065800001008000080000500116929518794150267182671526737665903670016010020080000800002008000080000267372673711802011009910010080000800000100800192043800592016080039615943191511011611267341313280000800001002673826738267382673826745
160204267372011111006901012671321212162516014510080045800001008000080000500117462818873341267092672826715675203679316010020080000800002008000080000267082672811802011009910010080000800000100800000438003900039800396039430051101161226708010080000800001002672926729267292672926729
1602042672820000000048001012671301212162516014510080045800001008000080000500117318318873401267092672826737675803669516010020080000800002008000080000267082672811802011009910010080000800000100800000438003900039800390104300511011611267251010080000800001002672926729267292672926709

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
16002526743200110200144310410426718018183251600731080063800001080000800005011730931885113126714267332673366770367221600102080000800002080000800002673326737118002110910108000080000010800212042800561016280000615642190502014163326734998000080000102673426738267202673426734
16002426733200111101064010226725218181625160072108006380000108000080000501169681188267702671926734269846680036713160010208000080000208000080000267332673311800211091010800008000001080020194180057111598003861574219150203163726730998000080000102673826734267382673826734
16002426733200110100077000226718218181525160073108006380000108000080000501167999188380112671826733267336677036761160010208000080000208000080000267332673311800211091010800008000001080022194280057200628003761574219150203163326730998000080000102673426734267162673426716
16002426733200100100065010326718218181525160073108006180000108000080000501175054188364502671426733267336677036773160010208000080000208000080189268962673311800211091010800008000001080020204280057110628003861574219250203164326730998000080000102673426716267342673426734
1600242673320010110001280103267182181815251600731080061800001080000800005011715701887298026726267332673367219736722160376208000080000208000080000267332673311800211091010800008000001080020190800190029558003800574219250203163326730998000080000102673426716267342673426734
16002426715200101100083010326700218181625160071108006280000108000080000501167530187790902671426733267336722036713160010208000080000208000080000267332673311800211091010800008000001080020194280056100598003801584219250203167326730998000080000102673426734267342673426734
16002426733200100000080010326718218181525160029108006180000108000080000501167530187790912671426733267336735036713160010208000080000208000080000267332673311800211091010800008000001080021214280058101248000061584219150203163326712908000080000102671626734267342673426734
16002426733200100100071010126700218181625160073108006380000108000080000501171570188267702671426733267336768036708160010208000080000208000080000267332673311800211091010800008000001080020194280057001218003861574219250203163326730998000080000102673426734267372673426734
160024267332001111000287010326718218181625160071108006280000108000080000501164410187754402671426733267336771036717160010208000080000208000080000267332673311800211091010800008000001080021194280058100588003961574219150203163326730098000080000102673426734267342673426734
1600242673320010100108301042671821818162516007110800628000010800008000050116753018758480267172673726938673703671316001020800008000020800008000026733267331180021109101080000800000108001919428005700259800386157019050203163326730998000080000102673426734269222678126777