Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1R (8H)

Test 1: uops

Code:

  ld1r { v0.8h }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.002

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.002

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
62005293212191211400111462428828011723420021002100010001000500011929322630290462923831020001000100010001000291652911211610011000100001000210000010022231282590626832303654120717305338159263128345163991372615257100010002926129279293052930729237
62004292582191501300260469128806001724020021002100010001000500011934522618291412935531020001000100010001000292242917511610011000100001000210000210002031279791316890307463020601307038139374128344162591394514940100010002924929174292182934029329
620042923521911090021455028881001728720021004100010001000500011941222616290762924831020001000100010001000291582912811610011000100001000210000010002031281991366859311642820617308338159262728388164351411515068100010002926729294293952931129345
6200429226219100110031452528879001725520011002100010001000500011926122590290062925231020001000100010001000291542923011610011000100001000210000010002021303691896898313973320687304638135243128409163761382515037100010002922929261293172922529322
6200429260220130130001454628788001727920021001100010001000500011938222601291042931631020001000100010001000290822917311610011000100001000210000010002021290593316855305562620623304038135292728471161431388415091100010002915629223293442931429313
6200429269219801400181455928812001725620021003100010001000500011919022594291242929431020001000100010001000291672913211610011000100001000210000010001031294492086838307643020623306538209302928474165951406915087100010002932529299293652932129248
62004293242191201100271458228882001734920031002100010001000500011938322649290152930531020001000100010001000291292910511610011000100001000210000010002021282991886955308363220642315338167352628389164281385514940100010002924029277292952930429326
6200429267219130150020458728807001730320021002100010001000500011915222610291092933631020001000100010001000291572912711610011000100001000210000010002021297992696928310142220635308938205273428374162251395114892100010002929429267292672922829297
6200429270219901100214534287810017206200210021000100010005000119412225942908529333310200010001000100010002911029159116100110001000010002100000100020212871919468633053527206823095381413323128479163741411115133100010002932629264293352935529284
62004292592191101400204550287140017285200110021000100010005000119360226802903029178310200010001000100010002919629170116100110001000010002100000100020212939945269213013533206313041382010312928371163381394315190100010002930129327293002930129270

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1r { v0.8h }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602051400591049000000101001400361394061293632570102401002000210000301002000010000126402066937341431093911400111400511400881317973132399601003020010000200006020010000200001400511400351150201100991004010010000100001100100000110000000100001100032101126111395594000010100100001000040100140052140052140052140036140036
6020414005110490000002200001400381394111293632570102401002000010000301002012310000126402066937341431093911400301400731400421317983132382601003020010053200006020010000200001401661405591150201100991004010010000100000100100000110000003100000000032101126111395594000001013100001000040100140052140052140052140052140052
6020414005110490000001010014003613940612936325701004010020002100003010020000100001264020669294714308701114002714005114005113179731323826010030200100002000060200100002000014005114005111502011009910040100100001000001001000001100000001000011000321011261113956540000101013100001000040100140052140055140052140052140052
6020414005110490000001000014003613941112936325701024010020002100003010020000100001264020669373414310939114001114003514005113179731323826010030200100002000060200100002000014005114003511502011009910040100100001000001001000001100000001000001000321011391113955940000131010100001000040100140036140036140036140055140055
6020414005110490000001000014002013941112936525701004010020002100003010020000100001264020669387814310939114002714005114003513179731323996010030200100002000060200100002000014005114005111502011009910040100100001000001001000001100000001000011000321011271113955940000101310100001000040100140036140052140052140036140036
602041400511049000000001001400361394061293472570100401002000210000301002000010000126402066937341430870111400301400521400511317973132399601003020010000200006020010000200001400511400511150201100991004010010000100000100100000110000000100001101032101127111395594000001010100001000040100140052140036140052140052140036
6020414005110480000000000014002013940612934725701024010020002100003010020000100001264020669373414310939114003014007714005113179334132460601003020010000200006020010000200001400511400511150201100991004010010000100000100100000110000000100001100032101126111395654000001010100001000040100140055140052140052140052140055
6020514005110490000009101001400201394271293632570102401002000010000301002000010000126380366937341431093901400271400351400511317973132399601003020010000200006020010000200001400511400511150201100991004010010000100000100100000110000000100001100032101126111395594000010010100001000040100140036140052140052140036140055
602041400541049000000101001400201394111293634470102401002000210000301002000010000126395866929471431093911400271400521400511317973132382601003020010000200006020010000200001400511400511150201100991004010010000100000100100000010000003100001100032101127111395654000010010100001000040100140036140055140052140036140052
6020414005110490000000000014008413942912936325701024010020002100003010020000100001263958669373414308701114001114005114005113180031323826010030200100002000060200100002000014005114005111502011009910040100100001000001001000000100000001000011000321011261113954640000101010100001000040100140052140052140036140052140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0055

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3f | 43 | 49 | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
600251400561050010002000140028111393901293662570014400102000410000300102000010000126499366953181432661211400310140055140055131811313242660010300201000020000600201000020000140443140057115002210910400101000010000010100000210001011000111100314051132313957440000606100001000040010140056140044140056140044140056
6002414005510490000010101400401013940212936611770014400102000410000300102000010000126495766946941432671711400190140055140055131825313244161185300201000020000600201000020000140055140043115002110910400101000010000010100000210008011000111000314021133413957440000666100001000040010140044140056140056140056140057
600241400551049000001010140063111394021293552570014400102000210000300102000010000127423066939261432715711400190140055140043131823313243860010300201000020000600201000020000140055140055115002110910400101000010000010100000210001011000111100314021132313957440000006100001000040010140428140044140056140426140056
600241400551049000003817610140040111394021293662570014400102000410000300102000010000126499366948301432681911400210140057140055131823313243860010300201000020000600201000020000140043140055115002110910400101000010000010100000210001511000111100314041134313957440000606100001000040010140044140056140056140056140044
600241400551049001002010140040111394021293552570012400102000210000300102000010000126453466940701432661211400310140055140055131823313242660010300201000020000600201000020000140043140043115002110910400101000010000010100000010001011000111000314041133313957440000066100001000040010140056140056140060140056140056
600241400551049010001010140028111394021293662570014400102000410000300102000010000126457666939741432539611400310140043140055131811313242660010300201000020000600201000020000140055140055115002110910400101000010000010100000210007211000101100314031132313957440000606100001000040010140044140056140044140044140044
600241400431049000002000140040101394021293662570014400102000410000300102000010000126493966939181432661211400190140055140055131811313242660010300201000020000600201027520000140048140056115002110910400101000010000010100000010001411000111020314021133413957440000660100001000040010140432140044140056140056140044
600241400581049000112000140040111393901293662570014400102000410000300102000010000126675867021451433142011400310140055140055131823313243860010300201000020000600201000020000140043140047115002110910400101000010000110100000010001011000111100314031133213957440000606100001000040010140056140056140044140044140044
600241400561050000002010140028101394021293552570012400102000410000300102000010000126493666949821432702501400310140043140043131823313243860010300201000020000600201000020000140055140043115002110910400101000010000010100000210001011000101100314031134213963140000660100001000040010140044140056140044140056140044
600241400431050000001010140040101394021293662570014400102000410000300102000010000126496666943581432681611400310140055140055131823313242660010300201000020000600201000020000140055140055115002110910400101000010000110100040210001011000111100314031133213957440000660100001000040010140044140056140056140056140056

Test 3: throughput

Count: 8

Code:

  ld1r { v0.8h }, [x6]
  ld1r { v0.8h }, [x6]
  ld1r { v0.8h }, [x6]
  ld1r { v0.8h }, [x6]
  ld1r { v0.8h }, [x6]
  ld1r { v0.8h }, [x6]
  ld1r { v0.8h }, [x6]
  ld1r { v0.8h }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1602052673420001000004501002671721212162516014510080045800001008000080000500117462818873871267092672826728665003668616010020080000800002008000080000267282672811802011009910010080000800001100800000430800390004280039613943511011611267330101080000800001002672926729267292672926729
16020426728200000000000101267132121216251601451008004580000100800008000050011758561883573126709267302672866500366881601002008000080000200800008000026728267281180201100991001008000080000010080000000800390003980039613943511011611267330101080000800001002672926729267292672926729
1602042672820000000004500012671321212162516014510080045800001008000080000500117462818835731267092672826728665003668616010020080000800002008000080000267282672811802011009910010080000800000100800000430800390003980039613943511011601267250101080000800001002672926729267292672926729
1602042672820100000004500012671321212162516014510080045800001008000080000500117318318835731267092672826728665003668616010020080000800002008000080000267282672811802011009910010080000800000100800000430800390003980039613943511011611267270101080000800001002672926729267292672926729
1602042672820000000004501012671321212162516014510080045800001008000080000500117462818873871267092672826728665003668616010020080000800002008000080000267282672811802011009910010080000800000100800000430800390003980039613943511011611268160101080000800001002672926729267292672926729
160204267282000100000450001267132121216251601451008004580000100800008000050011746281887334126709267282672866500366661601002008000080000200800008000026728267281180201100991001008000080000010080000043080000000398003961390511011611267250101080000800001002672926729267092672926729
160204267282000100000001012671321212162516014510080045800001008000080000500116875418835731267092672826728665003668616010020080000800002008000080000267282672811802011009910010080000800001100800000430800390003980039613943511011611267330101080000800001002672926729267292672926729
1602042672820000000004500012671321212162516014510080045800001008000080000500117462818873911267092672826728665003668616010020080000800002008000080000267282672811802011009910010080000800001100800000430800390003980039613943511011611267330101080000800001002672926729267292672926729
1602042672820000000004500012671421212162516014510080045800001008000080000500117462818873341266892672826728665003668616010020080000800002008000080000267082672811802011009910010080000800000100800000430800000003980039613943511011611267250101080000800001002672926729267292672926729
160204267282000001100450001267132121216251601451008004580000100800008000050011746281887334126709267282672866500366861601002008000080000200800008000026728267281180201100991001008000080000010080000043080039000080039013943511011611267250101080000800001002672926729267292672926729

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | d9 | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600252673920110001100021010326700070025160029108001980000108000080000501170179188847502671826715267376660036717160010208000080000208000080000267372673711800211090101080000800000108001919008005810061800406159431915020716029726745013028000080000102673826717267382673826738
1600242673720010001010067000326700270192516007510800658000010800008000050116737118860960267182673726737668203671716001020800008000020800008000026715267371180021109010108000080000010800212000800581012280040605901905020101600111026762001328000080000102673826716267162671626716
160024267152001000000006600032672230720251600731080018800001080000800005011677221877160026696267152673766820366951600102080000800002080000800002673826737118002110901010800008000001080020194308005910060800406159431915020616009926823001328000080000102673826749267212673826738
1600242671520010100100066010026722377202516002910800658000010800008000050117017918831901267182671526716668203671816001020800008000020800008000026737267371180021109010108000080000010800202043080019101218004061194319150209160069267340131328000080000102673826738267382671626738
16002426737200100000000670000267223770251600291080063800001080000800005011677221877331126718267372673966810367171600102080000800002080000800002673826739118002110901010800008000001080019194308001901060800006158019050201016009626736013028000080000102673826738267382671626738
160024267372001010100006700022672237712516007510800658000010800008000050117017918854601267182673726715668203671716001020800008000020800008000026737267151180021109010108000080000010800202043080059000608004000194319050209160079267120131328000080000102673826716267382673826738
160024267152001010110009700032672239719251600751080065800001080000800005011692951878937126718267372673766820367171600102080000800002080000800002673726737118002110901010800008000001080019200080059100338004060190190502071600107267490131308000080000102673826716267382673826716
1600242673720110101000066000226722377192516007410800638000010800008000050116946118771601267182673726715668103669516001020800008000020800008000026737267371180021109010108000080000010800202043080059101618004061604319050206160010626742001318000080000102673826738267382673926738
16002426715201100010000670103267000771925160073108006580000108000080000501167371187829612671826716267376662036717160010208000080000208000080000267372671511800211090101080000800000108001921433180019110525280949015943196517768000141427928113018000080000102772227925278862784827741
1600242674220812100007711266160032784301574732671616351080929807801081301812465011842201932091127677278642788471161501307469162191208132381332208133481140278342786671800211090101080000800000108079919432338096901063028091001194319651791279001015280250131328000080000102788127861278832787827896