Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD3 (single, post-index, B)

Test 1: uops

Code:

  ld3 { v0.b, v1.b, v2.b }[1], [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.003

Integer unit issues: 1.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 3.003

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
640052955222844003001003000453728840001716350031000300310001000300010005000500035637032283629136292933105000100030002000600029235292591161001100010000100003100000001000002001309894076905319205320355319338261149532855510001628213346145091000300010002931229327292602939129409
640042947522703001001006000472628913001714350031000300010001000300010005000500035691062278429195292303105000100030002000600029219291071161001100010000100003100000001000000001312192826931316025620281315338231752552859910001623413335144521000300010002929529363295052929929231
640042928522702000001002000461228860001704550031000300310001000300010005000500035709022286729094292463105000100030002000600029229292181161001100010000100003100000471003003001304591046873310115520339322738181150552849410001591013258142091000300010002931529487292742936529344
640042917722703002001000000474029164011773750001000300310001000300010005000500035689082281829204295913105000100030002000600029525294621161001100010000100003100000031000203001307492736945313604620430309738241450572842310001631113444143931000300010002939429300294482933829282
640042937022702003001001400046172888100175415003100030031000100030001000500050003563107228162911829439310500010003000200060002932629239116100110001000010000010010001100120001471318393216938319905020505334038221559492867110001620313322145381000300010002940929464293352935229308
640042930322803000001002000459528876001715750001000300310001000300010005000500035703072284429140293113105000100030002000600029148291191161001100010000100002100000001000213001313694526913317505920380309338242051492843410001612213416143441000300010002936829448292302935529314
640042943222702003001002000472328942001714350031000300310001000300010005000500035621052282329137293953105000100030002000600029192292421161001100010000100000100000001000300001314593746907307414820318318538251551542850810001624213473144121000300010002946029287292662939129469
640042942622904004000000000463428904001697750031000300310001000300010005000500035806042283829192292863105000100030002000600029152292551161001100010000100000100000021000212001317892926885314405320395320638281750492857710001633513376142521000300010002941529360293422938329329
6400429243228020020010020004654288020017091500310003003100010003000100050005000356340022801292302926731050001000300020006000291352919311610011000100001000031000000110003030981329093476973318114720577324238231159522874710001606813330146461000300010002940429506294972954229412
6400429516237020020000020014715287111117152500010003000100010003000100050005000357270172283229123291383105000100030002000600029145290871161001100010000100003100100041001212001304092136906315615720324321038211753552833010001631513347142981000300010002917229220292682928429294

Test 2: throughput

Count: 8

Code:

  ld3 { v0.b, v1.b, v2.b }[1], [x6], x8
  ld3 { v0.b, v1.b, v2.b }[1], [x6], x8
  ld3 { v0.b, v1.b, v2.b }[1], [x6], x8
  ld3 { v0.b, v1.b, v2.b }[1], [x6], x8
  ld3 { v0.b, v1.b, v2.b }[1], [x6], x8
  ld3 { v0.b, v1.b, v2.b }[1], [x6], x8
  ld3 { v0.b, v1.b, v2.b }[1], [x6], x8
  ld3 { v0.b, v1.b, v2.b }[1], [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
32020616005611990000000034010016005016615988725400112801002400128000080100240000800004006344000952279296516004016004116005979927380047400100200800002400002001600004810441600591605344180201100991001008000080000110080006608002900128800226114180051100117111600620800000138000024000080100160066160066160067160066160066
320204160065124010000100600001600441061598382540010080100240000800008010024000080000480499400096227929651600371600591600597992438004740010020080000240000200160000480000160065160065118020110099100100800008000011008006362780029031348002561282761511001171116005608000013108000024000080100160042160060160042160060160042
320204160059124000001100210000160050166159887254001128010024001280000801002400008000048049940008422794693160046160065160065799143800234001002008000024000020016000048000016005916005611802011009910010080000800000100800000080000000318001461141800511001352116006208000013138000024000080100160066160066160047160066160047
320204160065124111010000650000160044166159876254001068010024000080000801002400008000040063440009922792965160037160059160059799243800484001002008000024000020016000048000016004616004611802011009910010080000800000100800636080028000288002361292650511001172116005608005810108000024000080100160060160117160060160057160060
320204160472131200000066193001016003116615985325400112801002400068005880100240000800004804994001122279469316004616006516006579933380041400100200800002400002001600004800001600591600561180201100991001008000080000010080000018800150109738001661142000511001172116006218005816138000024000080100160047160066160066160066160047
32020416006712411110000028900001600261601598242540010680100240006800008010024000080000480499400088227929651600401600591600417990938004740010020080000240000200160116480000160065160065118020110099100100800008000001008000662780028010308002361292750511001171116005618000010108000024000080100160057160060160061160057160060
32020416005912410010000022001116005016015985325400264801002400128000080100240000800004804994001172279469316004716006516006579933380015400100200800002400002001600004800001600571600561180201100991001008000080000010080000018800160120800166113200251100117111600621800001308000024000080100160066160066160066160066160066
3202041600461241100000013300101600440661598732540010680100240000800008010024016880000400634400094227893451600401600591600417990938004740010020080000240000200160000480000160065160065118020110099100100800008000001008006972780028011991800226128276051100117111600561800581308000024000080100160118160060160060160042160060
32020416005912410000013315217600016022816615974913040056880274240156801748027424050480174426585403034227921631601661602971606597885835800084009522008017424052220016034848104416029416023731802011009910010080000800001100801160188022800029108000061142000511001171116006208000013138000024000080100160066160066160066160066160066
3202041600651240100110003400101600261661598732540010680100240006800008010024000080000480499400093227929651600381600591600597992438004740010020080000240000200160000480000160065160066118020110099100100800008000001008000562780028010298000061282760511001171116005618000013138000024000080100160060160057160057160060160060

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 24 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200251603011242000000422833520016004016615987202540001680010240006800008001024000080000480049400090227927160116003616005516005579909380044400010208000024000020160000480000160041160041118002110910108000080000010800000138001300380000611017000502051746160038080000068000024000080010160056160056160056160056160056
32002416005512410000100040000160040106159863025400016800102400008000080010240000800004800494000902279271601160036160041160041799233800374000102080000240000201600004800001600551600551180021109101080000800000108000001380013001580010011400005020617651600521800001068000024000080010160056160056160056160056160057
320024160055124200000000190001600401001598730254000168001024000680000800102400008000048004940008822792716011600361600551600557992338002340001020800002400002016000048000016005516005511800211091010800008000001080000008000010198001361130000502051734160038180000968000024000080010160056160057160056160056160042
32002416004112400000000018000160040006159872025400016800102400068000080010240000800004800494000872279271601160022160041160055799233180037400010208000024000020160000480000160055160041118002110910108000080000010800000080000001380015611217000502081796160052180000998000024000080010160042160056160056160056160042
3200241600551241000010001900016002610615987202540001680010240006800008001024000080000480049400082227927160116002216005516004179909380023400010208000024000020160000480000160041160055118002110910108000080000010800000188000000080013501217000502061745160052080000008000024000080010160056160056160056160056160056
32002416005512400000000019000160040160159872025400016800102400068000080010240000800004800494000922279271611160036160055160055799233800374000102080000240000201600004800001600411600551180021109101080000800000108000001780013101680013610170005020517671600520800009108000024000080010160056160056160042160056160056
3200241600551240000000003100016004116615987202540001680010240006800008001024000080000480049400084227908590116002216005516005579923380037400010208000024000020160000480000160055160055118002110910108000080000010802282080128022880801840012170205171826056160301080116968000024000080010160177160296160234160294160232
320024160172124301001124619176001600261661598720254000108001024000680000800102400008000048004940009022792716011600361600551600557992338003740001020800002400002016000048000016005516005511800211091010800008000001080000013800120019388007360017001502061746160052180000968000024000080010160042160056160042160056160056
320024160055124100001000340001600401661598720254000168001024000680000800102400008000048004940008822789345011600361600551600557992338003740001020800002400002016000048000016005716005511800211091010800008000001080116013800670098001260017000502051778160052180000068000024000080010160056160056160113160056160056
3200241600551240000000002100016004016015987202540001080010240006800008001024000080000480049400082227927160116004316006216006579930380044400010208000024000020160000480000160062160062118002110910108000080000010800087228002400238001861252460050206176516005918000010108000024000080010160063160118160063160063160063