Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST1 (multiple, post-index, 4 regs, 8H)

Test 1: uops

Code:

  st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 4.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
640052948923701200019000066110465429157042328650001000400010004000500021613140017005285152943831050004000900029146292191161001100010004005516004004014400040410132139151688031368492027633493795134948285841000160291294514565400010002934029385293802944829405
6400429411235001500180000841004627291184423309500010004000100040005000216042200170092852229328310500040009000292482921411610011000100040000000400000040000120001300393226922307010472020633403797174651286111000160971294214668400010002945629376294572948129340
640042931623500200019000029710046562916644232585000100040001000400050002159360017021285592938931050004000900029226293611161001100010004000012004000000400000000131449299685830548632037032673801185053286581000161201300314321400010002925629460293832948329481
64004293232350014001500003631004571291524423254500010004000100040005000216101000170222866329518328500040009000292432928711610011000100040000120040000004000012000131209454695231183482051733183808184650286791000164321299514502400010002939829388294162932129446
640042929123601161117100039911046362914044232975000100040001000400050002160700017045286332949531050004000900029313292781161001100010004000000040000004000080001319092376890312611442039332533807224646286071000165531318614412400010002942329344293102936429428
64004292992370018001700003691104651291120023225500010004000100040005000216075001703028558294513105000400090002940129243116100110001000400008004000000400000000131369526693031269462049432853812134252286021000160121318414715400010002948429349292892930129382
64004293422350018002100003841104613292310023347500010004000100040005000216106001702628712294753105000400090002927329294116100110001000400008004000000400008000132099518691331017492042633313816174544286601000162221313914330400010002937729267293692939829430
64004294252360019002000003481004670293010023275500010004000100040005000216100001703628652293753105000400090002930729317116100110001000400008004000000400000000132319466693431743442032832513810184045286101000163591288214504400010002936929383293512936129472
64004292322370017001700003960004788291390023557500010004000100040005000216234001706628550294683855005400090002927629221116100110001000400000004000000400000000130719619697330979432027632453810124544285661000159551307514322400010002944229267292942942929486
64004294982380119011610006650046742917500232865000100040001000400050002161550817066287962959931050004000900029147292711161001100010004006600040040174000412400131239345697731557462040032723810125243286061000162221311314230400010002930829504294172937429434

Test 2: throughput

Count: 8

Code:

  st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202051600471240101100020100116003216164254001008010032000080100320000400518736002811600220160051160064799943800364001002003200002007200001600541601331180201100991001008000080000100320015144400320016112232000216441415110117111600518000032000080100160053160053160048160054160054
32020416005112411000004520000116032116161254001008010032000080100320108400519735993301600270160054160054799923800364001002003200002007200001600471600541180201100991001008000080000100320014144400320016112732000216441415110117111600518000032000080100160055160135160056160055160053
3202041600631241111010014000116017016161254001008010032000080100320108400519735995601600290160063160054799923800454001002003200002007200001600541600531180201100991001008000080000100320014154400320018102432030216441405110126111600448000032000080100160056160048160064160065160177
3202041600471241101000018000116004816163254001008010032000080100320000400518736002801601110160054160052800663800364001002003200002007200001600511600631180201100991001008000080000100320014154401320016011632006216441425110117111600518000032000080100160055160057160055160055160055
32020416005212411120000190011160039161612540010080100320000801003200004005197360028016002901601851600547999238003440010020032000020072000016005416005211802011009910010080000800001003200741444013200160080432000216441405110117111600518000032000080100160064160053160053160053160065
320204160055124010200001900011600371616225400100801003200608010032000040051873597180160027016005416018679991380037400100200320000200720000160051160063118020110099100100800008000010032001514440032001461163200621601405110117111600498000032000080100160048160055160055160048160053
32020416006312411210000180001160048161630254001008012932000080100320000400518736046001600270160054160052799903800374002372003200002007200001600551600541180201100991001008000080000100320015154400320016101632000216441405110117111600498002932000080100160189160048160054160053160048
3202041600471241101100018000116004916164254001008010032000080100320000400518736002801600390160054160052799908800294001002003200002007200001600521600551180201100991001008000080000100320014144400320016011432000216441405110117111605428002932000080100160319160187160183160320160339
32020416045912431020395402850001160395161623010540027880158320780809993231324753557499120016042201604511603308022423802264003742003202402007208101603251605913180201100991001008000080000100320014154401320016011932000216441415110117111600518000032000080100160055160055160048160055160055
3202041600631241100100121800011600371616125400189801003200008010032000040051873597190160029016006316005479992380037400100200320000200720000160054160057118020110099100100800008000010032001414001320016112232000216441415110116111600518000032000080100160055160055160055160055160055

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
320025160052124110000014418101160032016425400010800103200008001032000040006873600280001600290160054160056799973800374001472032000020720000160052160055218002110910108000080000103200141544003200161020320002164414105020321732121600498000032000080010160055160055160188160055160052
320024160054124110000013218001160038003254000108001032000080010320000400069735990800016002901600521600548000110800344000102032000020720000160054160052118002110910108000080000103200141444013200760122320002164414005020321730291600518000032000080010160056160055160064160064160055
32002416005212411001010170011601721616148834000998079332018080097320324479421737439200016035901605831604578030345802264005582032408020720540160596160464418002110910108000080000103201971444593320256032338320242164414005056284425281604108011632000080010160591160457160595160458160056
32002416004711991101000190011600321616525400010800103200008001032000040006873604380001600290160054160054799893800364000102032000020720000160054160053118002110910108000080000103200141444023200161014320000164414015020301712321600518000032000080010160064160055160064160064160055
3200241600521241110100018001160039161602540001080010320000800103200004000697359884000160029016005416005579992380037400010203200002072000016005416005411800211091010800008000010320014150003200160018320002164414005020291734301600518000032000080010160055160055160056160055160055
320024160052124111010012190011601171616325400010800103200008001032000040006973599080001600220160047160054799923800294000102032000020720000160054160054118002110910108000080000103200151544013200160018320002164314105020341729271600498000032000080010160052160053160053160055160055
320024160054124111000012170011600391616325400010800103200008001032000040006973599561001600290160054160054799913800364000102032000020720000160054160054118002110910108000080000103200141442003200160016320004164414005020311731311600498000032000080010160055160054160056160055160053
32002416005412411000000190011600381616225400010800103200008001032000040006873605350001600303160054160052800013800344000102032000020720000160054160052118002110910108000080000103200141544023200161119320002164414105020311712321600518000032000080010160056160055160048160055160055
32002416005412401000000220011600321616225400010800103200008001032000040006973600520001600290160052160055799923800364000102032000020720000160052160054118002110910108000080000103200151444013200160016320002164414105020261726291600518000032000080010160048160055160048160057160054
320024160054124110010012170011600391616225400010800103200008001032000040006973603880001600290160052160054799923800374000102032000020720000160052160054118002110910108000080000103201341444890320196082347320122164414005057334430261604108011632000080010160456160326160593160460160316