Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

ST3 (multiple, post-index, 8B)

Test 1: uops

Code:

  st3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 0
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 4.000

Issues: 5.000

Integer unit issues: 1.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 2.000

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 49 | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5e | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd store (99) | inst ldst (9b) | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | l1d cache miss st nonspec (c0) | c2 | c9 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
640072950023724003000030004647293340184605000100020002000100020002000500021622160000221835293062953331050002000200050006000295302935411610011000100020000402000000200060013153941969153182106120439330238142064682870710001618113113143972000200010002951529495294562943029396
64004294942372300290000100472129257018349500010002000200010002000200050002161816000022183329023295123105000200020005000600029374292971161001100010002000040200000320004001330693916943317016612042132693816764672873710001638213176144832000200010002946529546295142954129396
6400429435237200033000180104561292490183065000100020002000100020002000500021602160000021870291492953131050002000200050006000293912940211610011000100020000402000000200000013273938269733161116020550329538191465642869310001614813155143792000200010002937029360294722947529473
640042939623623003100091004666292800183155000100020002000100020002000500021620160000321780291762938431050002000200050006000294142947511610011000100020000402000000200040013160943869493138136320487325538202375692881410001610313219143822000200010002953729316295412949029563
640042943523626002400090004684292530183455000100020002000100020002000500021616160000021816291632942431050002000200050006000293452935611610011000100020000002000000200040013305928069083168116320410329138192167602877110001605113069141092000200010002949229515294332948329432
640042945423723002500051000475529257018389500010002000200010002000200050002160416000002185629114295773105000200020005000600029435293971161001100010002000000200000020006001332094226981313876420432328038171264592882310001632813176143452000200010002949429488295772956829341
6400429472237240023000150004662292000184455000100020002000100020002000500021610160000321849290662953731050002000200050006000293922931211610011000100020000402000000200040013201929669503182116620364322838201659622880010001631813225145682000200010002949629482295122954129507
64004295022362600290009100461829226018235500010002000200010002000200050002161816000032180829190294193105000200020005000600029374292971161002100010002000040200000020004001313995106932319666320372326638151468642879610001595813324145402000200010002952729634295242956129405
640042950123723003200092004569292750183785000100020002000100020002000500021610160000021858291922944531050002000200050006000293392938711610011000100020000402000000200040013091931669383170116520535323938192366602879410001596713153144332000200010002939529509295312952029360
640042951023729002600091004801291790183945000100020002000100020002000500021607160000021839292442953231050002000200050006000293942937311610011000100020000002000000200040012902950069083124176820405327038231265622877010001633213014146042000200010002942929492294232945629482

Test 2: throughput

Count: 8

Code:

  st3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  st3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  st3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  st3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  st3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  st3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  st3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  st3 { v0.8b, v1.8b, v2.8b }, [x6], x8
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 18 | 19 | 1e | 1f | 23 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3202078007162101001000121505981280035000254083298010016439316000080100160000160000481907231993913015710800258005080050073140010020016000016000020040000048000080241804452180201100991001008000080000100160012123800161034101516000214380151091171180047800000160000160000801008005180052800508005980051
32020480050620110010000150194328003501612540523580100164007160000801001600001600004804993679430128940008002580051800500331400100200160000160000200400000480000802168005011802011009910010080000800001001600121238011600141015160002143812051091171180048800000160000160000801008006380050800508006280051
3202048005062010000000015012182800461600254034358010016473516000080100160000160000480499227979712953160800238004580050033240010020016000016000020040000048000080051800491180201100991001008000080000100160013038011600140114160000123812051091171180045800000160000160000801008006380052800518006080050
32020480050621100000006120132800321615225400108801001600091600008010016000016000048049923199391298756080025800508005103314001002001600001600002004000004800008004980049118020110099100100800008000010016001312380116001201279160000143812051091171180058800000160000160000801008005180062800518005080051
3202048006162011000000018044202800471616025402850801001640421600008010016000016000048049923199391297912080025800518005203404001002001600001600002004000004800008004480058118020110099100100800008000010016001312320216001400185160002143812151091171180046800000160000160000801008005180062800508006280052
320204807026341000000001304584280042161602540425780100166064160000801001600001604324804993686258129822308002580051800500332400100200160120160000200400000480000800618006211802011009910010080000800001001600131338011600141130616000214380151091171180047800000160000160000801008005180051800628005080050
320204800506201000000012150146928003516160254063938010016399616000080100160000160000480499239991612913600800258004980051034340010020016000016000020040000048000080049800621180201100991001008000080000100160013133801160014021516000014012151091171180046800000160000160000801008005080050800498006080060
320204800496201000000001302426280035161602540421480100160019160000801001600001600004804983599288129527108002580049800510332400100200160000160000200400000480000800628005711802011009910010080000800001001600121300016001401188160002123812051091171180048800000160000160000801008005080046800518006280050
320204800506211100000001602913280035160025406236801001655491600008010016000016000048049936794061287342180025800578006203314001002001600001600002004000004800008005080048118020110099100100800008000010016001213383016001411300160002143812051091171180055800000160000160000801008005080062800518005180052
320204800596201100100001601128003516162254025298010016616616000080100160000160000480499223517112891651800248006180059033240010020016000016000020040000048000080051800461180201100991001008000080000100160013133801160000001416000214012051091171180047800000160000160000801008006280051800628005180063

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 37 | 3a | 3f | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd store (99) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss st (a2) | a4 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | bc | l1d cache miss st nonspec (c0) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ? int output thing (e9) | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
3200278006062110020061500024472800320160254049158001016266516000080010160000160000480049215927812948070800248021980049033240001020160000160000204000004800008005080061118002110910108000080000101600000001160002011716000023812005019003173380042080000160000160000800108005180046800468004680046
32002480045620000000030004998080036000254000188001016000816000080010160000160000480049263988512887460800258004580050033240001020160000160000204000004800008004480050118002110910108000080000101600610320016000200111600022320005019003173380046080000160000160000800108004680050800468004680047
320024800496200001000151005204080029161602540314680010164662160000800101600001601084811053679391129605008002380050800500331400010201600001600002040000048000080219800501180021109101080000800001016001200001600120014160000143812005019003173380047080000160000160000800108004680046800518005080046
320024800506200000001216000594928003716022540602380010166465160000800101600001600004800492399913129337708003680049800450340400010201600001600002040000048000080045800441180021109101080000800001016000003200160002002160002000005019003173380042080000160000160000800108005080045800468005080052
320024800456201100000160001825080036000254043218001016001716000080010160000160000480049223989412884110800258004980049033240001020160000160000204000004800008005980061118002110910108000080000101600130030160012008160002143812005019003173380051080000160000160000800108004580045800458006380049
3200248006162100000001510043702800341616049403792800101654441600008001016000016000048004921552611297159080024800458004403274000102016000016000020400000480000800448004411800211091010800008000010160000130001600121021600022012005019003173380058080000160000160000800108022080062800518005080062
320024800446211000001215000548108004616160254054968001016424216000080010160000160000480049231993912949880800258021780050034440001020160000160000204000004800008005080050218002110910108000080000101600751232201600140015160002143812105019003173380046080000160000160000800108004980045800458005180051
3200248005162001100101800038052800341616125405074800101640841600008001016000016000048004936809051296280080025800508005003314002952016000016000020400000480000800508004311800211091010800008000010160000143800160060111516000223612005019003173380046080000160000160000800108005180051800518005180050
320024800456201000006160004928080035161602540092980010160008160000800101600001600004800492399926128003508002580044800500343400010201601201600002040000048000080049800511180022109101080000800001016001314002160014109301600000320005077103433380587080177160000160000800108068980052800458005080052
320024805166251002334863830004683280036161602540002080010165994160000800101600001600004800492151620129436008002480049800490328400010201600001600002040030048000080050800491180021109101080000800001016000000001600020001600002320005019003173380120080000160000160000800108004580047802178005080062