Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LD2 (single structure, S)

Test 1: uops

Code:

  ld2 { v0.s, v1.s }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 3.000

Issues: 3.002

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 2.002

retire (01) | cycle (02) | 03 | 04 | 07 | 0a | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 51 | inst issue (52) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 69 | 6d | 6e | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | c2 | cf | d0 | d2 | icache miss (d3) | itlb miss (d4) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | f5 | f6 | f7 | f8 | fd
630052949922145511504489288780001738230022002100020001000500023858122834292152944131030001000200010004000293212930511610011000100011000310010001000202012784936468513053060207223099382833525328346165461424215313100020002941229439295122944029463
6300429489220243003014526289000001732330022002100020001000500023893622718291272942131030001000200010004000292712932711610011000100011000210000001000201012858918668973049153207953036382529474628470166521418615224100020002948929407294632935329460
630042946422044300214738287850001731230022002100020001000500023858422745292042940331030001000200010004000292742924611610011000100001000210000001000212012887922969953093046208023114382627525028454166261432615188100020002938929373294302940129500
6300429426220424002114601289240001745430022002100020001000500023902522775290802944131030001000200010004000292942919711610011000100011000210000001000202013058915668573037348208633100382626535128434166441439115314100020002942729437295552944529465
630042943122022300214519288800001734330022002100020001000500023852022746291342946131030001000200010004000291942932811610011000100001000210000001000202012807917368423064149209003083382827524828453164581442415158100020002942529368294832945929303
630042948822113400214554287880101728930022002100020001000500023871522752292212935431030001000200010004000292862930011610011000100001000210000001000210012782909168313029143206473052382525495428314165031427315471100020002945129502294442939229409
630042945822124200214586289900001739330022002100020001000500023865322756291962942431030001000200010004000292962934711610011000100011000310000001000202012948914568363110247207273108382230494828432166741425815199100020002949329338295052942329482
630042942922023500314663289080001738230022002100020001000500023863422782292012960631030001000200010004000293162931511610011000100011000210000001000203012924921968313057148209133070382629504828459164671415515207100020002950129369295292954629520
630042945522132200414521288751001730830022004100020001000500023856222770292312946731030001000200010004000293072921011610011000100001000210000001000202012859914468483085350207873118382525534828432166971412515418100020002938329506294482934429408
630042944422123411314558289080001737730022002100020001000500023892422750291932940731030001000200010004000293202929711610011000100001000110000001000202012808917268283130452207843079382626515228459165301420615253100020002943729508294132941129417

Test 2: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.s, v1.s }[1], [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
70205140054104900100101014003613940413934612934125801034010030003100003010030000100001264390669373120079451114001114005114005113056231311267010030200100003000060200100005000014003714005111502011009910040100100001000011001000000100000001000010100032102128111395664000001013100002000040100140055140052140036140052140055
702041400511048000001000140020139404139346129360258010040100300031000030100300001000012643906693731200818431140030140051140035130531313115070100302001000030000602001000050000140051140051115020110099100401001000010000010010000011000000010000101000321011281113954440000000100002000040100140055140036140055140036140055
702041400541049000006790001400201394041393461293412580103401003000010000301003000010000126439066937312008184311400271400351400541305313131126701003020010000300006020010000500001400541400511150201100991004010010000100000100100000010000000100001000003210112811139566400001300100002000040100140036140036140055140055140055
702041400541049000000000140020139608139346129357588010340100300031000030100300001000012643106692791200818431140015140051140054130562313112670100302001000030000602001000050266140140140051115020110099100401001000010000010010000001000000010000101000321011281113956640000131013100002000040100140055140036140055140055140036
7020414005410490000000101400391396081393441293602580103401003000010000301003000010000126439066927912008184311400111400541400511305623131126701003020010000300006020010000500001400351400511150201100991004010010000100000100100000010000100100001010003210113311139544400001300100002000040100140055140036140055140052140057
70204140038104900000101014002013940413934412936025801034010030003100003010030000100001264390669373120081843114003014005414005413053131311507010030200100003000060200100005000014005114003511502011009910040100100001000001001000001100000001000010000032101133111395444000010013100002000040100140052140055140055140055140036
70204140054104901000100114003613940413932512936025801034010030003100003010030000100001264390669279120081843114002714005414003513053131311507010030200100003000060200100005000014003514003511502011009910040100100001000001001000001100000001000011100032101133111395644000001310100002000040100140036140055140055140036140055
70204140054104800000640001140020139608139346129360258010340100300031000030100300001000012643546693731200794511140030140035140054130531313115070100302001000030000602001000050000140054140051115020110099100401001000010000010010000011000000010000101100321011281113956640000131313100002000040100140036140036140036140055140036
70204140054104900000000014003913960813934912936025801034010030003100003010030000100001264390669373120079451114003114005114005413056231311507010030200100003000060200100545000014005414005111502011009910040100100001000001001000000100000001000010100032101128111395664000001310100002000040100140036140036140055140055140036
70204140038104900000000014003913960813934612936046801034010030003100003010030000100001264310669373120081843114002714005414005413053131311267010030200100003000060200100005000014005114005111502011009910040100100001000011001000001100000001000010000032101128111395664000013130100002000040100140055140055140036140052140036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire (01) | cycle (02) | 03 | 0e | 0f | 19 | 1e | 1f | 22 | 24 | 3f | 4d | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c5 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
7002514005310491100010140032139446139338129356258001340010300031000030010300001000012647486693915200794510114001101400471400501305653131173700103002010000300006002010000500001400471400471150021109104001010000100000101000011000000010000100031401012071113956940000069100002000040010140051140051140051140051140051
70024140035104911000101400201394911393431293562580013400103000310000300103000010000126477166927912008126301140026014003514005013056831311757001030020100003000060020100005000014004714004711500211091040010100001000001010000010000000100001100314010121101413957240000666100002000040010140051140051140036140051140051
70024140035104900010101401291394911393431293562580013400103000310006302923000010000126477166928872008126301140028014005014003513055331311757001030020100003000060020100005000014004714003511500211091040010100001000001010000110000000100001100314012121121213956940000060100002000040010140051140036140051140051140048
7002414004710491100010140035139487139338129353258001340010300001000030010300001000012647716692791200812630114001101400351400501305683131161700103002010000300006002010000500001400351400471150021109104001010000100000101000011000000310000110031401012111813955740000999100002000040010140036140051140051140048140051
7002414003510491001000140035139487139338129356258001340010300031000030010300001005512650596692791200812630114002601400471400501305653131175700103002010000300006002010000502651402741400471150021109104001010000100000101000011000000010000110031401012011813956940000996100002000040010140051140048140048140048140051
7002414004710490001000140020139491139325129356258001040010300001000030010300001000012647196693535200816970114002601400351400501305533131173700103002010000300006002010000500001400501400471150022109104001010000100000101000011000000010000110031401312014913957240000906100002000040010140048140048140048140048140048
700241400471049000510101400351394911393381293412580010400103000610000300103000010000126471966935352007945101140026014003514005013055331311617001030020100003000060020100005000014003514004711500211091040010100001000001010000010000103100001000314011120101413956940000906100002000040010140051140051140036140051140048
70024140050104900010001400321394461393381293562580013400103000610000300103000010000126477166935352008126301140026014011914006813056531311737001030020100003000060020100005000014005114004711500211091040010100001000001010000110000001210000010031401112112913956940000966100002000040010140051140051140048140051140036
70024140047104900010101400351394911393431293412580013400193000610000300103000010000126474866933882007945101140026014005014003513056831313237001030020100003000060020100005000014004714003511500211091040010100001000001010000110000100100001100314010121121013957240000909100002000040010140051140051140051140036140036
700241400351049110101014003513949113934312935625800104001030006100003057030000100001264780669353520081263011400260140035140035130554313117570010300201000030000600201000050000140050140047115002110910400101000010000010100001100001001000001003140812013913955740000909100002000040010140051140051140036140051140048

Test 3: Latency 2->3 roundtrip

Chain cycles: 3

Code:

  ld2 { v0.s, v1.s }[1], [x6]
  fmov x1, d1
  eor x8, x8, x1
  eor x8, x8, x1
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0887

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | bb | dcache load miss (bf) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
702051402601050001010140457139770140175129342258011640145300031000030100300001000012643196733631201119301140012014088714088713139103131355701003020010000300006020010000500001408871400361150201100991004010010000100000100100001100000010000100003210112911139545400001300100002000040100140037140367140888140888140888
70204140887105000130014024514043913932613018925801034010030003100003010030000100001264319670361320079595114001201408871408871313910313198270100302001000030000602001000050000140891140260115020110099100401001000010000010010000110000001000010100321011291114039640000131313100002000040100140685140037140888140037140888
7020414088710491000014087714043914017513018925801034010030003100003010030000100001272055669283920111930014086301400361400361313910313112770100302001000030000602001000050000140036140036115020110099100401001000010000010010000110000001000010000321011291113954540000131013100002000040100140517140037140888140888140888
7020414003610480020014002313977013955212934225801034010030003100003010030000100001272055669283920079595114023601402601400361313910313198270100302001000030000602001000050000140887140260115020110099100401001000010000110010000110000001000000100321011291114039640000101313100002000040100140888140890140888140888140037
702041408871051001101409071401761392951293422580103401003000310000301003000010000127205567336312007959501408630140887140887131391031319827010030200100003000060200100005000014026014026011502011009910040100100001000001001000011000000100001100032101129111403964000013013100002000040100140037140888140889140037140888
702041402601051000001406621404391401751301892580103401003000310000301003000010000127205567336312020200411408630140473140296130532031319827010030200100003000060200100645000014088714026011502011009910040100100001000011001000001000000100000010032101129111397724000010013100002000040100140888140888140888140888140888
7020414003610490011014002214043913932613018925801034010030003100003010030000100001266786669283920111930114023601408871408871305320313198270100302001000030000602001000050000140887140260115020110099100401001000010000010010000110000091000010100321001331114039640000101010100002000040100140888140888140888140888140888
7020414026010510061014087213940514017513018925801034010030000100003010030000100001272055672391720111930114086301400361400361313910313198270100302001000030000602001000050000140684140260115020110099100401001000010000010010000110000031000010100321011291114039640000101013100002000040100140888140037140261140037140037
7020414003610510092500140873140439140175130189258010340100300031000030100300001005612721276733823200795951140236014088714003613053203131982701003020010000300006020010000500001408871400362150201100991004010010000100001100100001100003310000101003210112911140396400001000100002000040100140037140261140888140037140264
7020414088710500001014087213940513932612934225801034010030003100003010030000100001264319673363120079595014086301408871408871313910313198270100302001000030000602001000050000140887140036115020110099100401001000010000110010000110000690100001012032101133111395454000013010100002000040100140888140888140261140531140888

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0458

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4e | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 69 | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d2 | icache miss (d3) | d5 | d6 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
700251402601051001010110014002813971614017513018925800134001030003100003001030000100001272485673363120111930011402361402601402601307770313138470010300201000030000600201000050000140887140260115002210910400101000010000010100000110000100100001000003176102122001113978140000101010100002000040010140888140037140261140261140888
7002414003610510000006000140547139716139326130189126800104001730000100003001030000100001266662669283920111930011402361402601400361314010313201170010300201000030000600201000050000140887140260115002110910400101000010000110100000010000100100001010003140001122001114040540000101010100002000040010140263140261140888140097140526
700241406161049000010100014024514039414017513018925800134001030003100003001030000100551272485669283920111930011400441408871408871314017313186870010300201000030000600201000050000140036140260215002110910400101000010000110100000010000003100001010003140001122001114040540000132713100002000040010140888140888140888140888140037
70024140268105100001010001408721394881393261293422580013400103000310000300103000010000127248566975822011193001140863140260140887131401031320117001030020100003000060020100005000014088714026011500211091040010100001000001010000001000000010000001000314000112200111404054000010010100002000040010140261140261140888140261140266
70024140260105000000000001408721397161401751293422580013400103000310000300103000010000126472867036132020200401140236140260140260130554031311627001030020100003000060020100005000014088714026011500211091040010100001000001010000001000000010000101000314000112200111398174000013130100002000040010140888140037140888140888140888
700241402601049000000110014087214039414017512934225800134001030003100003001030000100001272485673363120202004011408631408871402601307780313201170010300201000030000600201000050000140887140036115002210910400101000010000010100000110000000100001011103140001121012214020440000101310100002000040010140888140888140888140037140888
70024140036105100010060001400211397161393261301892580010400103000310002300103000010000127248567336312007959500140863140887140887131408031311627001030020100003000060020100005000014088714003611500211091040010100001000001010001111000000010000101000314000212200111395584000010130100002000040010140888140037140037140888140888
70024140039105000000060001408721403941393261301892580013400103000010000300103000010000126472867336312020200401140236140036140260131401031320117001030020100003000060020100005000014088714026011500211091040010100001000001010001101000000310000100000314000112000221404054000013010100002000040010140888140261140888140888140686
70024140887105100000011001400211403941393261301892580013400103000310000300103000010000126472867336312007959501140863140260140260131401031311627001030020100003000060020100005000014088714026011500211091040010100001000001010000011000000010000100000314000112200111404054000013100100002000040010140888140037140888140888140888
70024140036105100000011001408721403941395531301892580013400103000310000300103000010000127248567336312020200400140863140887140887131401031320117001030020100003000060020100005000014068514068211500211091040010100001000011010003211000100310000100000314000112000111404054000013130100002000040010140261140261140261140037140888

Test 4: throughput

Count: 8

Code:

  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  movi v0.16b, 0
  movi v1.16b, 0
  ld2 { v0.s, v1.s }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.6262

retire (01) | cycle (02) | 03 | 08 | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst neon or fp (9a) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
400205501293760110001001501000101031262401001001603118000010016000080000500400126192594215008905006750114003542401002008000016000020080000320000501335010811240201100991001008000016000080000010080000080000008001961029151101116814500640000800003200001005006850068501035006850243
4002045006737500103971081027605011820105026240352100160000800001001600008000050040000019169670500480500675006703639024010020080000160000200800003200005011550100112402011009910010080000160000800000100800002380026025800266002915110716975012010106800003200001005006850134500685013450068
40020450067375000000001501500982101002624034910016025280000100160000800005004000001916967050048050067501110031062401002008000016000020080000320000500675012111240201100991001008000016000080000010080000080019025800266019231511091688501360066800003200001005012350109500995006850068
4002045006737500002601039515005221000262403711001600008000010016000080000500400126191696705004805006750067003902401002008000016000020080000320000501175009311240201100991001008000016000080000010080000080019025800000119231511071679500641066800003200001005014550068501235010050068
400204500673750000320001580500520100026240100100160412800001001600008000050040006719259420500940501335011000354240100200800001600002008000032000050067501361124020110099100100800001600008000001008000023800000080026601901511010168115006400106800003200001005006850068501155011550068
4002045010237600000010005009900002624024010016015780000100160000800005004001351920159050048050099500960773242401002008000016000020080000320000500675014611240201100991001008000016000080000010080000080072026800250025291511011169115009000100800003200001005009750110501095011050068
40020450067375000025010005011820100262405121001604128000010016000080000500400126192594205004805010550156029324240100200800001600002008000032000050107500731124020110099100100800001600008000001008000008002501980025611929151101316975006400100800003200001005012950068501585016750103
40020450067375000000100150127010100262403821001602178000010016000080000500400000192671305007805009650096023324240100200800001600002008000032000050067500931124020110099100100800001600008000001008000029800190080026610291511081610115006400106800003200001005008250068500685010050100
40020450067375010032010005005221010132624038210016041280000100160000800005004001271925232050114050067501170038024010020080000160000200800003200005010750100112402011009910010080000160000800000100800000800260080000011901511091679501300000800003200001005011450134500685006850068
4002045009737500103201010205005801010426240411100160292800001001600008000050040012619276650501230501105008105532424010020080000160000200800003200005012850067112402011009910010080000160000800000100800002380026008002600001511071697500701000800003200001005009750115501155006850068

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.6260

retire (01) | cycle (02) | 03 | 05 | 07 | 08 | 0b | 0e | 0f | 1e | 22 | 23 | 24 | 37 | 3a | 3f | 43 | 46 | 49 | 4e | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 5f | 60 | 61 | 69 | 6d | 6e | dispatch stall (70) | 74 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst neon or fp (9a) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d0 | d2 | icache miss (d3) | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | e7 | ? int output thing (e9) | ea | eb | ld/st retires (ed) | ? fp/simd (ee) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
400025501523760000008000001501732900262405811016000080000101600008000050400192191124921105008750085500801683232400102080000160000208000032000050092500941124002110910108000016000080000010800000298000000080025612529000150272294331784422225010900711080000320000105008550045501135014650146
4000245018837600000016210046015006929002624080310160000800001016000080000504001921911249211050126500445010100377240010208000016000020800003200005017350079112400211091010800001600008000011080000008002500318003201263500015027251042017104423235008600711080000320000105008150097501465009950045
4000245004437600000056100175050029290172624054210160157800001016000080000504002041911249211050025500845004414037524001020800001600002080000320000500965008011240021109101080000160000800000108000002980031002680000012535000150292511624171055202050093001151480000320000105014650097500815004550090
40002450044375000000500000175050029091002624021310160247800001016000080000504001931911249211050025501015009400367240010208000016000020800003200005007350044112400211091010800001600008000001080000008003200080031613200001502922104211786423155008610901480000320000105009250085500455007850097
400024500803750000006151002150500652092226240010101603488000010160000800005040012619213072110501265018850080010336324001020800001600002080000320000500845008011240021109101080000160000800000108000002980031003280031013135000150272294201784420205012210751080000320000105004550085500815008550081
4000245008937500001146600017905008520902624001010160247800001016000080000504000001926032111050126500855008007032324001020800001600002080000320000500445007911240021109101080000160000800000108000002980031003280000612535000150272294201784420205009300701480000320000105014650045500455014650045
40002450078375000000323100142050081009312624001010160263800001016000080000504002081930261211050066501455014401033232400102080000160000208000032000050044501441124002110910108000016000080000010800000080031003280031610350001502725104211784423215027020741480000320000105008150097500815004550090
4000245007937500010027700000501300991726240358101605158000010160000800005040020419112492110500745008450044003512400102080000160000208000032000050188500441124002110910108000016000080000010800000298003100328000061313500015031281152117106523215004110761080000320000105004550085500815004550077
40002450084375000000521000128150081210100262400101016053280000101600008000050400192191124911105007350089501880523232400102080000160000208000032000050096500831124002110910108000016000080000010800000080000003180000010000115027124421179442222500521081680000320000105014250134500805008750100
40002450087375000000350002470500291101002624032210160165800001016000080000504001261911249215500685010050044029366240010208000016000020800003200005008650044112400211091010800001600008000001080000023800260028800006019000015027205522177542425500410087680000320000105016450085500955007650102