Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDURB

Test 1: uops

Code:

  ldurb w0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10054022114400138301016251000100010001527413983982213256100010001000398771110011000100001000431038038103861394473116113980001000375399375399399
10043983014400135920102510001000100015274137439822132561000100010004047711100110001000010004310000381000613944731161139501041000399375399399399
1004374301440013832111925100010001000152081398374221325610001000100039856111001100010000100043103803810386138447311611395141401000399399399375375
100439831044001383201192510001000100015274139837422132561000100010003747711100110001000010004310380381000003843731161139114001000395375375375375
10043743004500038320121925100010001000149891398398197323210001000100039856111001100010000100043103803810380039437311611395141471000399399375399399
100439831044001388201192510001000100014060139840019732331000100010003987711100110001000010000103803810396139447311611395141441000399375402399399
10043743104410035920119251000100010001527413983982213232100010001000374771110011000100001000010000381038003907311611395141471000399399399375399
1004398300010137900102510001000100014060137439819732321000100010003987711100110001000010000103803810386038447311611371141401000396399375395375
1004374200450013592112192510001000100014060139439821732321000100010003987711100110001000010004310380381000613944731161139501401000399399399375399
1004374201450003830121202510001000100014060139439419732321000100010003945611100110001000010004310000010380039447311611391141441000399399399399399

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldurb w0, [x6, #1]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0054

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057005752511110700206978559713254010430103100013010010000616014334239849669717005870035646500364957401003020010000602001000070051351140201100991001000030100100001100100001100000010000110000261027111697983000313101310000301007005570055700557003670055
4020470035525001107003969785597102540100301001000030100100006161753342398496697170053700636465003649544010030200100006020010000700353511402011009910010000301001000001001000001000000100001100002610171116981730003130010000301007005570055700557005570052
40204700545250010070020697855971325401043010310001301001000061601433414704966971700567005664631036493840100302001000060200100007003535114020110099100100003010010000010010000110000001000001000026101711169817300000101410000301007005570036700367003670036
4020470051525001007003969764597132540104301031000030100100006161753341470496697170037700586465003649574010030200100006020010000700543511402011009910010000301001000011001000011000000100000100002610171116981730003130010000301007003670055700557005570055
40204700545250010070020697645971325401043010310001301001000061604133423984966973704577005664673036495440296302001000060200100007005435114020110099100100003010010000110010000010000001000001000026101711169817300031010010000301007003670052700527003670055
402047005452400010700206976459713254010430103100013010010000616041334239849669717005470097646470364954401003020010000602001000070054351140201100991001000030100100000100100000100000010000110000261017111698143000310131010000301007005570055701037005670055
40204700515250011070039697855969525401003010010001301001000061617533423984966971700547005564650036493840100302001000060200100007005135114020110099100100003010010000010010000010000001000000000026101711169772300031010010000301007005570055700367005270036
4020470054524001007003969782596952540100301031000130100100006161753342398496708170078700546465303649384010030200100006020010000700543511402011009910010000301001000011001000011000361010000110000261017111697983000313101310000301007005270052700557003670052
402047005452500251070020697825971025401003010310000301001000061601433422544966971700517005964647036495740100302001000060200100007005135114020110099100100003010010000010010000010000001000010000026101711169798300031010010000301007005270036700367005270036
402047003552500100700206978259695254010430103100013010010000616014334239849669717040970051646510364954401003020010000605941000070051351140201100991001000030100100001100100000100000010000010000261017111698143000310101310000301007005570055700367003670036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0057

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002570057524111000740101700426978159701254001830016100023001010000616995334254214966977070107700616467536497640010300201000060020100007005135114002110910100003001010000010100011110001001110000101000252017111698203000610101010000300107004270042700587005870058
40024700575251000002010070026697815971625400183001310002300101000061704533425420496696107005870062646753649824001030020100006002010000700573511400211091010000300101000001010002101000310111000011011025201711169804300061001010000300107005870058700587005870058
4002470057524111000201017002669702597182540018300161000230010100006169953341769149669770700967004364675364982400103002010000600201000070057351140021109101000030010100000101000230100010024100001111102520171116982030006010010000300107005870058700427005870058
40024700575241100002010170042697025970125400103001010001300101000061706833422540496697107013570058646753649824001030020100006002010000700573511400211091010000300101000001010003111000300011000011110025201711169820300060101010000300107005870058700587005870058
400247009552510000020000700426978159716254001830016100023001010000617045334254214966977070125701046467736498240010300201000060020100007005735114002110910100003001010000010100022010001001110000111120252017111698203000610101010000300107004270058700587005870058
40024700575241010041010170042697025971625400183001610001300101000061704533425421496697707005770061646753649824001030020100006002010000700413511400211091010000300101000001010003111000200111000011110025201711169820300061010010000300107005870058700587005870058
40024700575251110002000170042697815971625400183001610002300101000061704533425420496697707013770057646753649824001030020100006002010000700573511400211091010000300101000001010001211000200011000011111025201711169804300060101010000300107005970044700587004270058
40024700575241100002000170042697815970125400183001610002300101000061704533425420496698007005770060646593649824001030020100006002010000700573511400211091010000300101000001010001211000100211000011111025201711169820300061001010000300107005870058700587005870058
4002470057524110000201007004269781597162540018300161000230010100006170453342542049669770701027005864675364982400103002010000600201000070057351140021109101000030010100000101000231100020111100001111002520171116982030006100010000300107004270042700427005870058
400247007252411100020000700266978159701254001430013100023001010000617045334254204966977070118700966468036498240010300201000060020100007005735114002110910100003001010000010100032010002002110000110110252017111698203001510101010000300107005870058700597004270042

Test 3: throughput

Count: 8

Code:

  ldurb w0, [x6, #1]
  ldurb w0, [x6, #1]
  ldurb w0, [x6, #1]
  ldurb w0, [x6, #1]
  ldurb w0, [x6, #1]
  ldurb w0, [x6, #1]
  ldurb w0, [x6, #1]
  ldurb w0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526722200100004500226704212180258010010080000100800155001167303049236420267222672716655616674801142008002420080024267277111802011009910080000100800000100800003980035039800396135011151180160026770000800001002672826708267282672826728
8020426707200000000002267232120162580100100800001008001450011658561492363202672226707166356166798011520080024200800242672756118020110099100800001008000001008000039800390398003961353911151180160026769604800001002672826723267082672826728
8020426707200000000000267072120162580100100800001008000050011665251492362702672226707166503166858010020080000200800002672771118020110099100800001008000001008000039800000080039613943000511011611267071064800001002670826708267232672826708
80204267072000000000022671201201625801001008000010080000500117703804923627026722267271665031668580100200800002008000026727561180201100991008000010080000010080000398003503980039613539000511011614267301064800001002672826728267282672826708
80204267272000000054002267120121216258010010080000100800005001167808149236470267272672716650316665801002008000020080000267075611802011009910080000100800000100800004380039008003901390000511011611267390100800001002672826708267232672826728
8020426707200000314500226712218002580100100800001008000050011770380492364702672726727166503166858010020080000200800002670756118020110099100800001008000001008000039800390398003960043000511011611267111002800001002672826708267082670826708
802042672220000000450022671200016258010010080000100800005001166525049236470267272672716650316685801002008000020080000267077111802011009910080000100800001100800003980035008000061350000511011611267141060800001002672826728267082672326728
80204267272001100045000266942018025801001008000010080000500116652504923647026727267271665031668680100200800002008000026707561180201100991008000010080000010080000398000000800396135000051101161126817600800001002670826723267232672826708
80204267222000000045002266922001625801001008000010080000500116578904923647026707267271665031666580100200800002008000026707561180201100991008000010080000010080000398003903980039613543000511011611267111064800001002672826728267082670826728
8020426727200000000102266922121216258010010080000100800005001166525149236270267072672216630316665801002008000020080000267277111802011009910080000100800000100800003980039039800396104300051101161126724064800001002670826708267282672826708

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252672220001000045010226707218181625800101080000108000050116675014923647267282672816672316707800102080000208000026727711180021109108000010800001108000039800350398003961353905020201692226891106280000102672826709267292672926709
80024267272000000004500012671221212162580010108000010800005011676050492364826728267221665231670780010208000020800002672771118002110910800001080000010800003980000008000061350050202216112226881010280000102672326728267092672326729
800242670820000000025710810126713012121625800101080000108000050116675004923647267272672216672316702800102080000208000026708711180021109108000010800000108000008000000800006035431502091622112672806280000102670926709267282672926709
800242672720000000000002266930120122580010108000010800005011668860492364726728267081665231670780010208000020800002672856118002110910800001080000010800003980000044800350004305020221611222685106280000102670926709267292672926709
80024267222000000004500012670701212162580010108000010800005011667500492362826727267081665231670780010208000020800002672771118002110910800001080000010800003980000039800006004305020221622926890100080000102672926723267232672826729
8002426728200000000001012671221212162580010108000010800005011688430492362826728267271667231670880010208000020800002670871118002110910800001080000010800003980039008000060354305020101611222679066080000102672826728267092670926729
80024267082000000004501022669321212162580010108000010800005011688430492362826727267271667231670780010208000020800002672856118002110910800001080000010800003980035008000001354305020221622102670506480000102670926709267292672926728
8002426727200000000212000226693218121625800101080000108000050116688604923628267082672716652316707800102080000208000026728561180021109108000010800000108000008000003980035603543050202216221127091610280000102672826710268732693226709
80024267082000010000000126721218121648801401080000108000050116688614923651267322672816672316688800102080000208000026708711180021109108000010800000108000008004013800396039005020221622222672400080000102673226729267092672926709
80024267082000000104501022686621802258001010800001080000501166352149236482672226727166673167078001020800002080000267277111800211091080000108000001080000398000000800396135005020916221126705106480000102672826709267232673226732