Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRH (register)

Test 1: uops

Code:

  ldrh w0, [x6, x7]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100539931010165103367000152510001000100014540399398222325710001000200042681111001100010000101920010571002110006157019173216223799901000400382400399400
1004399311100651033842181816251000100010001529139839922132571000100020003998211100110001000010191942105700159103861584219173216223959921000400400399400400
1004398311100651033841181815251000100010001533439839922132571000100020003988111100110001000010201942105710259103861574019173216223969921000400400403400400
1004399311100651033842181816251000100010001537539939822232571000100020003998111100110001000010192142105610159103861574219273216223969921000400400400399400
1004399311100651033842181815251000100010001533439939922132571000100020003998111100110001000010192142105710159103761574219173216223959921000399400400400400
1004399311100641033832181816251000100010001536239939922232561000100020004008111100110001000010202042105610159103861574219173216223969921000400400400399400
1004399311100651033842181815251000100010001528239939922132571000100020003988111100110001000010201942105810259103861574219273216223969921000402400400399400
1004399311100651033832181815251000100010001537539939922132561000100020003998111100110001000010202042105710259103861574219273116223969921000400400400399399
10043983111006510338321818172510001000100015300399398222325710001000200038971111001100010000100003910350003510356135390073216223886621000390390390390390
10043913000004110137421818122510001000100014838389389212324710001000200038976111001100010000100003910350003510356135390073216223866621000390390390390390

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrh w0, [x6, x7]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057012852500001010700396978259713254010430103100013025210000616230334249404966974700517003564647364955401003020010000602002000070051351140201100991001000030100100001100100000100000234100001126102711169814300001010010000301007005570036700367005270055
4020470038525000010107002069764597132540100301031000130100100006160503342590149669557005470054646473649544010030200100006020020000700543511402011009910010000301001000001001000011000002431000011261017111699053000313101310000301007005570052700367005570055
402047003552500001000701906976459713254010430103100023010010000616023334263814966971700517003564647364954401003020010000602002000070054351140201100991001000030100100000100100001100003132100001126101711169817300000101310000301007003670052700557003670055
40204700885250010101070036697825971025401043010310001301001000062506333426380496697470054700546465036495740100302001000060200200007005435114020110099100100003010010000010010000110000099100001126101711169798300031301310000301007003670055700557005570055
4020470118525000060107003969785596952540104301031000130100100006160233342494149669717003570051646503649574010030200100006020020000700543511402011009910010000301001000001001000011000002581000011261017111698173000013131010000301007003670055700367005270036
40204700795250010100070039697645969525401043010310001301001000061602333423980496695570054700356463136495440100302001000060200200007005135114020110099100100003010010000010010000110000024910000112610171116979830009013010000301007005570036700557005570036
402047013952500001000700206978559695254010430103100013010010000616050334171014966955700547003564631364954401003020010000605962000070035351140201100991001000030100100000100100001100000225100001126101711169817300031310010000301007005270052700557005270052
4020470166525000020007002069785597142540104301031000130100100006162393343342049669557005470051646313649384010030200100006020020000700513511402011009910010000301001000001001000011000000100000126101711169798300001301310000301007003670036700557003670055
40204701425250000200070020697645971025401043010310000301001000062537233424461496697970054700546463136495440100302001000060200200007003535114020110099100100003010010000010010000110000018610000012610171116981730003001310000301007005570036700367005570055
402047013652500001010800700396978559713254010430100100003010010000616193334254214966971700547005464650364954401003020010000602002000070054351140201100991001000030100100000100100000100001231100001126101711169817300030131010000301007003670036700527005570056

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0051

retire uop (01) | cycle (02) | 03 | 0e | 0f | 19 | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002570057524000141070036697755971025400143001310001300101000061699133425420496697470051701026473036496640010300201000060020200007005435114002110910100003001010000010100000010000001000010000252017111698143000310101010000300107015370156700527005270036
400247005152500021070036697755971025400103001310001300101000061701833422540496697170162700656467336497640010300201000060020200007005435114002110910100003001010000010100000110000001000010100252017111698143000313101310000300107003670052700367005270052
400247005452500011070020697755971025400103001310001300101000061699133422541496698070163700796467736497640010300201000060020200007005135114002110910100003001010000010100000010000001000010100252017111698143000313101310000300107003670036700367005570052
400257005152500010070039697435971325400103001310001300101000061699133422540496697170151701296467336497940010300201000060020200007005435114002110910100003001010000010100000110000001000010100252017111698143000310101010000300107005270055700527005270052
40024700515250001107003669775597102540014300131000130010100006170183342398049669747014270089646783649764001030020100006002020000700513511400211091010000300101000001010000011000000100001010025201711169798300031010010000300107005270052700527005270052
400247003552400000070039697755969525400143001310001300101000061699133414700986696170119700586465936498240010300201000060020200007005735114002110910100003001010000010100021110000011000011100252017111698213000310101010000300107005270036700527005470148
40024700515240001107003669743597102540010300131000030010100006169913342254049669747014270085646793649764001030020100006002020000700543511400211091010000300101000001010000011000001100001011025201712169822300061013010000300107005570052700557005270052
40024700355250001107003669775596952540014300131000130010100576170903342686149669557016770108646783649764001030020100006002020000700543511400211091010000300101000001010000011000000100001010025201711169819300031010010000300107005270052700527005270052
4002470051525010110700366977859695254001030013100013001010000616991334225404966971701037006264674364976400103002010000600202000070051351140021109101000030010100000101000001100000010000101002520171116981730003130010000300107005570055700557005570052
400247005152400010070036697755971025400143001310001300101000061701833423981496697470133700936467736496040010300201000060020200007005435114002110910100003001010000010100000110000001000010100252017112697983000310101010000300107005570055700527003670052

Test 3: Latency 1->3 (with chain penalty)

Chain cycles: 3

Code:

  ldrh w0, [x6, x7]
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40205700475250001010007003569781597062540104301031000230100100006160593342542049669737005670056646493649534010030200100006020020000700503511402011009910010000301001000011001000220100010011000011010261017111698193000696610000301007005770057700427005770057
40204700565241010010007004169702597152540108301061000230100100006160593341769049669767005670041646523649594010030200100006020020000700563511402011009910010000301001000011001000121100021111000001110261017111698193000399910000301007005770042700577005770057
40204700535251010010007004169702597152540108301031000230100100006160593342494049669737005670041646523649444010030200100006020020000700563511402011009910010000301001000001001000210100020011000001011261017111698043000696910000301007005470042700577005770057
402047005652510100210070026697875971225401083010610002301001000061605933417690496697670056700566464936495940100302001000060200200007005635114020110099100100003010010000010010002211000201101000011110261017111698193000390610000301007005770042700547005770057
40204700415251100020017004169702597152540108301061000230100100006160593342494049669737005670041646523649594010030200100006020020000700563511402011009910010000301001000011001000211100020111000011111261017111698043000309910000301007004270042700577005770057
40204700415251100020007002669702597152540108301031000330100100006160593342494149669767005670056646523649594010030200100006020020000700413511402011009910010000301001000001001000111100010111000001110261017111698163000699910000301007005770057700577005770057
40204700415241110011007004169792597152540104301061000130100100006160783341769149669737004170056646373649444010030200100006020020000700563511402011009910010000301001000001001000330100020211000011111261017111698193000399910000301007005770057700547005470057
40204700565241110020007002669784597152540108301061000230100100006160593342494049669767005670041646523649594010030200100006020020000700533511402011009910010000301001000011001000321100020111000011111261017111698163000306610000301007004270057700427005770057
40204700565251100020007004169787597122540108301061000230100100006160593342350049669767005370053646493649444010030200100006020020000700563511402011009910010000301001000011001000121100010011000011111261017111698193000600610000301007005470042700427005770058
402047004152410001141007007869787597012540108301061000230100100006160783345362049669767004170056646523649594010030200100006020020000700563511402011009910010000301001000001001000121100010211000011111261017111698043000600010000301007005770057700577005770042

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0056

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257005352511100001001700386978059701254001830016100023001010000617009334235004966973700627005664674736497840010300201000060020200007005335114002110910100003001010000010100023010003021100001111000252017111698193000666610000300107007970042700577005470054
400247005352511100001010700386978059715254001830016100023001010000617036334249404966973700567006164659036496640010300201000060020200007005335114002110910100003001010000110100023010003021100000111300252017111698193000696010000300107017370054700427005570057
400247004152410100002011700266970259715254001830016100023001010000616995334176904966973700537009464674036505840010300201000060020200007005335114002110910100003001010000010100023110002001100001111200252017111698193000696910000300107005770057700577004270057
400247005652410100002010700416977759701254001430016100023001010000617036334249404966973700797007664674036498140010300201000060020200007005635114002110910100003001010000010100011010001007100001111100252017111698193000696910000300107004270057700577005770057
400247005652510000002010700416978059701254001830013100023001010000617009334249404966973700567005964659036498140010300201000060020200007005335114002110910100003001010000010100013110001011100001101000252017111698163000306610000300107005770054700427004270042
400247005652411000001011700416977759715254001430016100023001010000617036334249404966973700537005564674036498140010300201000060020200007004135114002110910100003001010000010100021110002001100001111101252017111698163000666610000300107004270042700427005470042
400247006352510100001010700416977759715254001430016100023001010000617009334249404966973700427006364659036496640010300201000060020200007005335114002110910100003001010000010100011110001011100001111000252017111698163000366610000300107004270042700427004270054
400247004152411011002010700266977759712254001430016100023001010000617009334235004963959700577005364674036496640010300201000060020200007005335114002110910100003001010000010100011110001011100001111100252017111698193000606610000300107005470057700547004270054
400247004152410000002000700386978059712254001830016100023001010000617036334176904966973700557005764659036497840010300201000060020200007005335114002110910100003001010000010100012110001004100001111100252017112698043000666010000300107005770042700427004270054
400247004152611010002001700386978059712254001830013100023001010000616995334176904967070700557005364674036497840010300201000060020200007005335114002110910100003001010000010100011110002021100000111200252017111698193000699910000300107005770042700577005770042

Test 4: throughput

Count: 8

Code:

  ldrh w0, [x6, x7]
  ldrh w0, [x6, x7]
  ldrh w0, [x6, x7]
  ldrh w0, [x6, x7]
  ldrh w0, [x6, x7]
  ldrh w0, [x6, x7]
  ldrh w0, [x6, x7]
  ldrh w0, [x6, x7]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526736201111100000210026699377402580100100800001008001650011678254923656267372671416668616671801162008002420016004826736641180201100991100800001008000011008002020430800580006480040615843192111511811611267341300800001002673926737267232673726738
802042671420011111000067032672137712580100100800001008001350011676284923656267362671416664616666801152008002420016004826736851180201100990100800001008000001008002020430800591122180040611901911115118116112673310135800001002673926737267402674626738
802042673720011011000021022672227002580100100800001008001650011673774923657267362673616664616688801162008002420016004826736851180201100990100800001008000011008001920430800580006080040015843191111511811611267331300800001002675026737267422673726737
802042673620011011000066122669927720258010010080000100800155001169949492365626714267141664261668880115200800242001600482673785118020110099010080000100800000100800192000800190006080000615943191111511821611267331300800001002672426737267432671526737
802042673620011011000067032669937020258010010080000100800165001166590492363426736267361664261666680115200800242001600482673685118020110099010080000100800000100800191943080059000618003961580191111511811611267111305800001002674826737267462673726737
80204267142001101100006600267210776825801001008000010080015500116975549236572671426736166646166888011620080024200160048267366511802011009911008000010080000010080020194308005800061800406159431911115118116112671113135800001002673826741267452672026716
802042671420111111000066022672137720258010010080000100800165001167377492365726736267361666461668880115200800242001600482673685118020110099010080000100800000100800201943080019001648000001590190111511811611267331305800001002673726738267382671526737
80204267362001101000008802266992702025801001008000010080016500116737749236572671426736166646166928011220080024200160048267368611802011009901008000010080000010080021204308006000121800000159431901115118116112671113135800001002674526742267232671526715
80204267142001101000007013267210771258010010080000100800155001181321492366026714267531691561668880117200800242001600482671485118020110099010080000100800001100800202000800581326180039606043190111511811611267341305800001002675626742271692672226737
8020426736200111100000220132669937002580100100800001008001550011676284923656267142671416642616691801162008002420016004826714851180201100990100800001008000011008002019430800591006180040615843191111511811611267330135800001002672326737267482673826737

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 1f | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526736202111010006601032670030712580010108000010801955011668331492364726727267271667231670880010208000020160000267287711800211090108000010800001108000004308003900039800380100000150201216111126725010780000102672926729267322670926728
8002426728200000000004500012671321120258001010800001080000501173870149236472672826708166723167088001020800002016000026708771180021109010800001080000010800000430800000003980039615845190005020161614142671100580000102673726737267382671626737
80024267372001100100067010226699277125800101080000108000050117266514923648267272672716672316707800102080000201600002673177118002110901080000108000001080000000800000003880000613943000050201216101526724140080000102672826732267292672926728
800242672820000000000000012671621116258001010800001080000501172435149236572673626715166603167168001020800002016000026715851180021109010800001080000010800191943080058100218005461194319100502014161517267331313080000102673726715267372671626715
80024267372000000000000001267162112192580010108000010800005011711211492365126731267311667631671180010208000020160000267085611800211090108000010800000108000004308003800008004060594319000502017161716267331313080000102671626737267372673826737
80024267362011000000067000126721300192580010108000010800005011716151492365126731267081667631671180010208000020160000267087711800211090108000010800000108000004308003900039800406119019000502017161411267331313580000102673826716267372673826716
80024267362001100000067000326721270192580010108000010800005011717131492362826731267311667631668880010208000020160000267317711800211090108000010800000108000004308003800039800386139440000502014161310267051414080000102670926732267292670926709
8002426731200110000007000022672127719258001010800001080000501171266149236572673626739166813166948001020800002016000026736861180021109010800001080000010800192100800592006180038603900000502018161515267281410480000102673226732267322670926732
800242670820100000000104000126716211192580010108000010800005011713651492362826731267311665231671180010208000020160000267317711800211090108000010800000108000000080038000388004060594319000502017161216267331313580000102673826737267372673826716
80024267362001100011067010226722377182580010108000010800005011736691492365126708267311667631671180010208000020160000267317711800211090108000010800001108000004308000000054800386039430000502014161317267241410780000102673226732267322673226729