Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRSB (32-bit)

Test 1: uops

Code:

  ldrsb w0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10054033111116712388377192510001000100015509040340320432621000100010004038611100101000100011020204310590016010406161431917331633400131351000404404404404403
10044033111006902388377192510001000100015531140340322532611000100010004138511100101000100011020214310590005910406159431917331633400131351000404407404403404
10044033111006603388377202510001000100015555140240322532611000100010004038511100101000100001020204310590006110396159431907331633400131351000404404404403404
10044033100006703388377192510001000100015526140240322532611000100010004048511100101000100001019194310581006110406159451907331633400131351000404404403403404
10044033111116703388377212510001000100015526140240322532611000100010004038511100101000100011019194310581015910406159431917331633400131351000404404404403404
10044033100006603388377182510001000100015555140240322532611000100010004038511100101000100001021204310590006110396159431917331633399131351000403404404404404
10044023111006612387277192510001000100015526140340322532611000100010004038611100101000100001019194310591026010406159431917331633400131351000404403403404404
10044033110006903388377202510001000100015548140340322532611000100010004038511100101000100001019194310580026110406158431907331633400131351000404404404403403
10044033101006703387377202510001000100015480140340222532611000100010004038511100101000100001020214310590026010406159431927331633400131351000404404404404404
10044033101006703388277202510001000100015480140340322532611000100010004038511100101000100011020194310601016110406158431917331633400131351000404404404404403

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrsb w0, [x6]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0121

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057006052511112000200007002669787597012540104301061000230100100006160593342638149669767005670056646373649444010030399100006020010000700413511402011009910010000301001000011001000220100020014010000110110261047111698163000660910000301007009570042700577004270042
402047012452510110000201007003869702597152540108301061000130100100006160413342830149669767005670053646373649564010030200100006020010000700563511402011009910010000301001000001001000221100060013110000110100261017111698043000606910000301007005770057700427005770057
4020470141524111000001001070026697025971525401083010310002301001000061608633425901496696170053700566463736495940100302001000060200100007005335114020110099100100003010010000010010001111000200313310000111110261017111698193000609910000301007005770042700577004270057
40204701515251101000010011700416970259701254010830106100013010010000616077334254214966973700567005664652364944401003020010000602001000070056351140201100991001000030100100000100100011110001011410000011100261017111698043000396010000301007005770057700577004270042
4020470146525110110002001070041697875971525401043010610002301001000061605033430221496697670056700566463736495940100302001000060200100007004135114020110099100100003010010000010010001211000300113010000011100261017111698193000690010000301007004270054700547004270057
4020470126525101010001000070041697025971525401083010610002301001000061604133424941496697670056700416463736495640100302001000060200100007005635114020110099100100003010010000110010001111000300217210000110111261017111698193000600910000301007004270042700577005770050
4020470142525110000002000070041697875971825401083010310002301001000061606833425900496697370053700536464936495940100302001000060200100007005335114020110099100100003010010000110010001111000100125010000111100261017111698163000609010000301007004270042700577005770057
4020470094524101100002000070041697025971525401083010610002301001000061605033426860496392770056700416465236495940100302001000060200100007005635114020110099100100003010010000010010002211000101023210000111110261017111698043000699910000301007005770057700577004270042
4020470126525101000002001070041697025971525401083010610002301001000061605933424460496697670053700536465236495940100302001000060200100007005635114020110099100100003010010000110010003101000100123810000111110261017111698193000390610000301007005470054700547005470054
4020470058525111000001001070041697875970125401083010310002301001000061608633428301496696170056700566465236495640100302001000060200100007005635114020110099100100003010010000010010002311000111022610000111120261017111698043000690610000301007005470057700547005770042

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0053

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257004752400100100001007003216974359695254001430013100133001010000616952334147014966967700477003564653364960400103002010000600201000070047351140021109101000030010100000101000001100140001000010100252127123698103000366610000300107004870048700487003670048
400247003552500000000100007003206972859706254001430013100013001010000616952334206214966967700477003564665364972400103002010000600201000070035351140021109101000030010100001101000321100020141000011011252127122698163000606610000300107006170054700427006470054
4002470053525111110002010170038069702597122540018300161000230010100006169953342350149669737005670418646713649784001030020100006002010000700533511400211091010000300101000001010001111000110161000011110254527122698163000660010000300107005770148701387006170054
400247005352510110000201007003806970259712254001430013100023001010000617009334235014966973700477023564676364966400103002010000600201000070053351140021109101000030010100000101000321100022111000011011252127123698163000666010000300107004270054700547005470372
400247005352510100000101017003806977759701254001830016100023058610000617009334235014966961700537004164807364978400103002010000600201000070041351140021109101000030010100001101000331100020141000011110252127122698163000366610000300107004270042700577004270054
400247004152511110000201007003806977959715254001430016100023001010000617009334235014966973700537005364671364977400103002010000600201000070053351140021109101000030010100000101000131100030211000011012252127122698043000606610000300107004270054700547005470054
400247005352411020000492641017003806977759712254001830016100013001010000617009334235014966961704237006164659364978400103002010000600201000070053351140021109101000030010100000101000131100032111000011111252137122698163000366010000300107005470054700547005770417
400247004152511100000200017004106977759712254001830016100023001010000616995334235014966973700537005364659364978400103002010000600201000070041351140021109101000030010100001101000221100010011000011110252127122698043000666610000300107005470042700547004270054
400247005352511010000101017002606977759701254001830016100023001010000617009334235014966978700537005364671364978400103002010000600201000070041351140021109101000030010100001101000210100030211000611110252127122698163000366610000300107005470054700547005470054
400247005352411101000201017003806977759712254001830016100023001010000617009334176914966973700537005364671364978400103002010000600201000070053351140021109101000030010100001101000211100030211000011110252127122698163000360610000300107005470054700547005470054

Test 3: throughput

Count: 8

Code:

  ldrsb w0, [x6]
  ldrsb w0, [x6]
  ldrsb w0, [x6]
  ldrsb w0, [x6]
  ldrsb w0, [x6]
  ldrsb w0, [x6]
  ldrsb w0, [x6]
  ldrsb w0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802052673720010100067002268952771925801001008000010080013500116659049236562673626736166636166888011220080024200800242673685118020110099010080000100800000100800211943080059012618004061584319211151182162226733005800001002673726737267372673726715
80204267142001010006900126865377192580100100800001008001350011665904923656267362671416663616689801142008002420080024267366411802011009901008000010080000010080019204308005910021800396119431911115118216222673513130800001002673726715267372673726737
8020426877200110000670032686800720258010010080000100800175001167215492363426736267361666461666680117200800242008002426736851180201100990100800001008000001008002019430800580016080040005843192111511821622267340130800001002671526738267372673726737
8020426736200100000670022685437720258010010080000100800165001169949492365726736267141666361666680115200800242008002426736861180201100990100800001008000011008001919430800590016180040011943191111511821632267330135800001002673826715267152673726737
802042673720011111024102269132700258010010080000100800155001167628492365626714267361664261668880116200800242008002426737851180201100990100800001008000001008002119430800580006180040605843191111511821622267341305800001002671526738267372673726737
8020426736200100000210032689730720258010010080000100800155001167761492363426736267361664261668880115200800242008002426736641180201100990100800001008000001008001919008005911021800396158431901115118216222673413135800001002671526737267382673726737
8020426715200111000671022686830020258010010080000100800155001166501492365626737267141666461668880116200800242008002426714641180201100990100800001008000011008001919430800590006180040015943191111511821622267111305800001002671526737267152673726720
8020426736200101000671002685139702580100100800001008001650011676284923634267362673616664616666801162008002420080024267368511802011009901008000010080000110080019194308001910023800390159431901115118216222671113130800001002673726715267372673726715
8020426714200101000690032672237020258010010080000100800165001167584492365726736267381666461666680115200800242008002426714861180201100990100800001008000011008002019430800580002180040615943191111511821622267331300800001002673726715267382671526737
8020426895200110000211022672127712580100100800001008001350011665904923656267142673716663616688801152008002420080024267148511802011009901008000010080000010080019194308001900061800416159019011151182162226733000800001002673726737267372671526715

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252672720000441012671221121625800101080000108000050116688614923647267312673116676316711800102080000208000026731771180021109108000010800000108000008003803880038613944150207160101226731010080000102672926729267322670926709
80024267272000044101267122112025800101080000108000050116612514923631267392673516680316691800102080000208000026742771180021109108000010800000108000043800001388003861394305020101609926728100780000102673226750267292672926709
800242673120100000126712210025800101080000108000050116712414923648267312670816676316711800102080000208000026727561180021109108000010800000108000043800390388003860384405020716468267261010780000102672726728267322672926732
8002426727200004410126712311219258001010800001080000501167501049236482670826731166523167118001020800002080000267277711800211091080000108000001080000438003914180038613944050201116091026728100780000102672826732267092673226717
800242673120010441012671221212162580010108000010800005011671241492365126731267311667231670880010208000020800002673177118002110910800001080000110801324380039108003801384405020916010122670500780000102673326754267522672826709
80024267312000001012671221002580010108000010800005011667500492364726731267311667631671180010208000020800002672856118002110910800001080000010800004380039041800386139005020816099267281014080000102670926732267322673226732
8002426731200004400126713012119258001010800001080000501166750049236512673226708166763166888001020800002080000267277711800211091080000108000011080000438003800800006138430502091626926728010080000102673226709267092670926732
80024267272000044101267122111925800101080000108000050116675014923628267312673116676316707800102080000208000026731561180021109108000010800001108000043800390080000013844050209160982670500780000102673226709267322673226732
800242673120000440002671201212192580010108000010800005011667500492365126731267311665231670780010208000020800002670877118002110910800001080000010800004380038008003861044050209160117267051010480000102672926728267092672826732
80024267312000044101267123121192580010108000010800005011667500492365126731267081667631668880010208000020800002673177118002110910800001080000110800004380039038800380138440502010160101426725010480000102677326761267092673226709