Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRSH (64-bit)

Test 1: uops

Code:

  ldrsh x0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005403211000037901212162510001000100015018139739421732521000100010003947711100110001000010000103900103961007311611391101001000375395395395395
100439420045101359012016251000100010001406013943941973252100010001000374771110011000100011000010000010396100731161139510041000395401375375399
10043983004400037920102510001000100015208137937419832321000100010003747711100110001000010004310390381039600447311611371141401000399399399375399
100437420044101383212119251000100010001501813983981973256100010001000398561110011000100001000431038041103861384473116113950041000399395395375399
100439830044100383211025100010001000140601398374197323210001000100039477111001100010000100043103803810396139437311611391141001000395375399375375
100439830044001359200025100010001000152741374398221325610001000100037456111001100010000100043103803810386039447311611391141401000395399399399399
100439431144001383201219251000100010001527413743982213256100010001000398771110011000100001000441038038103801394373116113950071000399375375399375
100437430001003832112025100010001000152671398398221325610001000100039477111001100010000100043100003810006138447311611395141001000399399399375399
1004374310000137921121925100010001000140601398395197323210001000100039456111001100010000100001039138100000390731161139510001000399399399395375
1004398301000138320002510001000100014060137439419732321000100010003987711100110001000110000103803810386139447311611371141471000375399399399399

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrsh x0, [x6]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057004752401100700356978159709254010430103100013010010000616175334220614966967700507003564646364938401003020010000602001000070050351140201100991001000030100100000100100001100000010000101000261027111697983000366010000301007005170051700517005170051
402047005052400100700326973559695254010030100100003024510000618383334277014966970700357005064643364938401003020010000602001000070050351140201100991001000030100100000100100001100000010000000000261017111698133000399910000301007005170051700367005170051
402047003552400100700356978159706254010430103100003010010000616015334206214966970700507005064646364953401003020010218602001000070037351140201100991001000030100100000100100001100000010003101000261017111698103000000010000301007005170036700517005170036
402047003552400110700356973559709254010430100100013010010000616005334206214966970700507005064631364954401003020010000602001000070047351140201100991001000030100100000100100001100000010000101000261017111698133000360910000301007003670051700367005170051
402047003552510100700356973559695254010430100100013010010000616015334147014966970700507003564631364950401003020010000602001000070047351140201100991001000030100100000100100001100000010000011000261017111698133000090010000301007005170051700487005170051
402047003552500110700266976459695254010030103100013010010000616175334220614963906700507005064646364938401003020010000602001000070050351140201100991001000030100100000100100000100000010000000000261017111698103000369010000301007004870051700367005170051
4020470050524006110700356978159695254010430100100013010010000616005334220614966955700357005064631364938401003020010000602001000070035351140201100991001000030100100000100100001100000010000101000261017111697983000309910000301007003670051700367003670036
402047003552400010700206976459709254010430100100013010010000616175334220614966970700357003564646364953401003020010216602001000070047351140201100991001000030100100000100100031100000233610000101000261017111698133000399910000301007004870051700367009670036
402047004752400110700206976459709254010430114100013010010000616041334220614966970700507005064643364938401003020010000602001000070050351140201100991001000030100100000100100000100000310000101000261017111698133000309610000301007005670056700517003670051
402047005052500000700206978659709254010830100100013010010000616175334220614966970700507005064631364953401003020010000602001000070050351140201100991001000030100100001100100001100000310000000000261017111697983000360910000301007005170036700517004870051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d2 | d5 | map dispatch bubble (d6) | d9 | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002570047524001001010000700356976059834254001830016100023001010000616982334147049669557003570035646683649784001030020100006002010000700413511400211091010000300101000001010000011000000010000111112520037100336981430003061810000300107005170036700367008270084
4002470035525000000011000700356974359796254001430013100003001010000616982334220649669707005070050646682064960400103002010000600201000070050351140021109101000030010100000101000001100020011000011111252005710042698163000699010000300107004270057700627005770055
400247005652510110001000070041697025979625400143001310000300101000061698233414704966970700507005064653364975400103002010000600201000070035351140021109101000030010100001101000001100000001000010100252006710063697983000396010000300107005270050700517039870048
4002470050525000000010000702116972859709254001030049100013001010000617086334220649669557004770047646533649604001030020100006002010000700473531400211091010000300101000001010000011000201110000111122520031510023698193000399910000300107004270042700577013070036
40024700355240000000161000700206976059794254001430010100003001010000617068334220649669677005070035646683649754001030020100006002010000700503511400211091010000300101000001010000011000067031000310100252003710033698443000096910000300107003670051700517005470061
4002470056525100100013000070038697775970625400143001010001300101000061697033422064966955700507005064653364980400103002010000600201000070051351140021109101000030010100000101000001100001022301000310100252003710065698133001290910000300107015170036702407012770036
4002470050525000000012000070032697085970625400143001310001300101000061697933420624966970700357003564669364960400103002010000600201000070035351140021109101000030010100000101000001100003031000010100252003710056698013000096910000300107003670054700517014470054
40024700535251001000557100170043697055974825400143001310001300101000061698233414704966955700507004764668364960400103002010000600201000070047351140021109101000030010100000101000001100000001000010000252005710065698133000390610000300107003670060700577007370046
4002470056525101000021000700266970259792122400143001010001300101000061698233422064966955700507005064668364975400103002010000600201005670050351140021109101000030010100000101000001100000001000000100252006710037697983000966910000300107005170036700517004870036
400247005052500001101100070032697605978225400183001610001300101000061699533424944966976700567005664674364981400103002010000600201000070041351140021109101000030010100000101000111100020111000001011252007710066698193000699910000300107005770057700577005770042

Test 3: throughput

Count: 8

Code:

  ldrsh x0, [x6]
  ldrsh x0, [x6]
  ldrsh x0, [x6]
  ldrsh x0, [x6]
  ldrsh x0, [x6]
  ldrsh x0, [x6]
  ldrsh x0, [x6]
  ldrsh x0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
802052672820000000001002671201212162580100100800001008001550011673031492364726727267271663561667980116200800242008002426727771180201100991008000010080000010080000043080039003980039613943011151181161126729010104800001002672826708267282672826728
8020426727200011006900012671201212025801001008000010080015500117711614923647267272672716655616679801152008002420080024267277711802011009910080000100800000100800000008003900398003900394301115118116112672500104800001002672826728267282670826728
802042670720000000450000267122121216258010010080000100800155001167303149236472672726727166556166798011420080024200800242672777118020110099100800001008000001008000004308003900080000613943011151181161126739010104800001002673126728267282672826728
8020426727200000004500012671220016258010010080000100800155001167303149236472672726727166556166798011520080024200800242670759118020110099100800001008000001008000004308003900428003961390011151181161126730010104800001002672826708267282670826708
802042672720000000450101267242121202580100100800001008001650011659521492422026738267271665561668180115200800242008002426729771180201100991008000010080000010080000043080039003980000614043011151181161126724010100800001002675226744267302672826730
802042673220000000510101267122121216358010010080000100800145001167560149236272672726727166666167218011520080024200800242673977118020110099100800001008000001008000004308003900398003961394301115141116112672401004800001002672826821267282672826708
8020426727200000004501012671220121625801001008000010080014500116730314923647267272681716673616679801152008002420080024267897711802011009910080000100800000100800000008003900398003960390011151181161126719010100800001002670826728267372670826711
8020426727200000004500012669220016258010010080000100800155001167303149236272685927026166556168938011520080024200800242672756118020110099100800001008000001008000004308003910080039613943011151181161126732010104800001002673127631274602762626887
80204267312000005570844010127662000300167808821048078010181268522120383714924674276282773417356581738381190204813752088118527759568180201100991008000010080000010081042401918095110478280819613943011151421161126710010104800001002672826708267282672827727
80204275802040100032700012746001212161738088410280780104810825001199626149246492762027737173315117384813692068137420481185277517781802011009910080000100800001100807822432398076012556880949013943400052672642227535210100800001002762527637270312717827733

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | d9 | da | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025267362001101000067010326721279192580010108000010800005011739160492363426736267151668231668880010208000020800002671485118002110901080000108000001080020194308001810261800406159019005020516003626733130580000102673726737267162671526737
800242673620010100000670102267213701925800101080000108000050116678004923656267362673716681316716800102080000208000026715851180021109010800001080000110800192043080058002648004061594519105020516004526734013580000102673826738267152673726738
8002426714201100000002101032672137002580010108000010800005011683940492365726736267361668131669480010208000020800002671486118002110911080000108000001080020204308005810121800396058451910502051600352673400080000102671526737267382671626737
80024267162011111000021000226721077202580010108000010801985011737900492363526736267361666031669580010208000020800002671485118002110901080000108000001080020190080059200218004060190191050206160053267331313580000102673826715267152673726738
80024267382001101000021000026700077202580010108000010800005011682210492363426715267371668131669480010208000020800002673685118002110901080000108000001080149190080059101608004060194319005020516005326733013080000102671626716267372673826737
80024267362001010000066000126721077425800101080000108000050117688004923657267362673716681316716800102080000208000026739851180021109010800001080000010800191944080058001618003960194319005020516005326733013580000102673726715267372673726738
80024267192001110000021010326699007192580010108000010800005011717471492363526736267361677531669480010208000020800002673685118002110911080000108000001080019200080059002618004061594319105020316005326712013580000102673726737267162673826737
80024268132001001000021000326721307125800101080000108000050116983904923702267362673616681316723800102080000208000026881851180021109010800001080000010801522100800192532668004061590191050203160035267341313580000102672426737267372671626738
80024267152001010000021000326721377222580010108000010800005011671490492363426715267371668131669480010208000020800002673764118002110901080000108000001080020194308001910261800406119019105020316005326733013080000102671626716267372673826738
800242674720011001000210102267213001925800101080000108000050114627504923656267172671416681316695800102080000208000026737641180021109010800001080000110800192043080060002218004001580190050204160045267121313580000102673826716267382673826737