Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRSW

Test 1: uops

Code:

  ldrsw x0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005394310045101379212121625100010001000150370394394220325210001000100039477111001100010000100043103903910396139437311611391101041000395395398395395
1004394300045001382212121625100010001000149890394394217325210001000100039477111001100010000100043103903910396139437311611391101041000395395395395395
100439430004500137921121925100010001000149890394398216325610001000100039477111001100010000100043103903910386139447311611395101041000395399395395395
100439830004500137921212025100010001000149891394394217325210001000100039477111001100010000100043103903810396138447311611391101471000399396395399399
10043943000440013832111625100010001000149890398394221325610001000100039477111001100010000100043103803910396139447311611393141071000399395395396395
100439420004400137921211625100010001000152670394394216325210001000100039477111001100010000100043103803910396138447311611395101071000399395399395395
1004394300045101379212121625100010001000149890394394217325210001000100039477111001100010000100043103903810396138437311611391101041000399400399395396
100439830004500137921121625100010001000150180394394217325610001000100039477111001100010000100043103803910396138447311611395101041000395395395399399
1004394300045001383212121625100010001000152740394394217325610001000100039477111001100010000100043103803910386139437311611397101071000395399395400395
1004398300045101379212121625100010001000150370394398221325210001000100039477111001100010000100043103803910396138437311611395101071000395395399395395

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrsw x0, [x6]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020570047524100100000700356976459709254010430103100013010010000616015334147014966955700507005064646364953401003020010000602001000070050351140201100991001000030100100001100100000100000010000100261027111697983000306010000301007003870051700487005170051
4020470050525000001010700356976459709254010430103100013010010000616175334220614966955700507005064631364953401003020010000602001000070050351140201100991001000030100100000100100001100001010000110261017111697983000399610000301007003670051700367005170051
4020470050525000001010700206978159695254010430103100013010010000616005334147014966970700477003564646364953401003020010000602001000070047351140201100991001000030100100000100100000100000010000100261017111698133000396610000301007005170051700517003670051
40204700355250000010107003269781597092540100301031000130100100006160053342206149669707004770050646311264953401003020010000602001000070050351140201100991001000030100100000100100001100000010000110261017111697983000396610000301007004870048700487004870051
4020470050524000001000700356976459709254010430103100003010010000616005334220614966955700507005064646364953401003020010000602001000070050351140201100991001000030100100000100100001100000010000110261017111698133000390610000301007003670051700517005170036
4020470035524000000000700326978159709254010030103100013010010000616175334220614966970700477004764646364953401003020010000602001000070050351140201100991001000030100100001100100001100000010000000261017111698133000399610000301007005170051700367005170056
4020470035525000002010700356978159709254010030103100013010010000616032334220614966970700507005064646364938401003020010065602001000070050351140201100991001000030100100000100100000100000310000110261017111697983000396610000301007003670048700367005170056
4020470050524000001000700356978159791734011330114100013010010000616175334220614966970700507005064646364953401003020010000602001000070050351140201100991001000030100100001100100001100001010000110261017111698103000009610000301007003670048700487005170051
4020470050524000000000700346978659706254010030103100003010010000616005334220614967056700357005064646364953403123020010000602001000070050351140201100991001000030100100000100100001100001010000000261017111698133000300910000301007005170051700517005170051
4020470050524000100000700356978159706254010030103100003010010000616005334147014966970700507005064646364953401003020010000602001000070035351140201100991001000030100100001100100000100000010000010261017111698133000090910000301007004870036700517004970051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0054

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
40025700575250000010100070039697755971325400103001310001300101000061706833423980496697170054700546465336497940010300201000060020100007003535114002110910100003001010000010100000110000000100001000025204715569814300031310010000300107005570055700557005270036
40024700515250000000110070039697785971325400143001010001300101000061699133422540496698070060700606467836498540010300201000060020100007005735114002110910100003001010000010100011010001011100001010025204714469814300031313010000300107005570055700557005570055
40024700515240000000610070020697435971325400143001010000300101000061701833423981496695570054700546467236497640010300201000060020100007005435114002110910100003001010000010100000110000000100001000025204714469817300031010010000300107005570055700527003670036
400247005452500001001000700396977859695254001430010100013001010000617018334239804966963700547003564653364979400103002010000600201000070051351140021109101000030010100000101000001100001001000011110252047144698043001213101010000300107005870058700427005870061
40024700415241000000110070039697785969525400143001310001300101000061699133423981496698070041700606467836498540010300201000060020100007006035114002110910100003001010000010100022110002011100001000025204716469798300000131310000300107003670055700557003670036
400247003552400000100000700206977859695254001430013100013001010000617018334147004966955700357005464653364979400103002010000600201000070035351140021109101000030010100000101000001100000001000011110252047144698233000313101310000300107005870061700427006170061
400247006052510001001000700396974359710254001830013100003001010000616991334147004966971700357005164669364979400103002010000600201000070060351140021109101000030010100000101000000100002001000010100252047164698173000310101010000300107024070239700577007570055
400247024452500010001000700366977859770100400383001310007301551000062182733422541496697470054700516465376506640010300201000060020100007005435114002110910100003001010000010100000110002031100001111025204714569820300061001310000300107006170061700427005870052
400247005452500000000100700396977859695254001430013100013001010000617018334239804966974700547005464672364979400103002010000600201000070054351140021109101000030010100000101000001100041111000011110252047144698203000610101310000300107005870066700587016970055
40024700355250000000110070039697785971625400183001310002300101000061707233426860496696170060700436465936498540010300201000060020100007006035114002110910100003001010000010100011110005224671100001010025203714569821300121313010000300107023370059700527005870140

Test 3: throughput

Count: 8

Code:

  ldrsw x0, [x6]
  ldrsw x0, [x6]
  ldrsw x0, [x6]
  ldrsw x0, [x6]
  ldrsw x0, [x6]
  ldrsw x0, [x6]
  ldrsw x0, [x6]
  ldrsw x0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205267322011010100651032671731818162580100100800001008001650011677334923652267332673216736616684801162008002420080024267326411802011009910080000100800000100800181942800571159800386157019111151182162126729992800001002673426734267342673426733
802042673220010001006510326722218181625801001008000010080015500116975749236342673226714166606166858011520080024200800242673281118020110099100800001008000001008001919428005702598003861574219011151182162226730990800001002673326733267342673426733
80204267142001010010651032672221818162580100100800001008001550011591914923653267322673216660616685801162008002420080024267338111802011009910080000100800000100800192042800571159800386157019111151182162226729992800001002673326733267332673326715
802042673220010101006510126718318181525801001008000010080016500116709549236522671426732166606166848011520080024200800242673281118020110099100800001008000011008002020428005701598003861574219111151182162226729992800001002671626733267332671526734
8020426733200100010066003267353181802580100100800001008001550011677634923653267322673216660616684801162008002420080024267328111802011009910080000100800000100800192108005700218003861574219111151182162226729992800001002673326733267152673326733
80204267322001000100651032672221801525801001008000010080016500116975749236522673226732166606166858011520080216200800242673281118020110099100800001008000001008002021428005710598003861194219111151182161326729902800001002673426733267332673326733
802042673220010000002110326722218181625801001008000010080015500116975749236522673226732166606166668011520080024200800242673281118020110099100800001008000001008002120428005710598003861574219111151182162226729902800001002673326733267332673326715
802042673220010101102100226721218181925801001008000010080015500117960349236532673226732167651216902801152008002420080024267328111802011009910080000100800000100800211942800571059800396119019011151182162226730992800001002673426733267152673326715
8020426714200101011066102267172180025801001008000010080016500116956349236522673226732166606166668011520080024200800242673282118020110099100800001008000001008002020428005731598003961574219011151182162226730990800001002671526733267332671526715
802042673220010100006510326803018181625801001008000010080015500116683649236532673226732166606166848011620080024200800242673281118020110099100800001008000001008001921428005710218003860194219011151182162226729992800001002673326733267332673326733

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252673720010100670012672900120258001010800001080000501168843049236282672826727166723167078001020800002080000267088511800211091080000108000001080020194308005910021800406159431915020716017626725010480000102672826728267282672926729
8002426708200000014500226722277192580010108000010800005011677910492365626737267361668131671680010208000020800002671585118002110910800001080000010800000008003900039800396004300502017160817267331313080000102673726737267372673726716
8002426737200100002100126713212002580010108000010800005011668960492362826708267081667291682380010208000020800002670885118002110910800001080000010800000430800390003980039610000502014160817267251014480000102672826728267092672826729
8002426728200000004510126712200025800101080000108000050116688614923647267082672816672316688800102080000208000026728861180021109108000010800000108001819450800591006180000601943190502014160177267451010480000102672826709267292672926728
8002426708200000000101267132121202580010108000010800005011668861492362826728267281667231670780010208000020800002672886118002110910800001080000010800000008003900039800000139430050201716081726718010480000102670926728267092672926729
80024267282000000145001266932121216258001010800001080000501168843149236482672726727166723167088001020800002080000267278511800211091080000108000001080000000800000000800396104400502017160178267491010080000102670926728267292670926709
800242670820000000450012671321212162580010108000010800005011688431492362826727267081665231670880010208000020800002672885118002110910800001080000010800000008003900008003961394300502081601717267241010480000102670926728267092672926728
800242672720000000000126693212121625800101080000108000050116675014923647267082670816672101670780010208000020800002672885118002110910800001080000010800000430800390004380039610000502014160178267331313580000102673726737267162671626737
800242671420010000671012669321212162580010108000010800005011667501492364726727267281667231670780010208000020800002670885118002110910800001080000010800000008003900008003961043005020171601717267301010080000102672826709267092672926709
8002426727200000110001267132012025800101080000108000050116675014923648267272672716672316707800102080000208000026727851180021109108000010800001108000004308003900039800390139430050201716071326745130080000102671626737267372673826715