Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (S)

Test 1: uops

Code:

  ldr s0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005398300451013792181216251000100010001483836439438921732521000100010003943911110011000100001000391039039103961354373116113866621000395395390395395
10043943004510137921818162510001000100015037364389389217324910001000100039438911100110001000010003910360391035613543731161139110641000395395396395390
10043943004510138221212162510001000100014989364394389217324710001000100039441011100110001000010003910390391039613543731161139110621000395395390390395
10043942114500237921818462510001000100015018366394389216324710001000100039438911100110001000010003910390391039613543731161139110621000395397395390395
100439431045102379212121625100010001000149893693943942173252100010001000394389111001100010000100039103903910356135397311611391101041000395395395395395
10043942004500237921812122510001000100014989369394391216325210001000100039438911100110001000010003910390391035613543731161139110621000395395395397395
1004389310450013792121816251000100010001483836939439421732471000100010003913911110011000100001000391035039103961353973116113866621000390390392390390
1004389300420023742181816251000100010001483836438938921732521000100010003893891110011000100001000391035035103561353973116113866641000399395397395390
10043943004510237921818162510001000100014838369394394216325210001000100039439111100110001000010003910390391039613939731161139110641000395395395395395
100438930045002379212121625100010001000149893693943942173252100010001000394389111001100010000100039103903910396135437311611391101041000395395395392395

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5020512005789900000000000120039119493109550256010340102100011000030100100001000010790355736380613631711200300120054120051113149031136715010030200100001000060200100001000012005412003511502011009910040100100001000001001000001100000001000011003211410744119661400021310121000040100120055120055120055120055120055
502041200518990000001010012003911951810944925601004010210001100003010010000100001079134575134161387251120587012005412005411314901511367150100303631000010000602001000010109120150120051115020110099100401001000010000010010000011000200123101001010003294810744119661400181310121000040100120163120052120054120148120052
5020412005489901000000000120126119692109467526010340102100031000030100100001014710790355736380613880411203650120054120054113521031136715010030200100001000060200100001000012005712005111502011009910040100100001000001001000001100000001000011003211410744119661400001313121000040100120052120055120052120052120052
50204120054899000000101001200391195181094492560103401021000110000301001000010000107908057363806136317012003001200541200541131490311367150100302001000010000614921000010000120052120051115020110099100401001000010000010010000011000000213100001000321141074411966140002101091000040100120055120055120036120055120093
502041200518990001001000012008011951010946725601034010210001100003010010000100001079035573638061363171120030012005412005411314903113671501003020010000100006020010000100001200541200511150201100991004010010000100000100100000110000200100001100321141074411966140002131091000040100120055120055120052120036120036
5020412005489900000010000120045119513109467256010340102100011000030100100001000010790355736380613631711200110120051120095113149031136715010030200100001000060200100001000012005112005111502011009910040100100001000001001000001100000001000011003211410744119658400021313121000040100120055120036120055120052120052
5020412005489900000010000120020119510109467256010340102100011000030100100001000010790355736380613446111200300120054120054113149031136585010030200100001000060200100001000012005112005111502011009910040100100001000001001000001100000001000011003211410744119658400021313121000040100120071120068120055120052120055
5020412003589900000070000120036119513109467256010340120100011000030100100001000010790085736380613631711200300120054120054113149031136685010030200100001000060200100001000012005412005111502011009910040100100001000001001000001100000001000011003211410755119661400021313121000040100120055120055120052120055120055
50204120035899000000100001200391195131094672560103401021000110000301001000010000107903557363806136317112001101200541200541131490311367150100302001000010000615061000010000120059120051115020110099100401001000010000010010000011000000301000011003211410744119661400021313121000040100120055120055120055120055120036
502041200548990011001010012003911949310944925601034010210001100003010010000100001079035573638061344610120030012005412005111314903113671501003020010000100006020010000100001200351200511150201100991004010010000100001100100000010000000100001100344241074411964640009010121000040100120055120038120055120036120055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0048

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | da | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50025120041899111000005000001200501194991094552560013400141000210000300101000010000107960257364766133764012003212005312005611317131136955001030020100001000060020100001000012004112005311500211091040010100001000001010002211000300211000011110314003107633119656400049081000040010120057120057120057120042120057
50024120041899100100002010001200411195141094552560016400141000210000300101000010000108097857376286133764012003212005612005611317631136925001030020100001000060020100001000012005612004111500221091040010100001000001010002111000200211000011111314004107433119671400049981000040010120057120057120057120057120042
50024120056899111111002000001200411195141094662560013400141000110000300101000010000107946357357506133764112002912005312005311317131136955001030020100001000060020100001000012005612005311500211091040010100001000001010002101000402011000011010314004107633119671400046981000040010120042120042120057120057120054
500241200538991110000023010001200261195141094692560016400121000210000300101000010000107957557357506133764012002912005312005611315931136955001030020100001000060020100001000012004112004111500211091040010100001000001010002201000100011000011110314004107643119671400049681000040010120042120042120057120057120057
5002412005389910101000200000120041119499109466256001640012100021000030010100001000010796025735750613376401200291200411200561131593113695500103002010000100006002010000100001200561200531150021109104001010000100001101000121100010007100001101131400417533119656400029681000040010120054120057120042120057120054
50024120053899100000002000001200411195191094692560016400141000210000300101000010000107960257363326133764012003212005612005611317431136955001030020100001000060020100001000012005612005311500211091040010100001000001010001111000100111000001110314003107523119671400049681000040010120057120042120057120057120042
50024120041899111000002000001200411195141094552560013400141000210000300101000010000107946357373246136244012003212005612005611317431136955001030020100001000060020100001000012004112005311500211091040010100001000001010001111000300211000001110314002107633119671400040081000040010120042120057120042120054120057
50024120056899100000002010001200411195161094692560016400121000110000300101000010000107946357363326133764012002912004112005611317431136805001030020100001000060020100001000012005612004111500211091040010100001000001010001201000101111000011110314002107523119784400049081000040010120041120048120048120048120048
500241200481015000010001000011200421195151094732560016400121000110000300101000010000107963857365246133968012003612004112005711315931136965001030020100001000060020100001000012004112004111500211091040010100001000001010002111000200011000011011314001010702311966640002130121000040010120055120055120055120055120055
500241200358990000010012000001200361194921094672560013400121000110000300101000010000107951757362366133662012003212005312005311317431136925001030020100651000060020100001000012005312009911500211091040010100001000001010001111000301011000011010314004107023119671400049681000040010120042120042120057120054120057

Test 3: throughput

Count: 8

Code:

  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526727200001000004501012671221212162580100100800001008001650011673031266822672726727166556166798011620080024200800242673026724118020110099100100800008000001008000000080039003980039613943011151180160126729010104800001002673126711267282672826728
802042672720000000000450101267122120162580100100800001008001450011673030267022672726707166556166598011420080024200800242670726803118020110099100100800008000001008000004308003900398003961394301115118016002672400104800001002672826728267282672826728
8020426707200000000000010126712212121625801001008000010080014500116730312670226727267271665561667980115200800242008002426733267341180201100991001008000080000010080000043080039003980000613943011151180160026724010100800001002672826728267282672826708
802042672720800000000450001267122012162580100100800001008001550011673031267022672726727166556166798011520080024200800242673726809118020110099100100800008000001008000004308003900428003901394301115118016002672400104800001002672826728267282672826728
8020426727200000000004500002671201212162580100100800001008001550011673031267022670726727166356166598011520080024200800242671926742118020110099100100800008000001008000004308003900080039603943011151180160026704010100800001002672826708267282672826728
8020426727200000000005401002671221212678265814061008052010681977532122219802805528208282221774479179598208220082332202819592806827917111802011009910010080000800000100813020433158133902569980819613943410052673733127681110144800001002766227627277782761727778
802042762020001000067972352101277352121254119681012110809101088124650012006351276452774927816169455117391813482028134320081350277592777281802011009910010080000800000100807822431908095110488780949613943400052681243127533010104800001002777627738277382718427772
8020427625208000000779696160002776221212531195808821008104010381246522120353912683527768277791740651174198135020081348200813482787127767718020110099100100800008000001008026004319080950104828809496139020005178165112769111004800001002776627764277752791027775
8020427776208010100671897040012690621212540196810121028026010081246500120537202783228077280501749061176248152420081151202817312806528031101802011009910010080000800000100802622432898120912637781170613943200053131161126724010104800001002670826728267282672826728
80204267271990000000045000126692212120258010010080000100800005001165789026702267272672716650316685801002008000020080000267352673811802011009910010080000800001100800000008000000398003901043000051101161126724010104800001002672826728267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526732200111111650002669301818122580010108000010800005011667501266972672226722166673167078001020800002080000268072671211800211091010800008000011080000080035008003561353950200616432670566280000102672926723267232672326723
80024267222000000004110226707218012258001010800001080000501166886126683267082670816672316707800102080000208000026724267141180021109101080000800001108000039800000358003561353950201416442670506080000102670926723267232672326728
800242670820000000041101267071181802580010108000010800005011676051266972672226708166523167028001020800002080000267282672811800211091010800008000001080000398000010358003561353950201416342671966280000102672326723267092672326723
8002426727200000000010026712201812258001010800001080000501166993126697267222672216667316707800102080000208000026722267221180021109101080000800000108000039800350358003961353950201416432671900080000102672326723267262672326710
800242672220000000000012671300022580010108000010800005011676051266832672226722166673166888001020800002080000267282701111800211091010800008000001080000398003500800356035050204416442671966280000102672326723267232672326709
8002426722200000000010226712218012258001010800001080000501166750126683267222670816667316688800102080000208000026852267191180021109101080000800000108000039800351358000061039502044164426719106080000102670926723267232672326723
80024267222000000004100126693201812258001010800001080000501167605126683267222672816667316702800102080000208000026869267341180021109101080000800000108000008003510800356004350204416432671960280000102672326723267092670926709
8002426708200000000000126712201802580010108000010800005011676051267022672226708166523167028001020800002080000268132673111800211091010800008000001080000080035035800350100502044164426719010480000102670926709267232670926709
80024267222000000004510126707018189258001010800001080000501167605126697267222672216672316707800102080000208000026730268491180021109101080000800000108000039800000358003560354350204316342670566080000102672326723267232672326709
8002426722200000000120012670721218162580010108000010800005011676051266972672226708166523166888001020800002080000267362717511800211091010800008000001080000080035008003500353950204416342670566280000102670926709267232672326709