Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (S)

Test 1: uops

Code:

  ldr s0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1005398300451013792181216251000100010001483836439438921732521000100010003943911110011000100001000391039039103961354373116113866621000395395390395395
10043943004510137921818162510001000100015037364389389217324910001000100039438911100110001000010003910360391035613543731161139110641000395395396395390
10043943004510138221212162510001000100014989364394389217324710001000100039441011100110001000010003910390391039613543731161139110621000395395390390395
10043942114500237921818462510001000100015018366394389216324710001000100039438911100110001000010003910390391039613543731161139110621000395397395390395
100439431045102379212121625100010001000149893693943942173252100010001000394389111001100010000100039103903910356135397311611391101041000395395395395395
10043942004500237921812122510001000100014989369394391216325210001000100039438911100110001000010003910390391035613543731161139110621000395395395397395
1004389310450013792121816251000100010001483836939439421732471000100010003913911110011000100001000391035039103961353973116113866621000390390392390390
1004389300420023742181816251000100010001483836438938921732521000100010003893891110011000100001000391035035103561353973116113866641000399395397395390
10043943004510237921818162510001000100014838369394394216325210001000100039439111100110001000010003910390391039613939731161139110641000395395395395395
100438930045002379212121625100010001000149893693943942173252100010001000394389111001100010000100039103903910396135437311611391101041000395395395392395

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr s0, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01) | cycle (02) | 03 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | dcache load miss (bf) | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5020512005789900000000000120039119493109550256010340102100011000030100100001000010790355736380613631711200300120054120051113149031136715010030200100001000060200100001000012005412003511502011009910040100100001000001001000001100000001000011003211410744119661400021310121000040100120055120055120055120055120055
502041200518990000001010012003911951810944925601004010210001100003010010000100001079134575134161387251120587012005412005411314901511367150100303631000010000602001000010109120150120051115020110099100401001000010000010010000011000200123101001010003294810744119661400181310121000040100120163120052120054120148120052
5020412005489901000000000120126119692109467526010340102100031000030100100001014710790355736380613880411203650120054120054113521031136715010030200100001000060200100001000012005712005111502011009910040100100001000001001000001100000001000011003211410744119661400001313121000040100120052120055120052120052120052
50204120054899000000101001200391195181094492560103401021000110000301001000010000107908057363806136317012003001200541200541131490311367150100302001000010000614921000010000120052120051115020110099100401001000010000010010000011000000213100001000321141074411966140002101091000040100120055120055120036120055120093
502041200518990001001000012008011951010946725601034010210001100003010010000100001079035573638061363171120030012005412005411314903113671501003020010000100006020010000100001200541200511150201100991004010010000100000100100000110000200100001100321141074411966140002131091000040100120055120055120052120036120036
5020412005489900000010000120045119513109467256010340102100011000030100100001000010790355736380613631711200110120051120095113149031136715010030200100001000060200100001000012005112005111502011009910040100100001000001001000001100000001000011003211410744119658400021313121000040100120055120036120055120052120052
5020412005489900000010000120020119510109467256010340102100011000030100100001000010790355736380613446111200300120054120054113149031136585010030200100001000060200100001000012005112005111502011009910040100100001000001001000001100000001000011003211410744119658400021313121000040100120071120068120055120052120055
5020412003589900000070000120036119513109467256010340120100011000030100100001000010790085736380613631711200300120054120054113149031136685010030200100001000060200100001000012005412005111502011009910040100100001000001001000001100000001000011003211410755119661400021313121000040100120055120055120052120055120055
50204120035899000000100001200391195131094672560103401021000110000301001000010000107903557363806136317112001101200541200541131490311367150100302001000010000615061000010000120059120051115020110099100401001000010000010010000011000000301000011003211410744119661400021313121000040100120055120055120055120055120036
502041200548990011001010012003911949310944925601034010210001100003010010000100001079035573638061344610120030012005412005111314903113671501003020010000100006020010000100001200351200511150201100991004010010000100001100100000010000000100001100344241074411964640009010121000040100120055120038120055120036120055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0048

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d5 | d6 | da | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50025120041899111000005000001200501194991094552560013400141000210000300101000010000107960257364766133764012003212005312005611317131136955001030020100001000060020100001000012004112005311500211091040010100001000001010002211000300211000011110314003107633119656400049081000040010120057120057120057120042120057
50024120041899100100002010001200411195141094552560016400141000210000300101000010000108097857376286133764012003212005612005611317631136925001030020100001000060020100001000012005612004111500221091040010100001000001010002111000200211000011111314004107433119671400049981000040010120057120057120057120057120042
50024120056899111111002000001200411195141094662560013400141000110000300101000010000107946357357506133764112002912005312005311317131136955001030020100001000060020100001000012005612005311500211091040010100001000001010002101000402011000011010314004107633119671400046981000040010120042120042120057120057120054
500241200538991110000023010001200261195141094692560016400121000210000300101000010000107957557357506133764012002912005312005611315931136955001030020100001000060020100001000012004112004111500211091040010100001000001010002201000100011000011110314004107643119671400049681000040010120042120042120057120057120057
5002412005389910101000200000120041119499109466256001640012100021000030010100001000010796025735750613376401200291200411200561131593113695500103002010000100006002010000100001200561200531150021109104001010000100001101000121100010007100001101131400417533119656400029681000040010120054120057120042120057120054
50024120053899100000002000001200411195191094692560016400141000210000300101000010000107960257363326133764012003212005612005611317431136955001030020100001000060020100001000012005612005311500211091040010100001000001010001111000100111000001110314003107523119671400049681000040010120057120042120057120057120042
50024120041899111000002000001200411195141094552560013400141000210000300101000010000107946357373246136244012003212005612005611317431136955001030020100001000060020100001000012004112005311500211091040010100001000001010001111000300211000001110314002107633119671400040081000040010120042120057120042120054120057
50024120056899100000002010001200411195161094692560016400121000110000300101000010000107946357363326133764012002912004112005611317431136805001030020100001000060020100001000012005612004111500211091040010100001000001010001201000101111000011110314002107523119784400049081000040010120041120048120048120048120048
500241200481015000010001000011200421195151094732560016400121000110000300101000010000107963857365246133968012003612004112005711315931136965001030020100001000060020100001000012004112004111500211091040010100001000001010002111000200011000011011314001010702311966640002130121000040010120055120055120055120055120055
500241200358990000010012000001200361194921094672560013400121000110000300101000010000107951757362366133662012003212005312005311317431136925001030020100651000060020100001000012005312009911500211091040010100001000001010001111000301011000011010314004107023119671400049681000040010120042120042120057120054120057

Test 3: throughput

Count: 8

Code:

  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  ldr s0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c2 | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020526727200001000004501012671221212162580100100800001008001650011673031266822672726727166556166798011620080024200800242673026724118020110099100100800008000001008000000080039003980039613943011151180160126729010104800001002673126711267282672826728
802042672720000000000450101267122120162580100100800001008001450011673030267022672726707166556166598011420080024200800242670726803118020110099100100800008000001008000004308003900398003961394301115118016002672400104800001002672826728267282672826728
8020426707200000000000010126712212121625801001008000010080014500116730312670226727267271665561667980115200800242008002426733267341180201100991001008000080000010080000043080039003980000613943011151180160026724010100800001002672826728267282672826708
802042672720800000000450001267122012162580100100800001008001550011673031267022672726727166556166798011520080024200800242673726809118020110099100100800008000001008000004308003900428003901394301115118016002672400104800001002672826728267282672826728
8020426727200000000004500002671201212162580100100800001008001550011673031267022670726727166356166598011520080024200800242671926742118020110099100100800008000001008000004308003900080039603943011151180160026704010100800001002672826708267282672826728
8020426727200000000005401002671221212678265814061008052010681977532122219802805528208282221774479179598208220082332202819592806827917111802011009910010080000800000100813020433158133902569980819613943410052673733127681110144800001002766227627277782761727778
802042762020001000067972352101277352121254119681012110809101088124650012006351276452774927816169455117391813482028134320081350277592777281802011009910010080000800000100807822431908095110488780949613943400052681243127533010104800001002777627738277382718427772
8020427625208000000779696160002776221212531195808821008104010381246522120353912683527768277791740651174198135020081348200813482787127767718020110099100100800008000001008026004319080950104828809496139020005178165112769111004800001002776627764277752791027775
8020427776208010100671897040012690621212540196810121028026010081246500120537202783228077280501749061176248152420081151202817312806528031101802011009910010080000800000100802622432898120912637781170613943200053131161126724010104800001002670826728267282672826728
80204267271990000000045000126692212120258010010080000100800005001165789026702267272672716650316685801002008000020080000267352673811802011009910010080000800001100800000008000000398003901043000051101161126724010104800001002672826728267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | icache miss (d3) | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002526732200111111650002669301818122580010108000010800005011667501266972672226722166673167078001020800002080000268072671211800211091010800008000011080000080035008003561353950200616432670566280000102672926723267232672326723
80024267222000000004110226707218012258001010800001080000501166886126683267082670816672316707800102080000208000026724267141180021109101080000800001108000039800000358003561353950201416442670506080000102670926723267232672326728
800242670820000000041101267071181802580010108000010800005011676051266972672226708166523167028001020800002080000267282672811800211091010800008000001080000398000010358003561353950201416342671966280000102672326723267092672326723
8002426727200000000010026712201812258001010800001080000501166993126697267222672216667316707800102080000208000026722267221180021109101080000800000108000039800350358003961353950201416432671900080000102672326723267262672326710
800242672220000000000012671300022580010108000010800005011676051266832672226722166673166888001020800002080000267282701111800211091010800008000001080000398003500800356035050204416442671966280000102672326723267232672326709
8002426722200000000010226712218012258001010800001080000501166750126683267222670816667316688800102080000208000026852267191180021109101080000800000108000039800351358000061039502044164426719106080000102670926723267232672326723
80024267222000000004100126693201812258001010800001080000501167605126683267222672816667316702800102080000208000026869267341180021109101080000800000108000008003510800356004350204416432671960280000102672326723267092670926709
8002426708200000000000126712201802580010108000010800005011676051267022672226708166523167028001020800002080000268132673111800211091010800008000001080000080035035800350100502044164426719010480000102670926709267232670926709
80024267222000000004510126707018189258001010800001080000501167605126697267222672216672316707800102080000208000026730268491180021109101080000800000108000039800000358003560354350204316342670566080000102672326723267232672326709
8002426722200000000120012670721218162580010108000010800005011676051266972672226708166523166888001020800002080000267362717511800211091010800008000001080000080035008003500353950204416342670566280000102670926709267232672326709