Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm: Overview | Base Instructions | SIMD and FP Instructions

LDR (D)

Test 1: uops

Code:

  ldr d0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1005399311411013592180122510001000100014838134937439221232321000100010003893891110011000100001000391000035100061353973316223866021000390390391375391
10043892010101374200122510001000100014774036438938921232481000100010003893891110011000100001000391000038103561353973216223866621000390375375375390
10043892000000374218181225100010001000140600364389389212324710001000100038938911100110001000110003910360010006035073216223860041000390390390390390
100438921141101374218016251000100010001483803493893891973249100010001000389389111001100010001100039103513510350003973216223710621000390392390390390
1004391200440013790121812251000100010001406013643893892123247100010001000391374111001100010000100001035039103561353973216223866601000390390390390390
1004389210410013742181802510001000100014838136438937421232491000100010003993891110011000100001000391000135103561353973216223710621000375390390375390
1004374200411013590018122510001000100014060136437438921232471000100010003893741110011000100001000391035035100000353973216223860621000375390390375375
10043742004100135900181225100010001000140601364374391212324710001000100039438911100110001000010000103503510366103973216223716001000390375395390375
1004391300411003590180112510001000100014844134938938919732471000100010003893911110011000100001000391035039103561353973216223966021000392390390390390
100439430045101374000025100010001000149821369389374212325010001000100038937411100110001000010000100003510356003973216223866621000389390375390390

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire (01) | cycle (02) | 03 | 08 | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | dcache load miss (bf) | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
5020512004789901100010100012002011970810948025601034010210001100003010010000100001079200573604461383990120023012004712004711314131136585010030200100001000060200100001000012004712004711502011009910040100100001000011001000011100000010000110003210110111119646400006601000040100120036120048120118120049120048
50204120047899000000101000120020119594109449256010040100100001000030100100001000010792005736044613446101200110120047120035113143291136425010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100007010000110003210113511119646400000651000040100120051120145120052120048120051
5020412004789900000010000012003211957310946425601004010210001100003010010000100001078999573545561338180120023012004712004711314131136365010030200100001000060200100001006612003512004711502011009910040100100001000001001000001100000010000100003210110111119660400020051000040100120048120048120085120048120048
5020412003590000000000000012002011964010946125601034010210001100003010010000100001078862573545561338180120026012004712004711314331136585010030200100641000060200100001000012004712004711502011009910040100100001000001001000001100001010000110003210113511119660400026681000040100120036120080120066120057120048
5020412003589900000010000012002011961310946125601004010210001100003010010000100001078862573604461338180120026012004712004711314131136365010030200100001000060200100001000012003512003511502011009910040100100001000001001000001100000010000110003210113511119660400000051000040100120048120048120103120051120036
50204120047899000000100000120020119576109461256010040100100001000030100100001000010881485736188613454101200230120035120035113143171136585010030200100001000060200100001000012004712003511502011009910040100100001000001001000001100000010000010003210113501119646400006601000040100120036120107120083120048120048
50204120035899000000130010012003211957210946125601004010010000100003010010000100001078862573604461344610120023012003512004711323331136365010030200100001000060200100001000012003512003511502011009910040100100001000001001000001100000010000010003210113511119646400026051000040100120095120094120074120069120048
5020412003589900000000000012003211956710946225601034010010000100003026910000100001079200573604461338180120023312004712005011314131136585010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100002010000110003210110111119646400026051000040100120048120048120115120058120048
5020412004789900000010010012003211959410946125601034010210000100003010010000100001078862573604461344610120023012004712003511314131136585010030200100001000060200100001000012005012004711502011009910040100100001000001001000001100000010000010003210113511119685400026651000040100120048120054120111120049120036
50204120047900100101243593080000012340512136611070399160528404051007110072350621178711678116271658199886220481112002301200471200501135602531152855814335559118221183569536119801193912277112358734150201100991004010010000100001100100520010039013142510052002004093136712122204402696001000040100123147123668123440124035123799

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d0 | d5 | d6 | d9 | da | db | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50025120057899100000002010001200421195181094552560013400141000210000300101000010055107969257366686133216012001112005412003511317231136935001030020100001000060020100001000012005412005111500211091040010100001000001010000011000000010000101003141021070002211966940000130121000040010120055120052120036120055120055
50024120051900000000001000001200361195121095412560013400121000010000300101000010000107955757363806132757112002712005412003511317231136745001030020100001000060020100001000012003512005111500221091040010100001000001010000011000000010000001003179021070002211966940002010121000040010120055120036120055120055120055
50024120035899000000001010001200391195121094492560013400101000110000300101000010000107962957363806132757012002712005412005411317231136935001030020100001000060020100001000012005412003511500221091040010100001000001010000011000000010000000003141021070002511966640000100121000040010120055120055120055120055120055
5002412005189900000000100000120039119513109449256001040012100001000030010100001000010795845736380613366211200301200351200351131723113674500103002010000100006002010061100001200351200511150021109104001010000100000101000001100000001000010100314102107000221196694000013091000040010120055120055120036120036120036
500241200548990000010000000012003911951210946725600134001010001100003001010000100001079584573638061336620120030120054120054113172311369350010300201000010000600201000010000120035120051115002110910400101000010000010100000110000000100001000031410210700132119669400021013121000040010120036120055120052120105120038
50024120057899000010006010001200201194921094492560013400121000110000300101000010000107958457363806133662012003312005712005711316231136805001030020100001000060020100001000012006012004111500211091040010100001000011010002111000210110000111123141021070002211967240004131001000040010120058120042120042120061120061
50024120041899100001000000001200391195121094642560013400121000110000300101000010000107951757363806133662012001112005412005411317231136935001030020100001000060398100001000012005112005411500211091040010100001000001010000011000000010000101003141051070002211966940002101091000040010120055120052120055120036120055
500241200518990000000010000012003911949210946725600134001210001100003001010000100001079584573545561336620120027120054120079113172311369350010300201000010000600201000010000120054120051115002110910400101000010000010100000110000000100001010031410210700022119669400021313121000040010120052120036120036120036120055
5002412005489900000000100000120042119499109473256001340014100011000030010100001000010794635736668613396801200611200351200541131723113693500103002010000100006002010000100001200541200511150021109104001010000100000101000000100001001000111110314102107000221196724000401301000040010120058120042120058120061120058
50024120057899110001001000001200421195181094702560016400121000210000300101000010000107963857365246134121012002712003512005111317231136935025330020100001000060020100001000012003512005411500211091040010100001000001010000011000000010000101003141021070002211966940002131301000040010120055120036120055120055120036

Test 3: throughput

Count: 8

Code:

  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020526732200114510012670721218162580100100800001008000050011678082669726722267221665031669080278200800002008000026727267221180201100991001008000080000010080000008003903980039613543517811611267241064800001002672826728267282672826728
8020426722200004110022670721812122580100100800001008000050011665252669726722267271664531683280100200800002008000026722267231180201100991001008000080000010080019203980039042800006135051101161126726004800001002672326708267232672326723
8020426722200004110012670721812162580100100800001008000050011678082670226726267221665031679580100200800002008000026722267221180201100991001008000080000010080000039800000080039613539511011611267241060800001002672826728267282670826728
8020426727200004110022670721201625801001008000010080000500117703826682267272672216650316781801002008000020080000267222672211802011009910010080000800000100800000398003513580035613939511011611267241064800001002672826728267282672826728
8020426727200004510022671220121225801001008000010080000500117703826702267272672216650316787801002008000020080000267272672211802011009910010080000800000100800000398003903980000613543511011611267241004800001002672326723267232672826723
8020426722200004510022671221812262580100100800001008000050011685702670226727267271665031678480100200800002008000026727267221180201100991001008000080000010080000039800390398003960354351101161126719662800001002672326728267232672826723
802042670720000451000266922121816258010010080000100800005001166525267022672726727166503167948010020080000200800002672226722118020110099100100800008000001008000003980039039800356135051101161126724004800001002672826728267282672826728
8020426722200004510022671221212162580100100800001008000050011672312670226727267271665031677680100200800002008000026727267221180201100991001008000080000010080000008003903980039613543511011611267241062800001002672326723267282672826723
80204267272000045000126707212121625801001008000010080000500116723126682267272672716645316794801002008000020080000267272672211802011009910010080000800000100800000398003903980035613543511011611267041064800001002672826708267282672826728
80204267272000045100226712018181625801001008000010080000500116780826702267272673016633316684801002008000020080000267282672211802011009910010080000800001100800000398003903980039613505110116112671910104800001002672826728267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 18 | 19 | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8002526726200000011000012671721804725800101080000108000050117020402670702673226733166773166958001020800002080000267152671411800211091010800008000011080000039080039000398003961354300502019169926719100480000102672926729267292670926728
800242672220000000000450326718201814258001010800001080000501167298026707026732267141667731669580010208000020800002671526732118002110910108000080000010800211942080057100598003860194219050201116692672500480000102672826728267282670926745
80024267282000000000041022671301201625800101080000108000050116884302670202670826708166723167078001020800002080000267322673211800211091010800008000001080020214208005810062800396056019050436169726871610480000102694526886268922688526874
80024268702010100001149501270192012105748027010802601080356501175674026683027135267271667231668880010208000020800002672826708118002110910108000080000010800000008000000008000001350005020416682672999280000102673426716268202702926734
800242673720010000000211126717218012580010108000010800005011702040266890267152671716677316694800102080000208000026732271821180021109101080000800000108002020420800571516280038011901905020516572672909280000102673326733267342673326733
8002426714200111000006512267171180152580010108000010800005011675990267070267142671516677316695800102080000208000026740267141180021109101080000800000108001921420800191015180038005701905020916952672990080000102673326733267332673326733
80024267152001111110065022671720181258001010800001080000501165822026708026732267321667731671280010208000020800002672226722118002110910108000080000010800000390800000003980039610000502081689267251010080000102672826728267092670926719
80024267082000000000045102669300121225800101080000108000050116884302670302672826710166723167078001020800002080000267282672211800211091010800008000001080000000800000003980039013539005020816572670506080000102672826723267092672826729
80024267082000000000045002671220121225800101080000108000050116884302669702672826728166723166888001020800002080000267272672211800211091010800008000001080000000800390003580039610000502051657267251010480000102672826709267232672826728
80024267222000000100041102669321218122580010108000010800005011667500266970267082672816924316688800102080000208000026722267081180021109101080000800000108000003908003900039800000039430050209167826705010480000102670926709267092672326728