Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (D)

Test 1: uops

Code:

  ldr d0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005399311411013592180122510001000100014838134937439221232321000100010003893891110011000100001000391000035100061353973316223866021000390390391375391
10043892010101374200122510001000100014774036438938921232481000100010003893891110011000100001000391000038103561353973216223866621000390375375375390
10043892000000374218181225100010001000140600364389389212324710001000100038938911100110001000110003910360010006035073216223860041000390390390390390
100438921141101374218016251000100010001483803493893891973249100010001000389389111001100010001100039103513510350003973216223710621000390392390390390
1004391200440013790121812251000100010001406013643893892123247100010001000391374111001100010000100001035039103561353973216223866601000390390390390390
1004389210410013742181802510001000100014838136438937421232491000100010003993891110011000100001000391000135103561353973216223710621000375390390375390
1004374200411013590018122510001000100014060136437438921232471000100010003893741110011000100001000391035035100000353973216223860621000375390390375375
10043742004100135900181225100010001000140601364374391212324710001000100039438911100110001000010000103503510366103973216223716001000390375395390375
1004391300411003590180112510001000100014844134938938919732471000100010003893911110011000100001000391035039103561353973216223966021000392390390390390
100439430045101374000025100010001000149821369389374212325010001000100038937411100110001000010000100003510356003973216223866621000389390375390390

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
5020512004789901100010100012002011970810948025601034010210001100003010010000100001079200573604461383990120023012004712004711314131136585010030200100001000060200100001000012004712004711502011009910040100100001000011001000011100000010000110003210110111119646400006601000040100120036120048120118120049120048
50204120047899000000101000120020119594109449256010040100100001000030100100001000010792005736044613446101200110120047120035113143291136425010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100007010000110003210113511119646400000651000040100120051120145120052120048120051
5020412004789900000010000012003211957310946425601004010210001100003010010000100001078999573545561338180120023012004712004711314131136365010030200100001000060200100001006612003512004711502011009910040100100001000001001000001100000010000100003210110111119660400020051000040100120048120048120085120048120048
5020412003590000000000000012002011964010946125601034010210001100003010010000100001078862573545561338180120026012004712004711314331136585010030200100641000060200100001000012004712004711502011009910040100100001000001001000001100001010000110003210113511119660400026681000040100120036120080120066120057120048
5020412003589900000010000012002011961310946125601004010210001100003010010000100001078862573604461338180120026012004712004711314131136365010030200100001000060200100001000012003512003511502011009910040100100001000001001000001100000010000110003210113511119660400000051000040100120048120048120103120051120036
50204120047899000000100000120020119576109461256010040100100001000030100100001000010881485736188613454101200230120035120035113143171136585010030200100001000060200100001000012004712003511502011009910040100100001000001001000001100000010000010003210113501119646400006601000040100120036120107120083120048120048
50204120035899000000130010012003211957210946125601004010010000100003010010000100001078862573604461344610120023012003512004711323331136365010030200100001000060200100001000012003512003511502011009910040100100001000001001000001100000010000010003210113511119646400026051000040100120095120094120074120069120048
5020412003589900000000000012003211956710946225601034010010000100003026910000100001079200573604461338180120023312004712005011314131136585010030200100001000060200100001000012004712004711502011009910040100100001000001001000001100002010000110003210110111119646400026051000040100120048120048120115120058120048
5020412004789900000010010012003211959410946125601034010210000100003010010000100001078862573604461344610120023012004712003511314131136585010030200100001000060200100001000012005012004711502011009910040100100001000001001000001100000010000010003210113511119685400026651000040100120048120054120111120049120036
50204120047900100101243593080000012340512136611070399160528404051007110072350621178711678116271658199886220481112002301200471200501135602531152855814335559118221183569536119801193912277112358734150201100991004010010000100001100100520010039013142510052002004093136712122204402696001000040100123147123668123440124035123799

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50025120057899100000002010001200421195181094552560013400141000210000300101000010055107969257366686133216012001112005412003511317231136935001030020100001000060020100001000012005412005111500211091040010100001000001010000011000000010000101003141021070002211966940000130121000040010120055120052120036120055120055
50024120051900000000001000001200361195121095412560013400121000010000300101000010000107955757363806132757112002712005412003511317231136745001030020100001000060020100001000012003512005111500221091040010100001000001010000011000000010000001003179021070002211966940002010121000040010120055120036120055120055120055
50024120035899000000001010001200391195121094492560013400101000110000300101000010000107962957363806132757012002712005412005411317231136935001030020100001000060020100001000012005412003511500221091040010100001000001010000011000000010000000003141021070002511966640000100121000040010120055120055120055120055120055
5002412005189900000000100000120039119513109449256001040012100001000030010100001000010795845736380613366211200301200351200351131723113674500103002010000100006002010061100001200351200511150021109104001010000100000101000001100000001000010100314102107000221196694000013091000040010120055120055120036120036120036
500241200548990000010000000012003911951210946725600134001010001100003001010000100001079584573638061336620120030120054120054113172311369350010300201000010000600201000010000120035120051115002110910400101000010000010100000110000000100001000031410210700132119669400021013121000040010120036120055120052120105120038
50024120057899000010006010001200201194921094492560013400121000110000300101000010000107958457363806133662012003312005712005711316231136805001030020100001000060020100001000012006012004111500211091040010100001000011010002111000210110000111123141021070002211967240004131001000040010120058120042120042120061120061
50024120041899100001000000001200391195121094642560013400121000110000300101000010000107951757363806133662012001112005412005411317231136935001030020100001000060398100001000012005112005411500211091040010100001000001010000011000000010000101003141051070002211966940002101091000040010120055120052120055120036120055
500241200518990000000010000012003911949210946725600134001210001100003001010000100001079584573545561336620120027120054120079113172311369350010300201000010000600201000010000120054120051115002110910400101000010000010100000110000000100001010031410210700022119669400021313121000040010120052120036120036120036120055
5002412005489900000000100000120042119499109473256001340014100011000030010100001000010794635736668613396801200611200351200541131723113693500103002010000100006002010000100001200541200511150021109104001010000100000101000000100001001000111110314102107000221196724000401301000040010120058120042120058120061120058
50024120057899110001001000001200421195181094702560016400121000210000300101000010000107963857365246134121012002712003512005111317231136935025330020100001000060020100001000012003512005411500211091040010100001000001010000011000000010000101003141021070002211966940002131301000040010120055120036120055120055120036

Test 3: throughput

Count: 8

Code:

  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  ldr d0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526732200114510012670721218162580100100800001008000050011678082669726722267221665031669080278200800002008000026727267221180201100991001008000080000010080000008003903980039613543517811611267241064800001002672826728267282672826728
8020426722200004110022670721812122580100100800001008000050011665252669726722267271664531683280100200800002008000026722267231180201100991001008000080000010080019203980039042800006135051101161126726004800001002672326708267232672326723
8020426722200004110012670721812162580100100800001008000050011678082670226726267221665031679580100200800002008000026722267221180201100991001008000080000010080000039800000080039613539511011611267241060800001002672826728267282670826728
8020426727200004110022670721201625801001008000010080000500117703826682267272672216650316781801002008000020080000267222672211802011009910010080000800000100800000398003513580035613939511011611267241064800001002672826728267282672826728
8020426727200004510022671220121225801001008000010080000500117703826702267272672216650316787801002008000020080000267272672211802011009910010080000800000100800000398003903980000613543511011611267241004800001002672326723267232672826723
8020426722200004510022671221812262580100100800001008000050011685702670226727267271665031678480100200800002008000026727267221180201100991001008000080000010080000039800390398003960354351101161126719662800001002672326728267232672826723
802042670720000451000266922121816258010010080000100800005001166525267022672726727166503167948010020080000200800002672226722118020110099100100800008000001008000003980039039800356135051101161126724004800001002672826728267282672826728
8020426722200004510022671221212162580100100800001008000050011672312670226727267271665031677680100200800002008000026727267221180201100991001008000080000010080000008003903980039613543511011611267241062800001002672326723267282672826723
80204267272000045000126707212121625801001008000010080000500116723126682267272672716645316794801002008000020080000267272672211802011009910010080000800000100800000398003903980035613543511011611267041064800001002672826708267282672826728
80204267272000045100226712018181625801001008000010080000500116780826702267272673016633316684801002008000020080000267282672211802011009910010080000800001100800000398003903980039613505110116112671910104800001002672826728267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526726200000011000012671721804725800101080000108000050117020402670702673226733166773166958001020800002080000267152671411800211091010800008000011080000039080039000398003961354300502019169926719100480000102672926729267292670926728
800242672220000000000450326718201814258001010800001080000501167298026707026732267141667731669580010208000020800002671526732118002110910108000080000010800211942080057100598003860194219050201116692672500480000102672826728267282670926745
80024267282000000000041022671301201625800101080000108000050116884302670202670826708166723167078001020800002080000267322673211800211091010800008000001080020214208005810062800396056019050436169726871610480000102694526886268922688526874
80024268702010100001149501270192012105748027010802601080356501175674026683027135267271667231668880010208000020800002672826708118002110910108000080000010800000008000000008000001350005020416682672999280000102673426716268202702926734
800242673720010000000211126717218012580010108000010800005011702040266890267152671716677316694800102080000208000026732271821180021109101080000800000108002020420800571516280038011901905020516572672909280000102673326733267342673326733
8002426714200111000006512267171180152580010108000010800005011675990267070267142671516677316695800102080000208000026740267141180021109101080000800000108001921420800191015180038005701905020916952672990080000102673326733267332673326733
80024267152001111110065022671720181258001010800001080000501165822026708026732267321667731671280010208000020800002672226722118002110910108000080000010800000390800000003980039610000502081689267251010080000102672826728267092670926719
80024267082000000000045102669300121225800101080000108000050116884302670302672826710166723167078001020800002080000267282672211800211091010800008000001080000000800000003980039013539005020816572670506080000102672826723267092672826729
80024267082000000000045002671220121225800101080000108000050116884302669702672826728166723166888001020800002080000267272672211800211091010800008000001080000000800390003580039610000502051657267251010480000102672826709267232672826728
80024267222000000100041102669321218122580010108000010800005011667500266970267082672816924316688800102080000208000026722267081180021109101080000800000108000003908003900039800000039430050209167826705010480000102670926709267092672326728