Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (register, uxtw, Q)

Test 1: uops

Code:

  ldr q0, [x6, w7, uxtw]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100540221106111368174192025100010001000154110376382402225324010001000200038340211100110001000010000105505510256155447311611399101061000383403384403403
100438330016200387254020251000100010001467813774024022243260100010002000402402111001100010000100059105505510556155447311611400101061000403403403403403
10044023001610038718402025100010001000154040377402402224326010001000200040238311100110001000010000105505510250155447311611399101061000404403403403403
100440220016100367254032510001000100015471035838240222632601000100020003834021110011000100001000591025025105560550731161139901001000384403383403383
100440220006000367254020251000100010001547113774024052253260100010002000383402111001100010001100059105502510550125447311611399101001000403383403403403
1004402300125103671701920251000100010001458013774004032353240100010002000402382111001100010000100001056055102501250731161139901001000403383403403403
1004402200125103672500212510001000100015471137740242322532601000100020003834021110011000100001000010550551025015544731161138010061000385403383403383
100440220016111367174032510001000100015394137738240322532411000100020004033821110011000100001000010550251055605544731161138001001000383403384403403
1004383300061003872540202510001000100015411137740340522832401000100020004023831110011000100001000591025055105560250731161140210061000403384403384403
1004402200125003672540202510001000100015394137740238322532601000100020004023901110011000100001000010550551025605644731161137901061000383403404403384

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr q0, [x6, w7, uxtw]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire uop (01) | cycle (02) | 03 | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
502051200478991101000120020119519109461256010040100100011000030100100001000010792005736044613381811200110120047120047113143311366750100302001000010000602002000010000120047120047115020110099100401001000010000100100000110000001000011003210113511119646400026081000040100120051120051120048120048120036
502041200478990001010120032119493109463256010040102100011000030100100001000010789995736044613446111200110120047120047113143311365850100302001000010000602002000010000120047120047115020110099100401001000010000100100000110000001000011003210110711119646400026651000040100120048120048120036120048120048
50204120047899000117010120035119519109462256010340102100001000030100100001000010792005736188613381811200283120049120049113141311364050100302001000010000602002000010000120035120047115020110099100401001000010000100100000110000001000011003210110111119660400029651000040100120036120048120048120048120051
502041200508990000000120032119509109449256010340102100011000030100100001000010788625735455613381811200260120035120047113143311366750100302001000010000602002000010000120050120035115020110099100401001000010000100100000010000001000011003210110122119822400026681000040100120036120036120048120048120048
502041200508991101000120020119519109512256010340100100001000030100100001000010788625736044613446111200260120047120047113145311365850100302001000010000602002000010000120035120047115020110099100401001000010000100100000110000001000010003210113511119646400026051000040100120036120051120051120048120048
502041200478990006010120035119493109461256010340100100011000030100100001000010792005736044613381811200230120035120050113143311365850100302001000010000602002000010000120050120047115020110099100401001000010000100100000110000101000211003210113511119660400000001000040100120048120036120048120036120036
50204120050899000100112003211951910946125601034010210001100003010010000100001078999573618861344611120011012003512004711314331136365010030200100001000060200200001000012004712004711502021009910040100100001000010010000011000000100001100321011711119660400009051000040100120048120036120051120048120036
502041200479000001001120032119519109449256010340102100001000030100100001000010788625736044613381811200230120047120047113143311363650100302001000010000602002000010000120050120047115020110099100401001000010000100100000110000001000011003210113511119660400020001000040100120048120036120048120048120048
502041200358990009000120032119493109449256010340102100011000030100100001000010792005735455613381811200110120035120047113141311363650100302001000010000602002000010000120035120076115020110099100401001000010000100100000010000001000011003210110111119646400026001000040100120052120036120098120101120036
502041200478991100000120020119493109461256010040100100011000030100100001000010788625735455613381811200110120047120035113141311363650100302001000010065602002000010000120054120047115020110099100401001000010000100100000110000001000011003210110711119660400026601000040100120036120036120051120048120048

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0051

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 1f | 22 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
500251200518991110101200361195091094642560013400121000010000300101000010000107951757362366133662001200271200511200351131533113690500103002010000100006002020000100661200351200351150021109104001010000100000101000001000000100001100031401107111196664000210091000040010120052120052120052120052120052
5002412005189901000012003611950910944925600104001210001100003001010000100001079517573545561327571012002712005112005111316931136905001030020100001000060020200001000012005112005111500211091040010100001000001010000110000001000000000314011071111966640002101091000040010120052120052120036120052120052
5002412005189901001012003611949910946446600104001210001100003001010000100001079517573623661336620012001112005112005111316931136905001030020100001000060020200001000012005112005111500211091040010100001000001010000110000001000011000314011071111965040002101091000040010120052120052120052120052120052
500241200518990010101200201195091094642560010400121000110000300101000010000107955757362366133662001200271200511200351131693113701500103002010000100006002020000100001200511200511150021109104001010000100000101000011000000100001100031401107111196504000001091000040010120052120052120052120052120036
50024120035899011010120036119509109449256001040012100011000030010100001000010795175736236613366200120011120054120035113153311369050010300201000010000600202000010000120035120051115002110910400101000010000010100001100001010000112003140110711119666400000091000040010120052120052120036120036120036
50024120035899011000120036119565109449256001340012100011000030010100001000010795575736236613366200120027120051120035113153311369050010300201000010000600202000010000120037120054115002110910400101000010000010100001100000010000110003140110711119666400020001000040010120052120052120052120036120036
5002412003589901000012003611949210946725600134001010001100003001010000100001079517573623661336620012001112005112005111316931136905001030020100001000060020200001000012005112005111500211091040010100001000001010000110000001000010000314011071111965040000101001000040010120052120036120052120052120036
500241200518990110001200201194921094642560010400101000110000300101000010000107958457362366132757001200111200351200761131693113674500103002010000100006002020000100001200511200511150021109104001010000100000101000001000000100001000031401107111196504000201091000040010120036120052120036120052120052
5002412005189901001012003611950910944925600134001210001100003001010000100001079557573623661336620012001112005112005111316931136745001030020100001000060020200001000012005112005111500211091040010100001000001010000110000001000010000314011071111965040000101091000040010120052120052120052120052120036
500241200358990110001200361195091094642560013400101000110000300101000010000107955757362366133662001200271200511200351131693113690500103002010000100006002020000100001200511200351150021109104001010000100000101000001000000100001100031401107111196664000201001000040010120036120054120036120054120052

Test 3: Latency 1->3 roundtrip

Chain cycles: 3

Code:

  ldr q0, [x6, w7, uxtw]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50205120050899000101001200351195201094682560103401001000010000301001000010000107899957361886133818112001112005012005011314503113658501003020010000100006020020000100001200501200471150201100991004010010000100000100100000110000000100001100203210110711119657400009681000040100120048120111120059120049120051
50204120035899000000001200351195201094632560103401001000110000301001000010000107899957361886133818012002312005012005011314503113667501003020010000100006020020000100001200501200471150201100991004010010000100000100100000110000000100001100003210110111119657400029901000040100120048120036120048120036120051
50204120047899000100001200351195471094492560100401021000010000301001000010000107920057354556133818012002612003512003511314503113667501003020010000100006020020000100001200351200471150201100991004010010000100000100100000010000000100001100003210110711119657400029681000040100120048120106120052120049120051
50204120050899000000001200321195241094632560103401021000110000301001000010000107899957360446133818112001112003512003511314503113667501003020010000100006020020000100001200351200471150201100991004010010000100000100100000110000000100001100003210110711119646400009901000040100120048120083120071120037120096
50204120050899000100001200201195211094632560103401001000110000301001000010000107899957361886136166012002612003512003511314503113667501003020010000100006020020000100001200471200351150201100991004010010000100000100100000010000100100001140003210110111119660400006651000040100120080120080120048120048120039
502041200388990001000101200351195201094632560103401021000010000301001000010000107886257360446133818012001112004712003511314103113667501003020010000100006020020000100001200471200471150201100991004010010000100000100100000110000000100001100003210110711119660400029651000040100120048120126120082120056120048
50204120050899000100001200351195371094632560103401251000010000301001000010000107886257361886133818012001112005012005011314503113667501003020010000100006020020000100001200471200471150201100991004010010000100000100100000010000000100000100003210110111119646400029051000040100120048120102120103120063120051
50204120035899000100101200351195311094612560100401021000110000301001000010000108173657385406133870012001112005012003511314103113658501003020010000100006020020000100001200351200471150201100991004010010000100000100100000110000000100001100003210110721119646400029981000040100120048120094120075120053120051
5020412004789900010012122284120728110288710604394033510050100503352111319112321135017579561561949560121858122475122376113883031011493456423342391133311399618142256811352122346122399261502011009910040100100001000001001000330100011278235100231140003836132821215121448402289681000040100123597123305123815123190122992
5020412346095701131721584012123356121396110548106660514404481005710074345101152311827116033758046916218443012256712301312361911314301401150205763635802117991194069908200001030012087212041441150201100991004010010000100000100100000010000103100001100013210210711119832400020981000040100120048120126120050120048120051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0047

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | df | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50025120053899000100110112003211950510946125600134001210001100083001010000100001079523573604461334990012002312004712003511316524113686500103002010000100006002020000100001200471200471150021109104001010000100000101000001100010550010000110314861107118119663400026651000040010120048120048120048120048120060
5002412003589900100000011200321195051094612560010400121000110000300101000010000107952357360446132757001200231200351200471131663113686500103066710000100006002020000100001200471200351150021109104001010000100000101000001100000430310000000314862107118119662400346651000040010120048120048120036120036120154
50024120052899000000110112003211949210946125600104001210001100003001010211100001079523573545561334990012002312004712004711315331138875001030020100001000060020200001000012004812004711500211091040010100001000001010000011000003601210000110314861107118119666400026651000040010120049120048120051120048120059
5002412003589900000028001120032119604109492806001340012100111000430010103681000010809375740711613653800120023120035120047113153311368650010300201000010000600202000010000120050120419115002110910400101000010000010100000110000080010000110314861107118119650400020651000040010120048120429120036120048120050
50024120047899010040130011204151195511094642560013400121000110000300101005210049108169657375966133499001201731200471200471131653113686500103002010000100006002020000100001200471200471150021109104001010000100000101000000100000390010000110314861107118119663400026651000040010120048120048120048120048120054
5002412004789900000013001120020119505109463256001340012100011000030010100001019710795235736044613982500120023120047120047113153311368650010300201000010000600202000010000120297120035115002110910400101000010000010100000110000030010000100314861107118119662400006651000040010120048120048120048120048120070
5002412005089900000001011200321195051094612560013400121000010000300101000010000107952357361136133756001200231200471200351131533113686500103002010000100006002020000100001200471200471150021109104001010000100000101000000100000660010000100314861107118119662400026051000040010120048120048120036120048120076
5002412004789900000011011200321194921094612560013400121000110000301631000010049107956857412346133499001200231200471200351131653113686500103002010000100006002020000100001200471200505150021109104001010000100000101000001100000390010000110314861107118119650400026601000040010120048120036120036120048120095
50024120047902000000110112003211950510945225600244001010001100003001010000100001088727573618861334990012002312003512004711328731136865001030020100001000060020200001000012004712004711500211091040010100001000001010000211000011901432610000110314861107118119664400026651000040010120048120048120049120058120075
500241200478990000041002120032119552109461256001340012100011000030010100001000010795235736044613349900120023120036120047113153311368650010300201000010000600202000010000120035120047515002110910400101000010000110100004010000000010000110314861107118119664400020601000040010120048120048120036120048120096

Test 4: throughput

Count: 8

Code:

  ldr q0, [x6, w7, uxtw]
  ldr q0, [x6, w7, uxtw]
  ldr q0, [x6, w7, uxtw]
  ldr q0, [x6, w7, uxtw]
  ldr q0, [x6, w7, uxtw]
  ldr q0, [x6, w7, uxtw]
  ldr q0, [x6, w7, uxtw]
  ldr q0, [x6, w7, uxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 18 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526723200011100010226727218121625801001008000010080015500116787512668226727267271665061667480114200800242001600482672226707118020110099100100800008000011008000039800000398000061039111511816102673401064800001002672826708267282670826728
802042672720000000000012670721212162580100100800001008001550011675471267012672726727166746166598011520080024200160048267272672211802011009910010080000800001100800004380035142800396100111511816002670400104800001002673126732267082670826728
802042672820010000198010226726218121625802301008000010080020500116720312668226727267271665561667980115200800242001600482670726707118020110099100100800008000001008000008003920800006000111511816002672501064800001002671826723267082673226723
8020427032201000014100012671021801625801001008000010080016500116707512670226707267271665961665980114200800242001600482672226711118020110099100100800008000001008000039800000398003901039111511816002672400104800001002672826728267362673126728
80204267072140010057000226716018120258010010080000100800145001166596126702267272672216635616659801152008002420016004826726267261180201100991001008000080000010080000080035008003960043111511816102676401002800001002671227006273302670827027
8020426707202001110192002266922001625801001008000010080016500116730312668626707267271665561668380116200800242001600482672726722118020110099100100800008000001008000039800351428000061350111511816102678804102800001002670826708267282670826728
80204267312000000045010226723200225801001008000010080015500116730312670226707267071665561665980115200800242001600482672726707118020110099100100800008000001008000039800000398000000043111511816002687200104800001002672326732267272671226714
80204267392000000000002267160121812258010010080000100800155001167303126702267222672216655616674801152008002420016004826727267221180201100991001008000080000010080000398003503580039013539111511816002693300102800001002670826708267282672326732
802042672220000000450001267070012025801001008000010080015500116688812670226727267271663561665980114200800242001600482672726722118020110099100100800008000001008000039800000358003501350111511816002674800104800001002672826708267282672826728
80204267272000100000000267072121216258010010080000100800155001167875126702267272672716635616674801162008002420016004826727267221180201100991001008000080000010080000080040035800396104311151181600268300664800001002672826708267232672826708

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025267272001001012671221121625800101080000108000050116675002668326708267281667631671180010208000020160000267312670811800211091010800008000001080000430800390388003861394405020916026726790010780000102670926732267322673226732
8002426731200004410126716011192580010108000010800005011675010267062673126708166763167118001020800002016000026731267081180021109101080000800000108000043080038038800006039005020716021726728140780000102672926709267292672826732
800242672720000010126716200025800101080000108000050116712402668326731267081667831670780010208000020160000267282672811800211091010800008000011080000430800390398000061000502011160911267431410780000102673226732267092673226732
80024267312000044001267163011625800101080000108000050116675002670626708267081665231668880010208000020160000267272673111800211091010800008000001080000430800390218800396139005020111601011267281410780000102673226709267282673226732
80024267312000044101267160000258001010800001080000501166750026683267312673116676316711800102080000201600002673126708118002110910108000080000010800004308003803880039603900502011160111126835014080000102673226732267092672826728
8002426731200000000266930101925800101080000108000050116712402670626728267281667231671180010208000020160000267312672811800211091010800008000001080000008000003980038613900502011160247268941414780000102673226732267092673226732
800242670820000440012671601200258001010800001080000501167124026706267082673116652316711800102080000201600002680626768118002110910108000080000010800000080038008000061394405020716025112684600780000102672926732267322670926728
80024267312000045101267160111925800101080000108000050116720102670626708267311667631671180010208000020160000267312672811800211091010800008000001080000008003800800396139005020111601711267371414780000102673226728267322670926709
8002426727200004400126716001025800101080000108000050116884302668326708267311665231671180010208000020160000267312670811800211091010800008000001080000430800001608000061000502071602111267121410780000102672826709267092673226732
8002526731200114510126712011216258001010800001080000501167117026706267082672716676316688800102080000201600002673126727118002110910108000080000010800004308003803880038003944050207160196268421010480000102672826729267092672926709