Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm: Overview | Base Instructions | SIMD and FP Instructions

LDUR (Q)

Test 1: uops

Code:

  ldur q0, [x6, #1]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1005394310450013592012162510001000100015133034939437421732321000100010003943941110011000100001000010391039103961043731161137101041000395395395398376
100437430044101383211212510001000100015267137340139822132561000100010003743941110011000100001000431000000103801390731161139514071000399395375399395
1004398300451013592112162510001000100015267037337439822232321000100010003983741110011000100001000010390001038613907311611395141471000399375395399399
1004374300440013590011925100010001000152671349398398197325210001000100039839411100110001000010000103800381038004007311611371141441000375395375399399
1004398300441003830011925100010001000140600373398398221325610001000100039839411100110001000010004310000038103860044731161139101401000375399399399375
10043983000101383000162510001000100015274137339839822132561000100010003943941110011000100001000431038003810386139447311611371141471000375395395375399
1004394300441013832011925100010001000153380373398398198325610001000100039839411100110001000010004410390038103860394473116113950001000399395399375375
1004398300440013832110251000100010001406003493943941973256100010001000398394111001100010000100001038200103860044731161139501071000399395395375375
10043743000100383210025100010001000152740349374398197323210001000100037439411100110001000010004310380038103900390731161139101041000399395395399399
1004378300440013592001925100010001000140600373394398221323210001000100039839411100110001000010004310380038100001043731161139514071000399395375375410

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldur q0, [x6, #1]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0050

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 19 | 1e | 1f | 22 | 24 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50205120053899111100593520001200301195151096032560103401021000210000301001000010000107905357463156136419012003201200561200411131510311367350100302001000010000602001000010000120041120053115020110099100401001000010000010010002211000200011000011011003210510723119663400040081000040100120057120042120057120058120057
50204120041899100000200001200421195131094662560106401021000210000301001000010000107902657363326133458012003201200561200561131480311367050100302001000010000602001000010000120056120053115020110099100401001000010000110010001101000200011000011111003210413533119660400049601000040100120042120057120057120057120057
50204120056899120004700001200351195091094612560103401351000010000301001000010000107899957361886136166012002601200351200501131410311366750100302001000010000602001000010000120050120047115020110099100401001000010000010010000011000000031000410000003210310133119657400046081000040100120057120042120054120042120432
50204120041899110000101001200321194931094632560100401021000110000301001000010000107899957361886136166012001101200501200351131450311366750100302001000010000602001000010000120050120047115020110099100401001000010000010010002211000100111000011111003210313533119654400049951000040100120054120057120054120042120054
50204120053899110000200001200201194931094612560103401001000110000301001000010000107899957361886133818012002601200501200501131450311365850100302001000010000602001000010000120050120047115020110099100401001000010000010010002211000200111000011011003210310733119663400020951000040100120051120051120036120051120051
50204120050899000000101001200381195151094692560106401041000210000301001000010000107905357364766136419012003201200561200561131510311367350100302001000010000602001000010000120056120041115020110099100401001000010000010010002211000300011000011110003210310733119663400029681000040100120057120057120054120057120057
50204120056899100000200001200261195151094552560106401041000110000301001000010000107909757364766133458012003201200561200531131510311367350100302001000010000602001000010000120056120053115020110099100401001000010000010010000011000000031000010000103210310133119657400006651000040100120051120051120051120048120036
50204120050899000000100001200351194931094492560100401021000010000301001000010000107899957354556134461012002601200501200501131450311365850100302001000010000602001000010000120050120047115020110099100401001000010000010010000011000000001000000000003210310733119657400026001000040100120051120036120051120051120051
50204120050899000000000001200321195091094612560103401001000110000301001000010000107920057354556133818012002601200501200471131450311363851071302001000010000602001000010000120037120048415020110099100401001000010000010010000011000001701000100102003237410733119660400020981000040100120036120037120038120054120039
502041200369010110101800001200361195191094612560103401021000110008301001000010000107920057360446133818012001101200471200531131450311363650100302001000010000602001000010000120050120035115020110099100401001000010000010010000011000000001000010104003210412463119657400020651000040100120048120052120051120051120054

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0061

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 1f | 22 | 23 | 3f | 43 | 49 | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
500251200618991101103010120046101195191094742560019400161000310000300101000010000107964757361406134172112003712006112004911317931137005001030020100001000060020100001000012006112004911500211091040010100001000001010002221000301210001120113141210722119676400060651000040010120050120062120106120062120062
500241200618991011103010120046111195071094742560019400141000210000300101000010000107964757361406133560112003712004912006111317931137005001030020100001000060020100001000012006112004911500211091040010100001000001010002221000301510001121113141310722119676400066601000040010120062120050120062120050120062
500241200498991101018010120046111195191094742560019400161000310000300101000010000107964757367166133560112002512006112006111317931136885001030020100001000060020100001000012006112004911500211091040010100001000001010003221000201210001121113141210722119676400066601000040010120050120062120062120062120050
5002412006189910010030001200461111951910946225600194001410003100003001010000100001079647573671661341721120025120049120061113179311368950010300201000010000600201000010000120061120061115002110910400101000010000010100022210004551210001121113141210722119664400066651000040010120062120062120062120050120050
500241200498991111008000120046101195651094622560019400161000210000300101000010000107954157367166134172112002512006112006111317931137005001030020100001000060020100001000012006112006111500211091040010100001000001010002301000300210001121103141210722119664400060651000040010120062120062120050120050120062
500241200498991111002000120046111195191094622560019400141000310000300101000010000107954157361406134172112003712006112006111317931136885001030020100001000060020100001000012004912004911500211091040010100001000001010003301000201210001121113141210722119676400066601000040010120050120050120062120050120050
5002412006189910000030001203941111951910976125600194001410003100043001010000100001079541575119761341721120449120061120498113167311368850496300201000010000600201000010000120061120062115002110910400101000010000110100012210003112100011211231412107281196764001262351000040010120052120062120050120062120062
500241200619001000003000120046111195191094622560019400141000310000300101000010000107964757367166134172112003712004912006111317931136885001030020100001000060020100001000012006112006111500211091040010100001000001010001121000300210001021113141210722119676400060651000040010120062120062120062120062120062
50024120049899100110800012004610119507109474256001640016100031000030010100001000010795505736716613417201200371200491200491133043113688500103002010000100006002010000100001200611200611150021109104001010000100000101000122100020121000112111314110107221196764000602301000040010120062120112120050120062120062
500241200968991011002010120080111195191094742560019400161000310000300101000010000107964757361406134172112002512006112006111316731137005001030020100001000060020100001000012006112006111500211091040010100001000011010001121000201210001121103141210722119676400066651000040010120062120062120062120062120062

Test 3: Throughput

Count: 8

Code:

  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  ldur q0, [x6, #1]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | e7 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
8020526728200000002267072001825801001008000011280000500116652512669726727267271665031666580100200800002008000026732268351180201100991001008000080000110080000080039003980000600435110116112671901062800001002672826708267282670826728
802042670720000450022671220121125801001008000010080000500116723112670226727267071664531668580100200800002008000026727267221180201100991001008000080000110080000398003500080039613505110116112672301064800001002672826728267282670826708
802042670720000450002671201212162580100100800001008000050011665251267022672726727166503166858010020080000200800002672726722118020110099100100800008000001008000039800001035800000035395110116112670401064800001002670826708267282672826708
802042670720011000226692212016258010010080000100800005001167808126702267072672716630316685801002008000020080000267272672211802011009910010080000800000100800000800000039800396135435110316112672406162800001002671326723267232670826728
80204267072000057000267122121216258010010080000100800005001165789126702267272670716650316685801002008000020080000267272670711802011009910010080000800000100800003980035004280039013905110116112672401000800001002672826728267282672326708
80204267282000045102267122181202580100100800001008000050011770381266822672726727166503166858010020080000200800002673726722118020110099100100800008000001008000039800000039800396139435110116112672400104800001002672826708267082672826728
802042672720000450022669221212162580100100800001008000050011665251267022672726727166503166808010020080000200800002672726722118020110099100100800008000001008000043800350039800006035435110116112670400100800001002672826723267082672826708
80204267222000000002671221212122580100100800001008000050011665251267022670726722166503166658010020080000200800002671326722118020110099100100800008000001008000043800390039800396035435110116112672401004800001002672826708267082670826723
8020426727200000102267120120122580100100800001008000050011672311267022672726707166303166658010020080000200800002670726722118020110099100100800008000001008000039800390039800390139435110516112672701000800001002688126884267282672826708
80204267072001142002266922120162580100100800001008000050011672311266822670726727166303166858010020080000200800002672726722118020110099100100800008000001008000008003500398003901350511011611267190064800001002672826708267282672826728

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | c5 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
800252673620011100670012672230020258001010800001080000501167791126690026736267361668231671680010208000020800002673626736118002110901010800008000011080020194380059200218003961604319100502041666267331313580000102673826737267372673826738
800242673620011100671032672237718258001010800001080000501167793026712326744267451668131671680010208000020800002674126736118002110901010800008000001080020204380058000608004061194319000502081643267341313580000102673826737267162671626737
80024267372001010066103267003700258001010800001080000501168286026711026737267371668131671680010208000020800002673626736118002110901010800008000001080021214380059001618000061584319100502041687267341313580000102673726715267372673726715
80024267152001000021103267223702025800101080000108000050116828612671102673626736166813167168001020800002080000267362673611800211090101080000800000108001919080059110218004001584319100502091643267341313580000102673726738267382673726737
800242673720011000661012671601121625800101080000108000050116689612670302672826728166723166888001020800002080000267272672711800211090101080000800000108000004380039000080039010430020502041644267241010080000102670926728267092672826729
80024267312000000045101267160111625800101080000108000050116884312670202670826731166723167078001020800002080000267312672811800211090101080000800000108000004380039000398003961043000050207166726734130580000102673826738267372673726737
8002426736200101106910326722377182580010108000010800005011694511266890267362673616681316694800102080000208000026736267361180021109010108000080000010800212043800591026180000005901900050204164726733130580000102673726738267152673826716
80024267362001011167103267003072025800101080000108000050116779112671202671426737166823167178001020800002080000267362673611800211090101080000800001108002020438005910161800006159431900050207167826734130080000102673726737267382673726737
800242671420011000660032672437720258001010800001080000501166960126712326888267371665931671680010208000020800002671526736118002110901010800008000011080021214380059000608004061594319000502041644267331313580000102673726738267372673726738
80024267362001110021103267213772025800101080000108000050116828612669002673726715166813166958001020800002080000267362673611800211090101080000800000108001920438006010061800406158431920050204163426733130580000102673826737267372671626737