Apple M1 Microarchitecture Research by Dougall Johnson

Firestorm: Overview | Base Instructions | SIMD and FP Instructions
Icestorm:  Overview | Base Instructions | SIMD and FP Instructions

LDR (unsigned offset, D)

Test 1: uops

Code:

  ldr d0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire (01) | cycle (02) | 03 | 0e | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue ld/st (55) | ~dispatch ld/st (58) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op ld/st (7d) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst fp/simd load (98) | inst ldst (9b) | 9d | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | f5 | f6 | f7 | f8 | fd
1005394304500137921212025100010001000151841369394394197325210001000100039439411100110001000110004310390391039613943731161139101041000395395395395395
100439430451013792121216251000100010001498903693943942173252100010001000394394111001100010000100043103904210390139437311611391101041000395399395412395
10043943045101379212122025100010001000149890370374394197325210001000100039439411100110001000010000103903910396139437311611391101041000395395375395395
1004394304510137921212162510001000100014060136937439421732521000100010003943941110011000100001000010390391039613943731161139110041000395395395375395
10043942045101379212120251000100010001501803693943942163252100010001000394394111001100010000100043103903910396139437311611391101041000395375395395395
100439430451013792121216251000100010001498913493743942173252100010001000374394111001100010000100043103903910396140437311611391101041000395395395395375
100439430451003792121216251000100010001498903693743741973252100010001000394394111001100010000100043103903910396139437311611391101041000399396375404395
100439430451013790121216251000100010001501813693943942173252100010001000394394111001100010000100043103904510396139437311611394101041000395395395395395
100439430010137921212162510001000100015018136939439421732521000100010003943941110011000100001000010390010390139437311611391101041000395395395395395
1004394204510037920016251000100010001501803693943942163252100010001000394394111001100010000100043103903910396139437311611371101041000395395395395395

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, #8]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50205120047899000010010001200201194931095042560103401001000110000301001000010000107899957360446133818112002301200501200351131433113667501003020010000100006020010000100001200501200351150201100991004010010000100000100100000110000100100001000003210210111119660400026681000040100120051120051120036120054120051
502041200478990000000180001200351195091094612560103401021000010000301001000010000107899957360446134461112001101200351200351131413113658501003020010000100006020010000100001200351200471150201100991004010010000100000100100000110000000100001010003210110711119657400029651000040100120051120051120053120109120051
50204120047899000000010001200351195191095122560103401021000110000301001000010000107886257354556136749112002301200501200471131413113636501003020010000100006020010000100001200471200471150201100991004010010000100000100100000110000500100001000003210113511119657400029081000040100120036120051120051120115120048
50205120035899000000020001200351195191094652560103401021000110000301001000010000107886257361886133818112001101200501200501131453113745501003020010000100006020010000100001200351200471150201100991004010010000100000100100000110000003100001010003210113511119657400029001000040100120051120036120036120042120036
50204120102899000000010101200321195191095212560103401001000210000301001000010000107899957361886134461112001101200471200351131453113636501003020010000100006020010000100001200501200471150201100991004010010000100000100100000110000000100000000003210113511119657400029681000040100120036120036120077120054120051
50204120050899000000000101200351195091095222560103401021000210000301001000010000107905357357506133458112001701200411200411131373113630501003020010000100006020010000100001200411200531150201100991004010010000100000100100021110002021100001110003210113511119657400000681000040100120051120048120051120054120036
50204120047899000000000001200351195091094782560106401041000210000301001000010000107905357357506136419112001701200561200561131513113630501003020010000100006020010000100001200411200411150201100991004010010000100000100100012010002011100000101103210110711119663400029081000040100120042120057120057120042120042
502041200418991010000101012004111951210950725601064010410001100003010010000100001079053573647661364191120017012005612005611315131136305010030200100001000060200100001000012005612004111502011009910040100100001000001001003910100422384949100300111003933131811121673402129681000040100120147120055120060120058120237
5020412005689911000202533141760001224631208691105737466047140363100501006034354113111143311472425793998620324811223270120056120041113137311363053752340711093711184666681123211179121904122456231502011009910040100100001000001001002641100360677596100000101003210113511119654400029051000040100120055120052120058120114120079
502041200548990001100600012003911951310948225601034010210001100003010010000100001078862573623661344610120030012005112005411314131136585010030200100001000060200100001000012003512003511502011009910040100100001000001001000001100000001000000000032101107111198184000210001000040100120055120055120058120133120052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire (01) | cycle (02) | 03 | 05 | 08 | 09 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue fp/simd (54) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch fp/simd (57) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | huge thing fp/simd (5b) | 60 | 69 | 6b | 6d | 6e | dispatch stall (70) | 71 | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map op fp/simd (7e) | ~map lookup int (7f) | ~map lookup ld/st (80) | ~map lookup fp/simd (81) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a6 | a8 | a9 | ac | af | b5 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | branch mispredict (cb) | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
50025120051899001010100012002011951210944925600164001410000100003001010000100001079517573638061336620120027012005412005611317703113690500103002010000100006002010054100001204171200521150021109104001010000100001101000601100019021100001010003140210711119675400021310121000040010120042120042120061120061120110
5002412004189911010014000120416119530109455256001640014100061000430010101041005010952965736716613758801200360120268120139113302027113699502533002010000100006002010213100001200631200591150021109104001010000100000101000111100028217100001111103140110711119672400041313121000040010120062120061120061120064120110
500241200628991100001610012004511951810945525600164001410010100003001010000100491079629573652461339680120017012006412025311317503113699500103002010000100006002010000100001204341200411150021109104001010000100000101000270100029711432610000110110314021071111972940004013121000040010120042120042120061120042120496
5002412005790011010120001200451195181094732560016400121000210000300101000010000107963857366686133216012003601200411200601131780311369950010300201000010000600201000010000120060120057115002110910400101000010000010100012010002710110000111100314011071111965640004131301000040010120058120042120058120061120087
5002412005789911010010001204251195181094552560016400141000110000300101000010000107961157365246133216012003301200571200601131780311369950010300201000010000600201000010000120041120057115002110910400101000010000010100031110002753110000011100314011071111965640002010121000040010120042120061120042120058120110
500241200419001100002000120045119518109455256001640014100011000030010100001000010796115736524613412101200360120060120063113159031137005001030020100001000060020100001000012004112014711500211091040010100001000001010001101000372041000001110031401107111196754000400121000040010120042120042120061120042120088
500241200608991100001000120026119499109473256001340014100021000030010100001000010796115736668613321601200360120057120060113159031136805001030020100001000060020100001000012006012005711500211091040010100001000001010003311000362316910000011120314011071111965640004131001000040010120061120061120042120061120116
5002412006089910010120011200451195181094552560016400141000210000300101000010000107961157366686133968012003601200601200571131780311369650010300201000010000600201000010000120057120060115002110910400101000010000010100032110002771110000111110314011071111967540004131091000040010120061120061120042120043120117
5002412018089911010010001200451195151094552560016400121000010000300101000010000107955757363806132757012001101200541200541131720311369050010300201000010000600201000010000120054120096115002110910400101000010000010100000110000740010000101000314011071111965040002100121000040010120036120055120055120036120094
5002412005490000000016000120036119512109449256001640012100001000030010100001000010795175735455613366201200300120054120054113172031136935001030020100001000060398100001000012005412003511500211091040010100001000001010000011000011300100000000003140110711119669400021013121000040010120100120058120061120058120113

Test 3: throughput

Count: 8

Code:

  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 60 | 69 | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a3 | a6 | a8 | ac | af | b5 | b6 | bb | dcache load miss (bf) | c5 | branch mispredict (cb) | cd | cf | d5 | d6 | dd | inst fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
80205267282000010631012686321211034980100100800001008001551111665960266872688626876166356166598030020080024200800242671226728118020110099100100800008000001008000043800380418000060394411151183160026724014104800001002672826728267322673226728
80204267072000000450012671200102580100100800001008001550011665960267062670726727166356166798011520080024200800242674126720118020110099100100800008000011008000008000003880039600431115164016002672811400800001002673526737267322670826708
80204269922010111440012687021102580100100800001008001550011665960267102671126716166596166838011620080024200800242686326837118020110099100100800008000001008000043800381448016861394311151180160026728014104800001002673226728267082673026728
80204267272000000010126712012116258010010080000100800145001166596126706267362673516663616687801142008002420080024268662676311802011009910010080000800000100800004380039039800006139011151180160026704014107800001002673726736267322670826708
8020426707200000057101267162100258010010080000100800155001165620026706267312673116630316693801002008000020080000267322673711802011009910010080000800000100800000800390418000061394300051101161126713014140800001002673226735267322670826728
8020426732200000057001267182011925802301008000010080000500117312912670226731267311675531678680100200800002008019327035267271180201100991001008000080000010080000438003813880000013944000511011611267280004800001002670826728267322672826713
802042672720010004410126692210192580100100800001008000050011665250267062672726707166343166948010020080000200800002695426740118020110099100100800008000001008000008003803880038003900005110116112672800140800001002673226732267322672826728
802042673120000001200026716010192580100100800001008000050011671270266832673126707166503166898010020080000200800002691626730118020110099100100800008000001008000043800380388003861394300051101251126731010144800001002670826732267082673226728
802042672720000004400126716210025801001008000010080000500116723112668226707267311665431668980100200800002008000026912267501180201100991001008000080000010080000080038008000060394400051101161126728010107800001002673126708267082686826733
8020426733200000001012669221119258010010080000100800005001165556026706267072673116630316685801002008000020080000267362672711802011009910010080000800000100800000800380388003801394400051101411126728014107800001002673226732267082670826732

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire (01) | cycle (02) | 03 | 05 | 08 | 0b | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | inst issue (52) | ~issue int (53) | ~issue ld/st (55) | ~dispatch int (56) | ~dispatch ld/st (58) | huge thing int (59) | huge thing ld/st (5a) | 69 | 6b | 6d | 6e | dispatch stall (70) | scheduler rewind (75) | scheduler stall (76) | ~dispatch op (78) | ~map op int (7c) | ~map op ld/st (7d) | ~map lookup int (7f) | ~map lookup ld/st (80) | 82 | 83 | pipeline redirect (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst b.cc (94) | inst integer (97) | inst fp/simd load (98) | inst ldst (9b) | 9d | 9f | a0 | a1 | a3 | a5 | a6 | a7 | a8 | a9 | ac | af | b5 | b6 | bb | dcache load miss (bf) | dtlb miss (c1) | c2 | cf | d2 | d5 | d6 | dd | inst fetch restart (de) | e0 | ea | eb | ec | ld/st retires (ed) | gpr retires (ef) | f5 | f6 | f7 | f8 | fd
800252673320011110651032671721801625800101080000108000050116941126689026735267321665931669480010208000020800002683226740118002110910108000080000110800192042080057102218003860574219150200191617162671299080000102673326733267332673426733
8002426732201100102110326700218181525800101080000108000050116759926690026735267151667831671380010208000020800002684226743118002110910108000080000010800202042080057100598003861194219150200141617142672909280000102673326733267332673326733
8002426732200111116500226717200152580010108000010800005011669602670702671426732166783167138001020800002080000268422673711800211091010800008000001080020194208001910159800386157019050200291617112672999280000102673426734267342673426716
8002426733200111006510226717218181625800101080000108000050116582226708026732267321667731671280010208000020800002674026746118002110910108000080000110800192042080057101598000061574219150200351618162673699280000102673426734267332673326733
8002426715200110006500326700218181625800101080000108000050116729826690026777267381666031671280010208000020800002671726732118002110910108000080000010800191942080057100628003861574219150200241614162672999080000102671526733267332673326734
800242673320011000651022669901818162580010108000010800005011675992670702671426732166773167128001020800002080000268322671911800211091010800008000001080021214208005700259800380157019050200221618182674309280000102671526715267332673326734
800242671520010000661032671821818152580010108000010800005011675992670802673226732166783167138001020800002080000268562673711800211091010800008000001080020194208005711159800386057421905020025168162673299280000102673326716267342673426736
8002426715200101006510326717018181625800101080000108000050116766026689026733267141667731671280010208000020800002683726720118002110910108000080000010800191942080019100598003801574219050200241617142671299280000102673426733267332673326733
800242673220011000650032671801818025800101080000108000050116766026707026732267321668431671280010208000020800002674426835118002110910108000080000010800191942080057000218003860194219250200211617122686599280000102673326733267332673326734
800242673220110000211002671921818162580010108000010800005011702042670702673226733166773166948001020800002080000268412673921800211091010800008000001080019204229800180002180038011901905020025161692673099280000102673326733267162671626716