Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (unsigned offset, D)

Test 1: uops

Code:

  ldr d0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 0e | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005394304500137921212025100010001000151841369394394197325210001000100039439411100110001000110004310390391039613943731161139101041000395395395395395
100439430451013792121216251000100010001498903693943942173252100010001000394394111001100010000100043103904210390139437311611391101041000395399395412395
10043943045101379212122025100010001000149890370374394197325210001000100039439411100110001000010000103903910396139437311611391101041000395395375395395
1004394304510137921212162510001000100014060136937439421732521000100010003943941110011000100001000010390391039613943731161139110041000395395395375395
10043942045101379212120251000100010001501803693943942163252100010001000394394111001100010000100043103903910396139437311611391101041000395375395395395
100439430451013792121216251000100010001498913493743942173252100010001000374394111001100010000100043103903910396140437311611391101041000395395395395375
100439430451003792121216251000100010001498903693743741973252100010001000394394111001100010000100043103903910396139437311611391101041000399396375404395
100439430451013790121216251000100010001501813693943942173252100010001000394394111001100010000100043103904510396139437311611394101041000395395395395395
100439430010137921212162510001000100015018136939439421732521000100010003943941110011000100001000010390010390139437311611391101041000395395395395395
1004394204510037920016251000100010001501803693943942163252100010001000394394111001100010000100043103903910396139437311611371101041000395395395395395

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ldr d0, [x6, #8]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50205120047899000010010001200201194931095042560103401001000110000301001000010000107899957360446133818112002301200501200351131433113667501003020010000100006020010000100001200501200351150201100991004010010000100000100100000110000100100001000003210210111119660400026681000040100120051120051120036120054120051
502041200478990000000180001200351195091094612560103401021000010000301001000010000107899957360446134461112001101200351200351131413113658501003020010000100006020010000100001200351200471150201100991004010010000100000100100000110000000100001010003210110711119657400029651000040100120051120051120053120109120051
50204120047899000000010001200351195191095122560103401021000110000301001000010000107886257354556136749112002301200501200471131413113636501003020010000100006020010000100001200471200471150201100991004010010000100000100100000110000500100001000003210113511119657400029081000040100120036120051120051120115120048
50205120035899000000020001200351195191094652560103401021000110000301001000010000107886257361886133818112001101200501200501131453113745501003020010000100006020010000100001200351200471150201100991004010010000100000100100000110000003100001010003210113511119657400029001000040100120051120036120036120042120036
50204120102899000000010101200321195191095212560103401001000210000301001000010000107899957361886134461112001101200471200351131453113636501003020010000100006020010000100001200501200471150201100991004010010000100000100100000110000000100000000003210113511119657400029681000040100120036120036120077120054120051
50204120050899000000000101200351195091095222560103401021000210000301001000010000107905357357506133458112001701200411200411131373113630501003020010000100006020010000100001200411200531150201100991004010010000100000100100021110002021100001110003210113511119657400000681000040100120051120048120051120054120036
50204120047899000000000001200351195091094782560106401041000210000301001000010000107905357357506136419112001701200561200561131513113630501003020010000100006020010000100001200411200411150201100991004010010000100000100100012010002011100000101103210110711119663400029081000040100120042120057120057120042120042
502041200418991010000101012004111951210950725601064010410001100003010010000100001079053573647661364191120017012005612005611315131136305010030200100001000060200100001000012005612004111502011009910040100100001000001001003910100422384949100300111003933131811121673402129681000040100120147120055120060120058120237
5020412005689911000202533141760001224631208691105737466047140363100501006034354113111143311472425793998620324811223270120056120041113137311363053752340711093711184666681123211179121904122456231502011009910040100100001000001001002641100360677596100000101003210113511119654400029051000040100120055120052120058120114120079
502041200548990001100600012003911951310948225601034010210001100003010010000100001078862573623661344610120030012005112005411314131136585010030200100001000060200100001000012003512003511502011009910040100100001000001001000001100000001000000000032101107111198184000210001000040100120055120055120058120133120052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 9.0054

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
50025120051899001010100012002011951210944925600164001410000100003001010000100001079517573638061336620120027012005412005611317703113690500103002010000100006002010054100001204171200521150021109104001010000100001101000601100019021100001010003140210711119675400021310121000040010120042120042120061120061120110
5002412004189911010014000120416119530109455256001640014100061000430010101041005010952965736716613758801200360120268120139113302027113699502533002010000100006002010213100001200631200591150021109104001010000100000101000111100028217100001111103140110711119672400041313121000040010120062120061120061120064120110
500241200628991100001610012004511951810945525600164001410010100003001010000100491079629573652461339680120017012006412025311317503113699500103002010000100006002010000100001204341200411150021109104001010000100000101000270100029711432610000110110314021071111972940004013121000040010120042120042120061120042120496
5002412005790011010120001200451195181094732560016400121000210000300101000010000107963857366686133216012003601200411200601131780311369950010300201000010000600201000010000120060120057115002110910400101000010000010100012010002710110000111100314011071111965640004131301000040010120058120042120058120061120087
5002412005789911010010001204251195181094552560016400141000110000300101000010000107961157365246133216012003301200571200601131780311369950010300201000010000600201000010000120041120057115002110910400101000010000010100031110002753110000011100314011071111965640002010121000040010120042120061120042120058120110
500241200419001100002000120045119518109455256001640014100011000030010100001000010796115736524613412101200360120060120063113159031137005001030020100001000060020100001000012004112014711500211091040010100001000001010001101000372041000001110031401107111196754000400121000040010120042120042120061120042120088
500241200608991100001000120026119499109473256001340014100021000030010100001000010796115736668613321601200360120057120060113159031136805001030020100001000060020100001000012006012005711500211091040010100001000001010003311000362316910000011120314011071111965640004131001000040010120061120061120042120061120116
5002412006089910010120011200451195181094552560016400141000210000300101000010000107961157366686133968012003601200601200571131780311369650010300201000010000600201000010000120057120060115002110910400101000010000010100032110002771110000111110314011071111967540004131091000040010120061120061120042120043120117
5002412018089911010010001200451195151094552560016400121000010000300101000010000107955757363806132757012001101200541200541131720311369050010300201000010000600201000010000120054120096115002110910400101000010000010100000110000740010000101000314011071111965040002100121000040010120036120055120055120036120094
5002412005490000000016000120036119512109449256001640012100001000030010100001000010795175735455613366201200300120054120054113172031136935001030020100001000060398100001000012005412003511500211091040010100001000001010000011000011300100000000003140110711119669400021013121000040010120100120058120061120058120113

Test 3: throughput

Count: 8

Code:

  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  ldr d0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 1e | 22 | 24 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205267282000010631012686321211034980100100800001008001551111665960266872688626876166356166598030020080024200800242671226728118020110099100100800008000001008000043800380418000060394411151183160026724014104800001002672826728267322673226728
80204267072000000450012671200102580100100800001008001550011665960267062670726727166356166798011520080024200800242674126720118020110099100100800008000011008000008000003880039600431115164016002672811400800001002673526737267322670826708
80204269922010111440012687021102580100100800001008001550011665960267102671126716166596166838011620080024200800242686326837118020110099100100800008000001008000043800381448016861394311151180160026728014104800001002673226728267082673026728
80204267272000000010126712012116258010010080000100800145001166596126706267362673516663616687801142008002420080024268662676311802011009910010080000800000100800004380039039800006139011151180160026704014107800001002673726736267322670826708
8020426707200000057101267162100258010010080000100800155001165620026706267312673116630316693801002008000020080000267322673711802011009910010080000800000100800000800390418000061394300051101161126713014140800001002673226735267322670826728
8020426732200000057001267182011925802301008000010080000500117312912670226731267311675531678680100200800002008019327035267271180201100991001008000080000010080000438003813880000013944000511011611267280004800001002670826728267322672826713
802042672720010004410126692210192580100100800001008000050011665250267062672726707166343166948010020080000200800002695426740118020110099100100800008000001008000008003803880038003900005110116112672800140800001002673226732267322672826728
802042673120000001200026716010192580100100800001008000050011671270266832673126707166503166898010020080000200800002691626730118020110099100100800008000001008000043800380388003861394300051101251126731010144800001002670826732267082673226728
802042672720000004400126716210025801001008000010080000500116723112668226707267311665431668980100200800002008000026912267501180201100991001008000080000010080000080038008000060394400051101161126728010107800001002673126708267082686826733
8020426733200000001012669221119258010010080000100800005001165556026706267072673116630316685801002008000020080000267362672711802011009910010080000800000100800000800380388003801394400051101411126728014107800001002673226732267082670826732

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252673320011110651032671721801625800101080000108000050116941126689026735267321665931669480010208000020800002683226740118002110910108000080000110800192042080057102218003860574219150200191617162671299080000102673326733267332673426733
8002426732201100102110326700218181525800101080000108000050116759926690026735267151667831671380010208000020800002684226743118002110910108000080000010800202042080057100598003861194219150200141617142672909280000102673326733267332673326733
8002426732200111116500226717200152580010108000010800005011669602670702671426732166783167138001020800002080000268422673711800211091010800008000001080020194208001910159800386157019050200291617112672999280000102673426734267342673426716
8002426733200111006510226717218181625800101080000108000050116582226708026732267321667731671280010208000020800002674026746118002110910108000080000110800192042080057101598000061574219150200351618162673699280000102673426734267332673326733
8002426715200110006500326700218181625800101080000108000050116729826690026777267381666031671280010208000020800002671726732118002110910108000080000010800191942080057100628003861574219150200241614162672999080000102671526733267332673326734
800242673320011000651022669901818162580010108000010800005011675992670702671426732166773167128001020800002080000268322671911800211091010800008000001080021214208005700259800380157019050200221618182674309280000102671526715267332673326734
800242671520010000661032671821818152580010108000010800005011675992670802673226732166783167138001020800002080000268562673711800211091010800008000001080020194208005711159800386057421905020025168162673299280000102673326716267342673426736
8002426715200101006510326717018181625800101080000108000050116766026689026733267141667731671280010208000020800002683726720118002110910108000080000010800191942080019100598003801574219050200241617142671299280000102673426733267332673326733
800242673220011000650032671801818025800101080000108000050116766026707026732267321668431671280010208000020800002674426835118002110910108000080000010800191942080057000218003860194219250200211617122686599280000102673326733267332673326734
800242673220110000211002671921818162580010108000010800005011702042670702673226733166773166948001020800002080000268412673921800211091010800008000001080019204229800180002180038011901905020025161692673099280000102673326733267162671626716