Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRH

Test 1: uops

Code:

  ldrh w0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
10053892000101374218002510001000100014980389377212324710001000100037771111001100010000100039100003510356103973116113860021000390375390392390
1004389300000137420180251000100010001406038938921232491000100010003745611100110001000010003910350010356103973116113860021000375390375390395
1004394300411003590181816251000100010001483839439421632521000100010003947111100110001000010000103900100060363973116113866601000390395375395390
100437430001003740181202510001000100014838389389217324710001000100039471111001100010001100001035044103560353973116113716621000390390390390390
100438930041101374218002510001000100014989389389217323210001000100038956111001100010001100039100003610006035073116113716621000390375390375392
10043743004110137420181225100010001000148383913742123247100010001000374711110011000100001000391035035100060353973116113866041000375375375375390
10043893104100137401218122510001000100014060389389212324710001000100038956111001100010000100001035035103561353973116113860021000375375375375390
1004389200010135921818122510001000100014060374389212324710001000100038956111001100010001100039103500103501393973116113876621000375395392390390
1004389200411003592120122510001000100014303394389197323210001000100037456111001100010000100001035035103561039731161138610621000392390390390375
1004402310411013742001225100010001000148383943742123232100010001000394711110011000100001000010000010006003973116113866621000390378390375390

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrh w0, [x6]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0060

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
402057005752511110002560100070042697885970125401083010310002301001000061607833425424966980700417006064696364960401003020010000602001000070060351140201100991001000030100100000100100022110002000110000110110261027111698043000310101010000301007004270058701637017570042
4020470041525100110019000007004569702597192540108301061000230100100006160953342686496696170041700606465036494440100302001000060200100007006535114020110099100100003010010000010010002311000300211000001111026101711169823300061313010000301007004270058700617006170061
402047006052610000003800000700456979159716254010830106100023010010000616078334268649669807006070041646373649444010030200100006020010000700603511402011009910010000301001000001001000211100020011100001111002610171116980430006010010000301007006170061700427006170061
40204700605251110000410000070045697915971925401043010610001301001000061609533425424966961700607006064748176499040100302001000060200100007006535114020110099100100003010010000010010002211000300111000011011026101711169804300030101310000301007005870058700427004270102
4020470058525100000056000007004269788597012540104301061000230100100006160783342686496698070060700606472836496040100302001000060200100007004135114020110099100100003010010000010010002101000100011000011110026101711169895300060131310000301007006170042700617004270061
402047004152511000004101000700456979159719254010830106100023010010000616078334268649669617006070057647143649444010030200100006020010000700603511402011009910010000301001000001001000231100010001100001111102610171116982330006001010000301007006170042701677006170042
4020470060525101000026000027004569788597192540108301061000230100100006160783342542496697770060700416472436494440100302001000060200100007006035114020110099100100003010010000010010001311000100011000011112026101711169820300061310010000301007004270061700617006170042
4020470060525100000020100070042697045972125401083010610002301001000061609533428304966978704367005764770364960401003020010000602001000070041351140201100991001000030100100000100100012010001002110000111110261017111698233000613131010000301007006170061700427006170061
4020470057524111010070000070026697915971925401083010610002301001000061609533426864966961700607006064720364944401003020010000602001000070041351140201100991001000030100100000100100021110002001110000010110261017111698233000613131310000301007006170042700587004270042
40204700605251110100680100070045697915970125401083010610001301001000061607833426864966980700417005764738364944401003020010000602001000070060351140201100991001000030100100001100100011010001001110000110100261017111698043000610101310000301007004270061700587006170061

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002570047525000003940007003269744596952540014300321000230582100006260193342206049669707005070035647603649814001030020100006002010000700413511400211091010000300101000001010003211000100110000110101252057121698133000066610000300107003670051700517003670051
40024700505240000041007002069743596952540026300131000030010100006170683341470049669677003570047647613649604001030020100006002010000700473511400211091010000300101000001010000011000000010000101000252017111698193000660610000300107005770062700427004270042
400247005652410010401007002069728597092540014300101000030010100006170683342206049669707003570035647193649814001030020100006002010000700533511400211091010000300101000001010003201000302110000011100252017111697983000066010000300107004870036700487004870048
400247004752500000491007003269760596952540010300131000130010100006169523342062049669707003570035647413649754001030020100006002010000700473511400211091010000300101000001010000011000000010000101000252017111698163000666610000300107005770042700577005770042
400247004152411110241007003569743596952540010300131000130010100006170683342206049669707003570055647753649754001030020100006002010000700503511400221091010000300101000001010000011000000310000101000252017111698103000360610000300107004870048700487004870048
400247005052400000561017004169780597012540018300131000230010100006170363342494049669767004170056647323649754001030020100006002010000700353511400211091010000300101000001010000011001500010000101000252027111698133000309910000300107003670036700517003670036
4002470035525000004430007004169702597012540018300161000130010100006169953342494049669617004170056646913649604001030020100006002010000700353511400211091010000300101000001010000001000000010000101000252017111698103000360610000300107005170036700527005170036
40024700505250000011007002669702597012540014300161000230010100006170363341769049669737004170041647423649604001030020100006002010000700503511400211091010000300101000011010000011000000010000101000252017112698133000396010000300107003670036700367004870051
400247004752500000551007002069760597092540014300131000130010100006170683342206049669557005070035647253649754001030020100006002010000700503511400211091010000300101000001010000011000000010000100000252017111700323000096910000300107004870036700517005170056
400247005052400000800017003869777597152540018300161000230010100006170363342494049669767005670056647223649754001030020100006002010000700503511400211091010000300101000001010000011000000010000100000252017111698103000366610000300107004870048700367004870048

Test 3: throughput

Count: 8

Code:

  ldrh w0, [x6]
  ldrh w0, [x6]
  ldrh w0, [x6]
  ldrh w0, [x6]
  ldrh w0, [x6]
  ldrh w0, [x6]
  ldrh w0, [x6]
  ldrh w0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205267272001001080012669221212162580100100800001008000050011672310492364926738267071665431668580100200800002008000026731772180201100991008000010080000110080000430800380008003861044195110116112670414107800001002673226708267082673226732
802042673120000001002669221119258010010080000100800005001168312049236512683826736166363166898010020080000200800002673177118020110099100800001008000001008000043080039003980039613943195110116112672814147800001002673226732267082673226732
8020426731200000600012671601116258010010080000100800005001165556049236512683326716166363166898010020080000200800002673177118020110099100800001008000001008000043080038003880038613843195110116112672810104800001002673226732267322670826732
8020426707201000451002671621102580100100800001008000050011683120492362726736267361665431668980100200800002008000026731771180201100991008000010080000010080000430800000039800006039019511011611267280147800001002670826732267322673226732
802042670720100089101267122100258010010080000100800005001165556049236272683526733166633166658010020080000200800002673177118020110099100800001008000001008000043080038003880038610440511011611267280104800001002672826732267322673226708
8020426731201000920002669221119258010010080000100800005001167127049236512674326738166523166968010020080000200800002673156118020110099100800001008000001008000043080038003880039613944195110116112672814107800001002670826708267322673226732
8020426732200000950002671221121925801001008000010080000500116712704923627268452671016661316689801002008000020080000267317711802011009910080000100800001100800004308003800388003861394319511011611267041407800001002673226732267322670826732
802042673120000024001267162110258010010080000100800005001167127049236272675126710167243166678010020080000200800002673177118020110099100800001008000001008000043080038010800006104405110116112670414107800001002673226732267082673226708
8020426731200000591012671601119258010010080000100800005001166525049236512683226713167583166898010020080000200800002673177118020110099100800001008000001008000000800380138800386039440511011611267041407800001002673226732267322670826732
8020426707200000920012669221211925801001008000010080000500116712704923651268392671516637316689801002008000020080000267315611802011009910080000100800000100800000080038003880000010005110116112670414144800001002673226708267322673226708

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
800252673320010110095103267172180162580010108000010800005011676481049236522683126715167413167128001020800002080000267148111800211091080000108000001080000008003500035800356035390005020002160222673099280000102673326733267332673326734
80024267332001101101011022671731818162580010108000010800005011676600049236352684426737166853167128001020800002080000267328111800211091080000108000001080020214280057001598003860194219105020002160222672999280000102673326733267332673326716
8002426733200101100116103267172181802580010108000010800005011702040049236522683826739166673166958001020800002080000267328111800211091080000108000001080020204280057102598003860584219015020502160222672999280000102673326733267332673326734
8002426733200101100980012671721818162580010108000010800005011687910049236522684426738167893167138001020800002080000267148111800211091080000108000001080020194280057101218000061194219005020502160222674190280000102671626734267342673426716
800242673220010010071000267172181816258001010800001080000501167660004923652268112673316736316712800102080000208000026733811180021109108000010800000108002020428001910059800386157018005020002160222671299280000102673426733267362673326733
8002426732200111100651012671701818162580010108000010800005011669600549236342682426741166773167128001020800002080000267336411800211091080000108000001080019214280057102598003861194219205020502160222673099280000102673326733267332673326716
800242673320011010065102267172181812580010108000010800005011702040049236522683526732168103167138001020800002080000267146411800211091080000108000001080019194280019011598003861574219205020002160222671199280000102673326733267332673426733
80024267412001110001071022671701818162580010108000010800005011676601049236522685226738166833167148001020800002080000267328111800211091080000108000001080020204280019101608003861574219005020042160222672990280000102673326733267332688926715
8002426732200110000871032671721818162580010108000010800005011676600549236522682826733166913167068001020800002080000267326411800211091080000108000001080019224180057100598000001194219105020502160222672990280000102671626716267162673426733
8002426732200110011151002670721818122580010108000010800005011675991549236522691726750168423166958001020800002080000267328111800211091080000108000001080019194280057000598003861574219205020542160222671190280000102673326733267332673326734