Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRSH (register, sxtw, 64-bit)

Test 1: uops

Code:

  ldrsh x0, [x6, w7, sxtw]
  mov x7, #4
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01)cycle (02)030e0f1e22233a3f4346494f51schedule uop (52)schedule ldst uop (55)dispatch ldst uop (58)simd uops in schedulers (5a)606d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map ldst uop (7d)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst int load (95)inst ldst (9b)9dl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5b6bbl1d cache miss ld nonspec (bf)cfd5map dispatch bubble (d6)ddfetch restart (de)e0eaebec? ldst retires (ed)f5f6f7f8fd
10053893114110138321812122510001000100014838138938921632471000100020003897111100110001000010003910350391039613539732161138610641000390395395395395
1004394300461023792181812251000100010001483803943942173252100010002000394711110011000100001000391035039103561353973116113866621000390390390395390
100438930041002379218181625100010001000148381389389216324710001000200039471111001100010000100039103503510356135397311611386101021000390395390395395
10043893004500137921818122510001000100015018139438921632471000100020003897111100110001000010003910390391036613543731161138610621000390390395390395
1004389300451023742181216251000100010001498913913942173247100010002000389711110011000100001000391039035103961354373116113886621000392390390395395
10043942004500137421212162510001000100014838138939421732471000100020003947111100110001000010003910350351039613543731161139110621000398390395395390
1004389300410013792181213251000100010001501803943912163252100010002000391711110011000100001000391035039103561353973116114026621000395395390390390
1004394300411013792181212251000100010001501803893892123247100010002000394711110011000100001000391035039103561353973116113866621000395392395395398
10043893004110137421818122510001000100014838039138921232521000100020003947111100110001000010003910390391039613643731161138610621000390395395390390
10043893004100137921818112510001000100014989139139421232521000100020003947111100110001000010003910390351039613539731161139110621000390390392390391

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrsh x0, [x6, w7, sxtw]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f1e22233a3f4d4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)int prf full (71)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch mispred nonspec (cb)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
402057004152500000221007002069764597092540100301031000130100100006160053342206149669707003570050646460364953401003020010000602002000070035351140201100991001000030100100000100100000010000000010000110100261017111698193000606910000301007005770062700577004270057
402047004152510101010007003569781596952540104301001000130100100006161753342206149669707005070035646310364953401003020010000602002000070050351140201100991001000030100100000100100000010000000010000101000261017111698043000696910000301007005770054700427004270057
402047005352510110010007002069781596952540100301001000130100100006161753342206149669707005070050646310364953401003020010000602002000070035351140201100991001000030100100000100100000010000010010000000000261017111698133000366610000301007003670048700497004870048
402047004752400000011007002169764597092540100301031000130100100006160053342206149669707003570050646310364938401003020010000602002000070041351140201100991001000030100100000100100012110002001110000100000261017111698133000306910000301007005170051700517005170036
402047005052400000010007002069735597092540104301031000130100100006160053341470149669557011270056646430364938403133020010000602002000070050351140201100991001000030100100000100100000010000000010000101000261017111697983000306010000301007005170051700517004870051
402047003552501000010007002069787597012540108301001000130100100006161753342350149669707005070050646310364938401003020010000602002000070047351140201100991001000030100100000100100000010000000010000101000261017111698203000006910000301007005070036700517003670051
402047003552400000010007003569702597152540104301031000130100100006161753342062149669707005070050646310364953401003020010000602002000070041351140201100991001000030100100000100100000110000010010000101000261017111698133000399910000301007005170051700367005270051
40204700355250000006720007003269781597122540104301031000130100100006160053342206149669707005070047646460364938401003020010000602002000070050351140201100991001000030100100000100100000110000000010000100000261017111698103000390010000301007005170051700517005170051
402047004752500000060107003569764597092540104301031000030100100006160153342062149669707005070050646510364938401003020010000602002000070050351140201100991001000030100100000100100000010000000010000110100261017111698163000399910000301007003670051700367004870036
402047005052400000110007003269781597063840100301001000130100100006160053341470149669557003570047646460364950401003020010000602002000070050351140201100991001000030100100000100100000110000001110000101000261017111697983000399010000301007005170051700517005170051

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01)cycle (02)030e0f1e22233f4d4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)acafb5l1d cache miss ld nonspec (bf)branch cond mispred nonspec (c5)cfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
40025700475250010070032697285970625400143001310002300101000061695233420620496696770047700476466536497240010300201000060020200007004735114002110910100003001010000010100001100000010000110252017111698123000366610000300107004870048700487004870048
400247004752400110700326972859706254001430013100013001010000616952334206204966967700477004764665246497240010300201000060020200007005135114002110910100003001010000110100001100000010000110252017111698763000366610000300107004870048700487004870048
40024700475240011070032697285970625400143001310001300101000061695233420620496696770047700476466536497240010300201000060020200007004735114002110910100003001010000010100001100000010000110252017111698543000366610000300107004870048700487004870048
40024700475240061070032697285970625400143001310001300101000061695233420620496696770050700476466536497240010300201000060020200007004735114002110910100003001010000110100001100000010000110252017112698143000366610000300107005470048700487004870058
40024700475250111070032697285970625400103001310001300101000061695233449380496696770047700476466536497240010300201000060020200007004735114002110910100003001010000010100001100000010000110252017112698133000366610000300107004870048700487004870048
40024700475250001070032697285970625400143001310001300101000061695233420620496696770047700476465336497240010300201000060020200007004735114002110910100003001010000010100001100001010000110252017112698363000066610000300107004870048700367004870048
40024700475240111070032697285970625400143001310001300101000061695233420620496696770047700476466536497240010300201000060020200007004735114002110910100003001010000010100001100037010000110252027112698673000366610000300107004870048700487004870048
40024700355240011070032697285970625400143001310001300101000061695233420620496696770047700476466536497240010300201000060020200007004735114002110910100003001010000010100001100000010000111252017112698363000360610000300107004870048700487003670048
40024700475250011070032697285970625400143001310001300101000061695233420620496696770047700476466536497240010300201000060020200007004735114002110910100003001010000010100001100000010000110252017111698613000366610000300107010870049700577004870036
40024700495250011070032697285970625400143001310001300101000061695233420620496696770047700476466536497240010300201000060020200007004735114002110910100003001010000010100000100000010000110252017112698123000366610000300107004870048700547004870048

Test 3: Latency 1->3 (with chain penalty)

Chain cycles: 3

Code:

  ldrsh x0, [x6, w7, sxtw]
  eor x8, x8, x0
  eor x8, x8, x0
  add x7, x7, x8
  mov x7, #4
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0057

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f191e223a3f4d4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)696a6b6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
4020570057525111110021170026697925971625401083010610002301001000061606833425424966977070057700576465336496040100302001000060200200007005835114020110099100100003010010000010010003111000201110000111120261017111698203000610101010000301007005870058700587005870058
4020470057525100101021170042697885971625401083010610002301001000061606833425424966977070057700576465336496140100302001000060200200007011835114020110099100100003010010000010010002111000301110000111110261017111698203000610101010000301007005870042700587005870058
4020470057525111100020170042697885971625401043010610002301001000061606833425424966980070057700576465936496040100302001000060200200007010635114020110099100100003010010000010010002211000301110000111100261017111698203000310101010000301007005870058700587005870058
40204700575251001000291170042697885971625401083010610002301001000061606833425424966977070057700416465336496040100302001000060200200007010535114020110099100100003010010000010010001101000200110000111100261017111698203000610101010000301007005870058700587005870058
4020470057525111000021170042697885971625401083011510002301001000061606833425424966977070061700576465336496440100302001000060200200007010535114020110099100100003010010000010010002211000201110000111100261017111698203000610101010000301007005870058700587005870058
4020470057524111100021170042697885971625401083010610002301001000061606833425424966977070495700576465336532640100302001000060200200007009435114020110099100100003010010000010010002211000200110000011110261017111698203000610101010000301007005870058700587005870058
402047005752510010002107004269788597162540108301061000230100100006160683342542496697707004170057646533649604010030200100006020020000700873511402011009910010000301001000001001000121100010011000011112026101711169820300061010010000301007005870058700587005870058
4020470057524111200051170042697885971625401083010610002301001000061606833425424966977070057700576465336496040100302001000060200200007006035114020110099100100003010010000010010001111000100110000111110261017111698203000610101010000301007005870058700427005870058
4020470057524101100021170042697885971625401083010610002301001000061606833425424966977070057700576465336496040100302001000060200200007005935114020110099100100003010010000010010002211000200110000111100261017111698203000610101010000301007005870058700587005870058
402047005752510110002117004269788597162540104301061000230100100006160683342542496697707005770057646533649604010030200100006020020000701043511402011009910010000301001000011001000121100010216010000111110261017111698203000610101010000301007005870058700587005870058

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01)cycle (02)03mmu table walk data (08)0f1e1f2223243f4d4f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)int prf full (71)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d cache miss ld (a3)ld unit uop (a6)l1d cache writeback (a8)a9acafb5l1d cache miss ld nonspec (bf)c2cdcfl1i cache miss demand (d3)d5map dispatch bubble (d6)d9ddfetch restart (de)e0? int output thing (e9)eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
400257004752500100007003569728597092540014300101000130010100006169823342062049669677005070035646650364972400103002010000600202000070053351140021109101000030010100001101000011000000010000110025200171211698103000396010000300107003670051700517005170051
400247005052400100007002069760596952540010300101000130010100006169523342206149669677003570047646680364960400103002010000600202000070050351140021109101000030010100001101000011000000310000100025200171210698013000360910000300107005170052700517003670048
400247005052500100007003569760597092540010300101000130010100006170683341470049669557005070035646730364975400103002010000600202000070050351140021109101000030010100000101000011000000010000100025200171221697983000090910000300107005170036700487005170051
400247003552400601007002069760596952540014300131000130010100006170683342062149669557005070050646680364960400103002010000600202000070035351140021109101000030010100000101000001000000010000110025200171211698133000396010000300107005170036700517005170051
400247005052400000007002069728597092540014300131000130010100006170683342206149669707005070050646680364960400103002010000600202000070050351140021109101000030010100000101000011000000010000010025200171311697983000090910000300107007970127700517005170036
400247005052400100007002069760597092540014300131000130010100006169523342062149669557005070035646680364975400103002010000600202000070050351140021109101000030010100000101000011000000010000110025200171211698133000306910000300107005170036700367003670036
400247005052500100007003569760597092540014300131000430010100006169823342206149669557003570050646680364975400103002010000600202000070050351140021109101000030010100000101000011000000310000110025200171211697983000396010000300107004870051700367009470040
400247005152400600007003369728596952540014300131000130010100006169823342062149669707005070050646680364975400103002010000600202000070050351140021109101000030010100000101000001000000010000110025200171211698133000396610000300107003670051700517005170051
400247005052400000007002069760596952540014300101000130010100006169823341470149669707005070050646530364960400103002010000600202000070050351140021109101000030010100000101000011000000010000010025200171211698103000369610000300107005170036700517005170048
400247004752500101007003569760597092540014300131000030010100006169823342206149669707005070050646530364975400103002010000600202000070050351140021109101000030010100000101000001000000010000110025200171211698133000096010000300107005170051700517005170048

Test 4: throughput

Count: 8

Code:

  ldrsh x0, [x6, w7, sxtw]
  ldrsh x0, [x6, w7, sxtw]
  ldrsh x0, [x6, w7, sxtw]
  ldrsh x0, [x6, w7, sxtw]
  ldrsh x0, [x6, w7, sxtw]
  ldrsh x0, [x6, w7, sxtw]
  ldrsh x0, [x6, w7, sxtw]
  ldrsh x0, [x6, w7, sxtw]
  mov x7, 8

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)l2 tlb miss data (0b)0e0f1e22233a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2branch cond mispred nonspec (c5)branch mispred nonspec (cb)cdcfd5map dispatch bubble (d6)ddfetch restart (de)e0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
8020526732201110006510026692218181225801001008000010080014500116787514923647267222672216655616674801142008002420016004826733881180201100991008000010080000110080020214280058000598003761574119011151180160026730902800001002673426715267332673326746
80204267142001110165002267182181817258010010080000100800165001167561149236532673226714166606166858011520080024200160048267257111802011009910080000100800000100800000398003600035800350135390011151180160026719662800001002672326708267082670826723
80204267072000000101022671721801625801001008000010080016500116956314923652267322673216659616683801152008002420016004826734811180201100991008000010080000010080019194280057002598003860574219111151180160026826662800001002672326723267232672326723
802042672220000000450012670721218122580100100800001008001450011771161492364226722267221665061665980114200800242001600482672871118020110099100800001008000001008000003980000000368003861574219111151180160026724062800001002672326723267232672326723
8020426722200000007410326717218181625801001008000010080015500116975714923634267322673216660616684801162008002420016004826736821180201100991008000010080000110080020204280057001598003861574219011151180160026723992800001002673426733267332673326734
8020426733200100006510326717218181525801001008000010080016500116975704923652267322673216660616685801152008002420016004826742811180201100991008000010080000010080019204280056100588003861574219111151180160026730992800001002673326733267332673326733
8020426732200100006500326717218181625801001008000010080015500116975704923652267322673216660616685801152008002420016004826737811180201100991008000010080000010080020204280058000598003860574219211151180160026729992800001002673426734267342673326733
802042673220011100641022671820181625801001008000010080015500117738904923652267322673316642616684801132008002420016004826735811180201100991008000010080000010080021194180057101608000061574219111151180160026734992800001002673326733267332673326734
8020426733200111006510026717218181625801001008000010080016500116956304923652267322671416660616684801142008002420016004826732811180201100991008000010080000010080019204280057001628000061574219111151180160026719662800001002670826723267232672326708
802042672220000000010126707018181225801001008000010080014500116787514923642267222672216650616674801142008002420016004826740811180201100991008000010080000010080019204280059000598003861574219211151180160026729990800001002673326734267342673426733

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3341

retire uop (01)cycle (02)03l1d tlb fill (05)mmu table walk data (08)09l2 tlb miss data (0b)0e0f18191e1f223a3f4346494f51schedule uop (52)schedule int uop (53)schedule ldst uop (55)dispatch int uop (56)dispatch ldst uop (58)int uops in schedulers (59)simd uops in schedulers (5a)60696a6d6emap stall dispatch (70)map rewind (75)map stall (76)dispatch uop (78)map int uop (7c)map ldst uop (7d)map int uop inputs (7f)map ldst uop inputs (80)8283flush restart other nonspec (84)85inst all (8c)inst branch (8d)inst branch taken (90)92inst branch cond (94)inst int load (95)inst int alu (97)inst ldst (9b)9d9fl1d tlb access (a0)l1d tlb miss (a1)l1d cache miss ld (a3)a5ld unit uop (a6)st unit uop (a7)l1d cache writeback (a8)a9acafb5b6bbl1d cache miss ld nonspec (bf)l1d tlb miss nonspec (c1)c2cfd5map dispatch bubble (d6)ddfetch restart (de)e0eaebec? ldst retires (ed)? int retires (ef)f5f6f7f8fd
800252673620000000100570112672121212162580010108000010800005011667501492364826730267271668231684680010208000020160000267146411800211090108000010800000108002019430800591012180041611943190502071624267051010480000102670926728267282672826729
80024267282000000100057010267122121216258001010800001080000501168843049236472670826727166813167248001020800002016000026736641180021109010800001080000010800202000800591012180000005901925020316242672400080000102672926729267292672926728
80024267272000000000063010266932120162580010108000010800005011688431492364826727267271668231672280010208000020160000267366411800211090108000010800000108002121430800591016380040615943191502021635267051010480000102672926728267282672826729
800242672820100000000660112671321212825800101080000108000050116675004923648267272670816682316707800102080000201600002672877118002110901080000108000001080000043080000000398003961394300502041624267241010480000102672826728267282672826709
800242672720100000000450012671221201625800101080000108000050116675004923663267372675516701316716800102080000201600002673685218002110901080000108000001080021204408005910193800406004300502021642267251010480000102672826728267282672826732
800242672820000000000450012671321212162580010108000010800005011668960492364926741267451668131671680010208000020160000267366411800211090108000010800000108002020008001910261800406104300502021624267241010080000102672926729267092672926728
80024267272000000110067012267213770258001010800001180000501167298049236612672826716166723166888001020800002016000026727771180021109010800001080000010800000430800390003980039011943191502041642267341313080000102673826737267382671526737
800242673620011100000450112671321212162580010108000010800005011688430492364826731267281680631670780010208000020160000267087711800211090108000010800000108000000080039000398003961394300502021644267241010480000102672826728267292672926728
8002426708200000000004500026712012121833800101080000108000050116689604923635269312673616681316716800102080000201600002673785118002110901080000108000001080019214308001910221800006139000502021624267051010480000102672826728267282672826728
800242672720000000000001126693212121625800101080000108000050116996204923657267292673216654316707800102080000201600002670877118002110901080000108000001080000000800390003980000615943192502021642267371313580000102671626737267372673726737