Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDRH (unsigned offset)

Test 1: uops

Code:

  ldrh w0, [x6, #8]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | 92 | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
100540331101112110135900102510001000100015208398398197325610001000100037477111001010001000010202043101910121100061190191731161139914071000375375399399375
100439830000014410138907720251000100010001555938140320432611000100010004038511100101000100001000043103900039100001044007311611395131301000404404387382404
100440331111116900138921102510001000100015267398374197323210001000100039456111001010001000110181901059101611039615901907311611378101041000375375395375395
10043943000000450013883772025100010001000159574034032043239100010001000403851110010100010000100004310000004210396104300731161139113051000404404382404403
10044023111000671033790126025100010001000150373943942163256100010001000398771110010100010000101919431019100211040015801917311611379101071000399399399375399
100439820000000001366207025100010001000155553814032253261100010001000403851110010100010001100004310380000100001384400731161139513001000382385404382404
1004403310110021002359210192510001000100015304398398221325610001000100039877111001010001000010192001058100611040605801907311611378141001000399399375399375
1004398300000001003592011925100010001000152743983981973232100010001000398771110010100010000101919431059100631040605901917311611379101001000399399375399399
1004399300000000013660001925100010001000155264033812043261100010001000402641110010100010000100000103800050103861394400731161139113051000404403382404383
1004403310110067000388377202510001000100015509382403204323910001000100040386111001010001000110202001058100211000015843190731161140010071000399399399399375

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldrh w0, [x6, #8]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020570051525000001100700416978759715254010430103100023010010000616059334249404966961070056700416463703649594010030200100006020010000700513511402011009910010000301001000011001000001100000000100001010000261057111697983000300910000301007005170048700517005170051
4020470050524000000000700266978759715254010430106100023010010000616059334249404966961070053700566466103649594010030200100006020010000700743511402011009910010000301001000011001000001100000000100001010000261017111698133000396610000301007003670051700517005170051
4020470035524000000000700266978759715254010430106100013010010000616078334249414966976070041700566465203649444010030200100006020010000700773511402011009910010000301001000001001000001100000000100001000000261017111697983000396910000301007005170036700517004870036
4020470035525000000000700326976459709254010430103100003010010000616005334220604966955070050700506464603649534010030200100006020010000700533511402011009910010000301001000001001000130100010001100001111000261017111698133000090910000301007003670036700487005170048
40204700355250000025000700356976459709254010030103100013010010000616005334220604966970070050700356464603649384010030200100006020010000701163511402011009910010000301001000001001000001100000000100001010000261017111698043000696910000301007005770057700577004270042
4020470041525110002000700326978159706254010430100100013010010000616005334220604966955070050700526464903649534010030200100006020010000700733511402011009910010000301001000001001000001100000000100000010000261017111698193000390910000301007005770057700577005470058
4020470043524110002100700266976759695254010430103100013010010051616059334220604966970070314700506469503649914029430200100006020010000700503511402011009910010000301001000001001000001100000000100001010000261017111698193000696910000301007005970057700577005770042
40204700565241010021007003269781597092540100301031000030100100006160323342350049669550700507005064631036493940100302001000060200100007008435114020110099100100003010010000010010000011000000001000010100002610110711698133000096910000301007005170053700397005070052
4020470047524000001000700266978759715254010830103100023010010000616078334235004966961070041700416465203649594010030200100006020010000704203511402011009910010000301001000001001000000100000000100001010000261027111698043000696910000301007005470042700427004270042
4020470041524110002100700206978159695254010430103100003010010000616005334147014966970070050700356464603649534010030200100006020010000700643511402011009910010000301001000001001000221100020011100001111100261017111698133000000910000301007005170051700367005170036

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0051

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4002570051525010010070036697435971025400143001310001300101000061701833423980496697170054700366467236497640010300201000060020100007005435114002110910100003001010000010100001100000010000100252177191069798300000101310000300107005270052700367003670052
4002470035525000010070039697435971025400143001310001300101000061706833414700496697170051700516467236496040010300201000060020100007003535114002110910100003001010000010100001100000010000110252177178698143000313101010000300107003670055700557005270036
400247003552500001007002069778596952540014300131000130010100006212543345470049669747005470035646723649604001030020100006002010000700543511400211091010000300101000001010000110000001000010025218718969817300031310010000300107005870036700557005570052
4002470035525000010070039697755971025400103001010001300101000061706833422540496697170054700516467236497940010300201000060020100007005135114002110910100003001010000010100001100000010000100252187198698143000313101010000300107003670093700527005570055
40024700515250010100700396977859713254001430010100003001010000617018334147004966974700357003564672364960400103002010000600201000070054351140021109101000030010100000101000011000010100001002521107188697983000013131010000300107005570055700557005570055
4002470054525000010070039697785969525400143001310001300101000061701833424230496695570035700516465336496040010300201000060020100007005135114002110910100003001010000010100001100000010000100252187191069817300030101010000300107005570055700557003670036
4002470054524001001070036697805969525400103001310001300101000061699133423980496695570054700356467236496040010300201000060020100007005135114002110910100003001010000010100000100000010000010252187110869817300001301310000300107005570055700527003670036
4002470054525001010070020697435969525400143001310001300101000061706833414700496697470054700546465536497940010300201000060020100007005435114002110910100003001010000010100001100000010000100252177188698143000313101310000300107005570055700367005270036
40024700545240000000700396977859714254001430013100003001010000617068334147004966974700547005164672364979400103002010000600201000070035351140021109101000030010100000101000011000000100001002521971109697983000313131310000300107005570036700557005570055
4002470035525000010070039697785971325400143001310000300101000061700033414700496695570054700546467236497940010300201000060020100007005435114002110910100003001010000110100001100000010000110252197189697983000313101010000300107005270055700367005570052

Test 3: throughput

Count: 8

Code:

  ldrh w0, [x6, #8]
  ldrh w0, [x6, #8]
  ldrh w0, [x6, #8]
  ldrh w0, [x6, #8]
  ldrh w0, [x6, #8]
  ldrh w0, [x6, #8]
  ldrh w0, [x6, #8]
  ldrh w0, [x6, #8]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80205267362001111166132672237702580100100800001008001550011699490492363426737267361664261666680115200800242008002426740851180201100990100800001008000011008002020008005910161800396058431901115118116112673413135800001002673726737267372673726738
802042673720010100670326722377182580100100800001008001350011674600492363426736267371665931669480100200800002008000026743641180201100990100800001008000001008002021008005801260800396159431901005110216222673313135800001002671526737267382671526737
8020426736200110006612267223772025801001008000010080000500116731604923657267362673616659316694801002008000020080000269298511802011009901008000010080000010080020194308006100061800400159431900005110216222701213135800001002673726741267372673726750
8020426737201100006703267213772035801001008000010080000500116986814923656267372673616659316694801002008000020080000267368511802011009901008000010080000010080020214308005910061800406059431910005110216222673413135800001002673726737267152671526737
8020426736200111007003267213771925801001008000010080000500116768114923656267372673616659316694801002008000020080000267398511802011009901008000010080000010080019194308001900060800006159431910005110216222673313135800001002674126739267372673726738
802042671420110000661326721370192580100100800001008000050011684320492363426737267141665931669480100200800002008000026736861180201100990100800001008000001008002020430800600006080040005843190000511021622267331305800001002673726737267152673726737
8020426736200111006703267210701825801001008000010080000500116746014923634267362673616658316672801002008000020080000267458511802011009901008000010080000010080020204308005900021800396159431900005110216222673313135800001002673826715267152673726737
8020426736201101006703267213771925801001008000010080000500116755504923656267362673616642316695801002008000020080000268929011802011009901008000010080000010080019194308001901161800406159431900005110216222673313130800001002671526738267372671526715
8020426736200100006702267210971925801001008000010080000500116747404923656267422673616658316672801002008000020080000267368511802011009901008000010080000010080020204308005810161800396159431910005110216222673413135800001002671526737267372671526737
8020426736201100006703267213071925801001008000010080000500116755504923656267362671516659316694801002008000020080193267408511802011009901008000010080000010080019194308005810061800406059431900005110216222673313130800001002673726737267372673826737

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
80025267362001111671032672137720258001010800001080000501165304149236562671526736166813167178001020800002080000267826411800211091080000108000010800192045800590016180041605901915020431611267331313080000102673826716267372674026737
8002426715201100067103267213092025800101080000108000050116710614923634267362673716681316716800102080000208000026740851180021109108000010800001080019204380019000608000060594419150204116112673300580000102673726737267372673726737
8002426736200100167003267220071925800101080000108000050116710914923656267362673616681316717800102080000208000026741851180021109108000010800001080019194380019000218000001594319150204216112673400080000102673726738267382673826716
800242671420010116700326722200222580010108000010800005011672190492365726736267361668231671780010208000020800002672785118002110910800001080000108002021438005810160800406159431915020411611267331313580000102673826737267382673726737
800242673620011012100026700007025800101080000108000050117397504923634267152673616659316695800102080000208000026741851180021109108000010800001080019194380059002218003960190190502041161126792130080000102673726737267382673826737
8002426714200101066102267222700258001010800001080000501167298049236562673726736166823167178001020800002080000267418511800211091080000108000010800212008001910221800006058431815020421611267361313080000102671626737267372673726716
80024267362001000670022672130719258001010800001080000501173975049236562673726736166813167168001020800002080000267178511800211091080000108000010800191908001911160800000159441905020421611267331313580000102673726737267372673726737
800242671420011006700226721377125800101080000108000050116530414923635267142673616681316717800102080000208000026722851180021109108000010800001080019190800191016480000615843190502042161126733130580000102673726737267372673726738
800242671520110106600026722207192580010108000010800005011677910492365626736267371668131671680010208000020800002671685118002110910800001080000108001920438005810060800006019431915020411611267331313080000102673726716267382673826737
8002426736200101067100267210701925800101080000108000050117062204923656267362673716681316694800102080000208000026741851180021109108000010800001080020194380019000218004061584319050204116112673300580000102671526737267372673726738