Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LDR (32-bit)

Test 1: uops

Code:

  ldr w0, [x6]
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires: 1.000

Issues: 1.000

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 0.000

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule ldst uop (55) | dispatch ldst uop (58) | simd uops in schedulers (5a) | 60 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst int load (95) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | f5 | f6 | f7 | f8 | fd
1005394310045001379201202510001000100014219139439721732521000100010003945611100110001000110004310393910006139437311621399101041000395395375375376
10043743011450003790001625100010001000140601394374217323210001000100039477111001100010000100001000391039013943732162139101041000395395375396375
10043943000010135901212162510001000100015037039439421732521000100010003947711100110001000110000103939103960007311611371101001000375375395395375
10043743100450013592001625100010001000149890378374217325210001000100039477111001100010000100043103939103961043732162239101041000395395395395375
100437430000000379212016251000100010001501813943742183252100010001000394771110011000100001000431039391039003943732162239110001000395395375395395
1004394200045000360200162510001000100014989037439419732321000100010003747711100110001000010004310393910000139437311611391101001000375375395395375
100439420004510035900016251000100010001406003743741973232100010001000374771110011000100001000010393910390139073216223910041000395395395395395
10043943000451013590120025100010001000149890394394217325210001000100039477111001100010000100001039010396139073116123710001000395375395395376
1004394300045000379212120251000100010001503703943741973232100010001000374771110011000100001000431000391039010437321622391101041000395395375375399
1004394311000113790012025100010001000140600396374217325210001000100039456111001100010001100043100001000600437321622371101041000395395375395375

Test 2: Latency 1->2 (with chain penalty)

Chain cycles: 3

Code:

  ldr w0, [x6]
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0047

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
4020570047525000000100007003269735597452540100301031000130111100076155403342375496697070047700476469966498640118302301001060260100107005035114020110099100100003010010000110010000011000010010000100261037111698103000306610000301007004970040704067004270036
4020470035525000100101007003269735597062540100301031000730100100006160153342062496695570047700356463136495840100306961000060200100007003635114020110099100100003010010000010010000011000000010000110261017111699463000306010000301007003670048700367004870048
4020470047525000000000007002069764597072540160301031000130100100006160153342062496696770047700476464336498540100302001000060200100007004735114020110099100100003010010000010010000011000000010000110261017111698103000366010000301007003870048700487004970048
40204700355240011003901007003269735597062540100301031000130100100006160153342254496695670050702176477736498940100302001000060200100007004735114020110099100100003010010000010010000001000000010000110261017111697983000066610000301007003870395700507004870048
4020470047525010000101007003269735597062540104301031000130100100006160153342062496696770047700356464336497840100302001000060200100007003535114020110099100100003010010000010010000011000010010012110261017111700893000366610000301007004870048700367004870048
4020470047524000000101007002069781596952540100301031000130100100006160153342062496697070035700356464636499040100302001000060200100007005035114020110099100100003010010000110010000011000010010000100261017111698133000096010000301007005170054701337017870051
402047005052400000016010070025697815969525401003010310000301001000061600533422064966970700507003564646364957401003020010000602001000070035351140201100991001000030100100000100100000110000001810000110261017111698103000306610000301007004870048700367004870048
4020470035525000100100007002469735597062540104301001000030100100006160153342062496696770035700476475036497540100302001000060200100007005135114020110099100100003010010000010010000001000042010000112261017112698163000360610000301007005170048700557031570051
4020470050524001000152641007002069764598262540104301001000130100100006161753342062496696770035700476463136500540100302001000060200101647003535114020110099100100003010010000010010000001000000010000110261017111697983000306610000301007004870048700367004870048
4020470047524100100001007003569764596962540104301001000130100100006160153342062496696770035700356464336495640100302001000060200100007005035114020110099100100003010010000010010000011000000010000110261017111698133000306610000301007004870048700367004870048

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 4.0050

retire uop (01) | cycle (02) | 03 | 09 | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
400257004752410000235010700326972859709254001430013100023001010000616982334147001496696770035700506476836497540010300201000060020100007003535114002110910100003001010000010100001100000010000110252027111698133000396010000300107003670051700517005170036
40024700355250110042010700356974359695254001430010100013001010000616982334147000496697070050700506465936496040010300201000060020100007005035114002110910100003001010000010100000100000310000100252027111697983000390910000300107005170048700367005170048
400247004752400000549010700206976059695484002230022100013001010000617068334220600496697070050700356474636496040010300201000060020100007005035114002110910100003001010000010100000100000010000110252017111698553000306910000300107005170051700517003670048
40024700505240000046010700356974359695254001430013100013001010000616982334220600496697070050700356466836497540010300201000060020100007003535114002110910100003001010000110100001100000010000110252017111698133000396910000300107005170051700517005170051
40024700505240000036010700356976059709254001430013100013001010000616982334147000496697070050700356466536497240010300201000060020100007005035114002110910100003001010000010100001100000010000110255417111698103000390910000300107004870051700517003870090
40024700505250000061010700356976059709254001030013100003001010000617068334220600496695570050700356466536496040010300201000060020100007005035114002110910100003001010000010100000100000010000110252017111698133000396910000300107005170051700517005170051
40024700355250000034010700356972859709254001030013100013001010000616982334220600496695570047700506466536497540010300201000060020100007003535114002110910100003001010000010100000100000010000100252027111697983000366910000300107005170051700367005170051
400247005052500000550010700356979259706254001030013100013001010000616982334220600496697070035700506476636497540010300201000060020100667005035114002110910100003001010000010100001100000310000110252017111697983000306910000300107003970048700447005670051
400247005052401000717010700356974359709254001430013100013001010000617068334220600986695570053700356466636497840010300201000060020100007003535114002110910100003001010000010100001100000010000110252017111698103000390910000300107003670051700517005170036
40024700475240110034010700206976059695254001430013100013001010000616982334220600496695570050700506466836497540010300201000060020100007004735114002110910100003001010000010100001100000010000110252017111698133000099910000300107005170051700367005170036

Test 3: throughput

Count: 8

Code:

  ldr w0, [x6]
  ldr w0, [x6]
  ldr w0, [x6]
  ldr w0, [x6]
  ldr w0, [x6]
  ldr w0, [x6]
  ldr w0, [x6]
  ldr w0, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | l2 tlb miss data (0b) | 0e | 0f | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 60 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | 92 | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8020526740200110006610326721077202580100100800001008000050011675551492365726715267361663731669480100200800002008000026737851180201100990100800001008000001008002020430800591016480039615901915110216222671100135800001002673726737267372671526715
80204267362001111067003267213701925801001008000010080000500117243704923657267362673616659316672801002008000020080000267148511802011009901008000010080000010080019214308001910121800396019431915110216222671101305800001002673726737267152673726738
802042673720010010670002669937719258010010080000100800005001167142149236562671426736166373166958010020080000200800002673664118020110099010080000100800000100800211943080058101618003901194319151102162226734013135800001002671526737267152673726738
8020426714200110106700226721279025801001008000010080000500116651914923634267142673616637316672801002008000020080000270166411802011009901008000010080000010080021194308005910160800406059431905110216222673301370800001002673726737267152673726715
8020426767200101102110326699279202580100100800001008000050011672240492365626714267361665931669480100200800002008000026737641180201100990100800001008000001008002019430800581016080040611901925110216222671100135800001002673826737267152673726737
8020426736200111006610226699377182580100100800001008000050011673881492365626714267361665831667280100200800002008000026736641180201100990100800001008000001008001921430800590026480000615901915110216222673301305800001002673726715267372671526737
802042673620010000211022672100919258010010080000100800005001167142049236462673626736166613166728010020080000200800002673685118020110099010080000100800000100800191943080059101618000061584319251102162226711013135800001002673726737267152673726737
80204267362001111067000267223072025801001008000010080000500116986804923656267362671416658316695801002008000020080000267366411802011009901008000010080000010080019190080058101218004061194319051102162226733013135800001002671526737267372673726737
8020426714201111006700326699271519258010010080000100800005001168432149236752673626737166593166968010020080000200800002673685118020110099010080000100800000100800192000800190106180041615901905110216222673301375800001002673726715267372673726738
8020426737200111106700326721270192580100100800001008000050011698680492365626714267361665831669580100200800002008000026736851180201100990100800001008000001008002019008005800061800406019431815110216222673300135800001002673726737267372673726737

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule ldst uop (55) | dispatch int uop (56) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | 5f | 60 | 61 | 69 | 6a | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map int uop inputs (7f) | map ldst uop inputs (80) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int load (95) | inst int alu (97) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
8002526722200110100650002671721818162580010108000010800005011694850104923642267222672216667316694800102080000208000026732641180021109108000010800001108002019428005710159800396119421910502050516232672999280000102673426734267342673326733
8002426715200000010000226693001216258001010800001080000501171143015492364726722267281667231668880010208000020800002672756118002110910800001080000010800000080000000398000060000005020503163326724100280000102670926723267232670926729
800242672220000000000002669321200258001010800001080000501167605015492364826708267271667231668880010208000020800002672771118002110910800001080000110800000080000000428003860394300050205031633267241010080000102670926723267232672326728
80024267302000000114100026712000025800101080000108000050117269001549236472672226708166723166888001020800002080000267287111800211091080000108000001080000039800390003580000613900005020512162326724610080000102672326709267282670926723
800242670820011000021101267171180025800101080000108000050116760501549236482670826708166723167078001022800002080000267227111800211091080000108000001080000043800390000800356104300050205131623267051010080000102672926729267092672826729
800242672820000001041100267072018025800101080000108000050117295401549236422670826727166523167028001020800002080000267225611800211091080000108000001080000008000000035800356135390005020513163326719610280000102672326723267232670926723
8002426708200000000410012669321218025800101080000108000050117045501549236282672726728166723167078001020800002080000267277111800211091080000108000001080000039800390103980035603539000502051316232671966280000102670926723267262670926736
8002426722200100000451022671221200258001010800001080000501171390015492365326732267321667731671280010208000020800002673264118002110910800001080000010800211908005710262800356039430015020513163326724610480000102672926709267282672826709
8002426727200000000450012671221201625800101080000108000050117142301549236282672726727166553167078001020800002080000267087111800211091080000108000001080000039800390003580000610430005020513163326725010080000102672926729267292672926728
800242672720010000145002267130120162580010108000010800005011672980154923648267282672716781316707800102080000208000026708561180021109108000010800000108000003980039000398003760354300050205131632267241010280000102672926728267282672926709