Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (single, D)

Test 1: uops

Code:

  ld1 { v0.d }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.001

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.001

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | l2 tlb miss instruction (0a) | 0e | 0f | 1e | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
620052942622125221040455428762001735320011002100010001000500011899522695290712929431020001000100010002000291612923711610011000100011000310000031002210013582925168573062989208413087381421666528410163431395415345100010002933029360294022942929214
6200429353219232700004629288920017241200210021000100010005000119074226002902929286310200010001000100020002917029149116100110001000110002100000010003130130789245680230031464208073061381316656228457162181418315322100010002934729309293742917629432
6200429335220252000004582288450017296200110011000100010005000119410226472908629204310200010001000100020002909129145116100110001000010003100000010002030132199168684330591565206883055381818646728467162501387715262100010002936629356293122935529482
620042928922024250000463928846001732720011000100010001000500011944022633291122919131020001000100010002000291262914511610011000100011000210000059710010020128319418686231491460206973047381513646228358164811410515236100010002935829320293052929029329
620042942822027200000464728818101736320011001100010001000500011935422662291312933431020001000100010002000291382913211610011000100001000210000001000300012966932867473050863207763060381312746228379162931377415209100010002926629301293632922529358
6200429412220242900804595288131017273200110011000100010005000119543226342914329276310200010001000100020002916129316116100110001000010000100000010002030129249277686430131063207113104381314676328450162271386715256100010002938629315293572933629293
6200429333219243501014596288251017319200110011000100010005000119030226382911029419310200010001000100020002911229056116100110001000110003100000010003000129659252685430971162206053116381514646728434161381381515221100010002928029218292882926729220
6200429277220202100204635288940017308200110011000100010005000119433225732908829268310200010001000100020002914029109116100110001000010000100200010012020135179248681031091363206663284381118606528409159341391115410100010002929529239293592931429275
6200429328220272400004541288290117196200010011000100010005000119070226412903529348310200010001000100020002920529116116100110001000010000100100010003000129829191693630781464206673229381117696328376163441398415168100010002926329367292602929529358
620042936221919220001464728794011731020011001100010001000500011944822654290692914831020001000100010002000291672915611610011000100001000010000031001203012930920268623083963206803149381111646228679163531394015324100010002925529434293552939129404

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.d }[1], [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60205140051104901000001010014003613941112936325701024010020002100003010020000100001264020669373414310939114002714005114005113179703132399601003020010000200006020010000300001400511400511150201100991004010010000100000100100000110000000100001100321021262213955940000101010100001000040100140036140036140052140052140036
60204140035104900000001010014003613941112936325701024010020002100003010020000100001264020669373414310939114002714005114003513179703132399604253020010000200006020010000300001400511400511150201100991004010010000100000100100000010000000100001100321021262213955940000101010100001000040100140052140052140036140052140036
60204140035104900000001010014002013941112936325701024010020002100003010020000100001264020669294714310939114002714005114009913179703132399601003020010000200006020010000300001400511400511150201100991004010010000100000100100000110000000100001100321021263213955940000101010100001000040100140052140052140036140052140052
6020414005110490000000101001400361394111293632570100401002000210000301002000010055126381466937341431093911400271400351400511317970313238260100302001000020000602001000030000140035140051115020110099100401001000010000010010000011000010310000100032102126221396634000001010100001000040100140036140052140052140052140052
6020414005110490000000101001400361394111293632570100401002000010000301002000010000126402066937341431093911400271400511400351317970313239960100302001000020000602001000030000140035140051115020110099100401001000010000010010000001000000010000110032102126221395604000001010100001000040100140052140052140036140036140052
60204140051104900000001010014003613941112936325701024010020002100003010020000100001264020669373414310939114002714005114005113179703132399601003020010000200006020010000300001400511400351150201100991004010010000100000100100000010000000100001000321021273313955940000101010100001000040100140052140052140052140052140052
60204140051104900000000000014002013941112936325701024010020000100003010020000100001264020669294714308701114002714005114005113179303132399601003020010000200006020010000300001400511400561150201100991004010010000100000100100000110000000100001000321031273313955940000101010100001000040100140052140052140052140052140052
6020414005110490000000101001400361394111293472570102401002000210000301002000010000126380366937341431093901400271400511400511317970313239960100302001000020000602001000030000140035140035115020110099100401001000010000010010000011000000010000110032102126221395594000010010100001000040100140036140036140052140052140036
60204140035104900000000010014003613941112936325701024010020002100003010020000100001263803669373414310939014002814005114005113179703132382601003020010000200006020010000300001400541400351150201100991004010010000100000100100000110000000100001100321031262213955940000101010100001000040100140036140052140036140052140052
6020414010610490000000100001400361394111293642570100401002000210000301002000010000126402066929471431093901400111400351400511317970313239960100302001000020000602001000030000140051140051115020110099100401001000010000010010000011000000010000110032103126331395594000001010100001000040100140052140052140052140052140052

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60025140047104800000001010014041313939712936225700124001020002100003001020000100001264477669387814325829114002614005014005013181503132430600103002010000200006002010000300001400501400472150021109104001010000100000101000011000000100001000314001111391313955440000066100001000040010140036140036140051140036140051
6002414005010490000000100001400351393971293472570010400102000210000300102000010000126442966929471432144211400261400351400501318140313242060010300201000020000600201000030000140050140047115002110910400101000010000010100001100000010000100031400811381113956940000969100001000040010140051140036140048140048140048
600241400351049000000060000140032139401129347257001240010200001000130010200001000012644436693538143258291140011140035140050131818031324376001030020100002000060020100003000014005014004711500211091040010100001000001010000010000001000000003140010111131013956940000909100001000040010140036140051140051140036140036
60024140035104900000005620000140032139394129347257001240010200021000230010200001000012644296692947143260331140023140047140035131815031324206001030020100002012560020100003000014005414004711500211091040010100001000001010000010000001000011003140011113111113956940000000100001000040010140051140051140051140036140036
600241400501049000000010000140020139397129362257001040010200021000030010200001000012644436692947143258291140026140050140035131803031324206001030020100002000060020100003000014004714004711500211091040010100001000001010000010000001000000003140011111121313956940000960100001000040010140051140051140051140036140036
600241400501049000010010000140043139394129348257001240010200001000030152200001000012644776693538143258291140011140050140047131815031324336001030020100002000060020100003000014005014003511500211091040010100001000001010000010000001000001003140011113121213956940000966100001000040010140051140036140051140051140036
60024140050104900000001010014002013940212934725700104001020002100003001020000100001264429669294714321442114002614005514005013180303132433600103002010000200006002010000300001400501400471150021109104001010000100001101000001000040100000100314001311311913956940000000100001000040010140036140051140051140051140051
60024140047104900000000010014003413939412935925700124001020002100003001020000100001264429669390214321442114002314005014005113181803132433600103002010000200006002010000300001400471400471150021109104001010000100000101000001000100100001000314009113131413955440000969100001000040010140051140051140036140048140051
60024140050104900000000000014003513939712936225700124001020000100003001020000100001264429669368514326112114003114005014005013180303132420600103002010000200006002010000300001400501400471150021109104001010000100000101000011000101100001000314001211110713956940000999100001000040010140051140036140036140036140036
600251400531049000000000000140020139397129359257001240010200021000030010200001000012644776692947143261121140011140050140050131818031324336001030020100002000060020100003000014005014003511500211091040010100001000001010000110000001000001003140010113101213955440000009100001000040010140051140052140051140051140051

Test 3: throughput

Count: 8

Code:

  ld1 { v0.d }[1], [x6]
  ld1 { v0.d }[1], [x6]
  ld1 { v0.d }[1], [x6]
  ld1 { v0.d }[1], [x6]
  ld1 { v0.d }[1], [x6]
  ld1 { v0.d }[1], [x6]
  ld1 { v0.d }[1], [x6]
  ld1 { v0.d }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0006

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160205160052119900100010041010116003721818159686122516010210080002800001008000080000500421810229392401600331600521600521396873140010160100200800008000020080000160000160052160052118020210099100100800008000001008000003908000000358003561353905110004161116004900628000080000100160041160041160041160053160053
160204160052119910000000041010116003721818159686122516010210080002800001008000080000500421017229381471600331600521600521396873140010160100200800008000020080000160000160052160040118020110099100100800008000001008000003908000000358000061353915110001161116004906028000080000100160053160053160053160053160053
160204160052119900000000041010116003701818159686112516010210080002800001008000080000500421810229392401600211600521600401396873140010160100200800008017120080000160000160052160052118020110099100100800008000001008000003908003500358003660353905138301161116004906628000080000100160053160041160053160053160053
160204160052119900000000000101160037218015968612251601021008000280000100800008000050042181022939240160033160052160052139687314001016010020080000800002008000016000016005216005211802011009910010080000800000100800000390800350035800006135005110001161116003706628000080000100160053160053160053160041160053
160204160052119900000000041000116003721818159690112516010210080002800001008000080000500421810229392401600331600521600521396873140010160100200800008000020080000160000160052160052118020110099100100800008000001008000003908003500358003561353905110001161116004900028000080000100160053160053160053160053160053
1602041600521199000000000410101160025218181596860251601021008001480000100800008000050042181022939228160033160052160052139687314001016010020080000800002008000016000016005216005231802011009910010080000800000100800000390800350035800356103905110001161116003706628000080000100160053160053160053160053160053
1602041600521199000000000001011600372181815969012251601021008000280000100800008000050042181022939203160033160052160052139687314001016010020080000800002008000016000016005216005211802011009910010080000800000100800000390800000035800356135005110001161116003706008000080000100160041160053160053160053160053
16020416005211990000000000000116003721818159686122516010010080002800001008000080000500421810229392401600331600521600521396873140010160100200800008000020080000160000160048160060118020110099100100800008000001008000003908003500388003561353905110001161116003706628000080000100160053160041160041160053160041
16020416005211980000000004100011600372181815969011251601021008000280000100800008000050042101722939240160021160052160040139687313999816010020080000800002008017116000016005216005211802011009910010080000800000100800000008000000358003561353905110001161116004906028000080000100160053160053160053160041160041
1602041600521199000000000410101160037000159686122516010210080002800001008000080000500421017229392401600211600401600401396873139998160100200800008000020080000160000160052160100118020110099100100800008000001008000003908003500358003561353905110001161116004906028000080000100160053160053160053160053160053

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0006

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | 09 | 0e | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | cf | d0 | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160025160052119901000451021600252181815968416251600121080002800001080000800005042206622938147116003301600521600521397093140032160010208000080000208000016000016004016005211800211091010800008000001080000039080039000080039603500502000716471600531064800008000010160057160057160041160107160041
1600241600521199000004500216008621212159690162516001210800008000010800008000050422066229392401160021016005616005213971331400361600102080000800002080000160000160056160052118002110910108000080000010800000390800350003580035613539050200031676160049064800008000010160057160057160057160057160041
1600241600401199000004110216002521812159690162516001210800028000010800008000050421085229392401160021016005616005613971331400361600102080000800002080000160000160056160052118002110910108000080000110800000390800350003980039003539050200181697160053060800008000010160057160057160057160041160041
16002416024011990000245002160025212121596901625160012108000280000108000080000504218782293814701600370160056160052139713314003616001020800008000020800001600001600401600521180021109101080000800000108000003908004001039800396135430502000416441600531064800008000010160041160041160041160057160041
160024160040119900000650031600412121215969002516001210800028000010800008000050422109229392401160037016004016005613969731400201600102080000800002080000160000160052160040118002110910108000080000010800000008003900039800356039005020005164716005310100800008000010160053160057160057160057160057
160024160052119900000450001600412121215968402516001210800028000010800008000050421085229381470160037016005616005613971331400201600102080000800002080000160000160056160052118002110910108000080000010800000390800390000800396035390502000816441601031064800008000010160053160091160041160053160041
160024160040119900000000216004120121596841225160012108000080000108000080000504210852293814711600330160052160056139713314002016001020800008000020800001600001600611600611180021109101080000800000108001920008001910221800006157421950200081643160043992800008000010160062160062160047160047160062
1600241600611199100006510216010301812159684025160012108000280000108000080000504220662293924011600330160056160040139697314003616001020800008000020800001600001600561600521180021109101080000800000108000003908000000039800000139430502000416441600371064800008000010160057160057160106160053160053
160024160040119900000451021600372121215968416251600121080002800001080000800005042108522938147116002101600561600561396973140036160010208000080000208000016000016005616004011800211091010800008000001080000039080039000080039613500502000816341600371064800008000010160041160053160053160053160053
160024160056119900000451021600312181815968515251600121080004800001080000800005042241022940044016004201600611600611397183140041160010208000080000208000016000016004016005211800211091010800008000001080000039080000000128003960350050200081635160103662800008000010160057160057160041160053160053