Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1 (single, H)

Test 1: uops

Code:

  ld1 { v0.h }[1], [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.001

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.001

retire uop (01) | cycle (02) | 03 | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | 0e | 0f | 18 | 19 | 1e | 22 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 60 | 61 | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
62005293572202100200100314561287771017320200110011000100010005000119501000226182912729307310200010001000100020002918029151116100110001000010000100003100020012991915568663079966206253076382110565428421165261375015336100010002926929222291892930829191
6200429255220250025000030455328809001719020001001100010001000500011949410226322907629184310200010001000100020002908329158116100110001000010000100101021000210128349198688730871349205333106381312565028326163491390314985100010002919729312292402915629359
620042923421919002000003145802871510172892001100110001000100050001194600022669291262930931020001000100010002000291232905311610011000100001000310010941001203129188989677230391254206883112381910484928344163021386914989100010002927729183292782932029266
620042925921925002100003046192871210172192001100110001000100050001193500022665290332922631020001000100010002000290932907811610011000100001000310010181000210128499111679330191147206033118382014515428372162861402315280100010002939329257292632924029228
62004292722191900210000204957287471017334200110011000100010005029119455002259729076292653102000100010001000200029164290341161001100010000100031000071001313129839182683430601154206393115381914515228340166341406315024100010002922529218292792931829251
6200429341218220021000030467228758001729220011000100010001000500011944500227222908329257310200010001000100020002907729074116100110001000010000100008210002131301592946861308094320556306138179464928366162931385515273100010002936129204292632922129207
62004292832202700250000314650287411117257200110001000100010005000119357002261729065293843102000100010001000200029104291241161001100010000100001000099100121012916925068543071846206453133381212525428379165891417914982100010002936929330292472933729299
62004293092192600170000014835287570117331200110011000100010005000119526002260929051292583102000100010001000200029023290621161001100010000100001000099100121312926925468073014195320586308138179505428322163831387615176100010002925829273293372920729306
62004292532192400200000714606288740017268200110011000100010005000119546002260429033292993102000100010001000200029134291511161001100010000100031000111000203129679279692230171054207273048381416505328400164891391315130100010002923229270292942933329204
620042922722022002201000145382877010172652000100010001000100050011194930022649290592920131020001000100010002000290832926711610011000100001000310010751001212129059043684931011552205533110381714555628309161961380215033100010002921629157292122920329280

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1 { v0.h }[1], [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0051

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch mispred nonspec (cb) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602051400511049001000001010014042113945612936825701044010020002100003010020000100001263985669402214309667114003314005714005713180331323926010030200100002000060200100003000014005714004111502011009910040100100001000011001000220100020111100001101000321011391113956840000101010100001000040100140052140052140052140036140052
602041400511049000000001000014002013943012936825701024010020014100003010020000100001263814669402214312591014002714005114005113179331323826010030200100002000060200100003000014005114005111502011009910040100100001000001001000110100020011100001111100321011391113956840000101010100001000040100140425140057140052140052140052
6020414005110490000000010000140036139427129363257010240100200021000030100200001000012640206693734143109391140013140051140035131797313239960100302001000020000602001000030000140035140051115020110099100401001000010000010010002101000100111000011110003210113911139568400000100100001000040100140052140036140052140036140036
60204140417104900000000000001401261394301293682570104401002000410000301002000010000126398566963571431259111400331400571400571318033132392601003020010000200006020010000300001400571400571150201100991004010010000100000100100000110000000010000100000032101126111395594000010100100001000040100140038140436140055140052140036
602041400351049000000001001014003613940612936410770100401002000010000301002000010000126402066938781431093911400271403331401281317933132382601003020010000200006020010000300001400571400511150201100991004010010000100000100100000110000010010001001000040612368111416734021710100100001000040100142022143337143343143380143556
602041435501074000100373648971936010143772141548130417157670772405472017710008301002000010000126398566940221431259111400171400411400611318013132393603953020010000201076020010000300001400571400571150201100991004010010000100001100100000110000000010032101000032101126111395494000010010100001000040100140052140052140052140052140052
6020414005110490000000000010140020139430129368257010440100200021000030100200001000012639856693244143125911140033140057140057131803313238260100302001021420000602001000030000140053140051115020110099100401001000010000110010002211000200111000011111003210111411139568400000100100001000040100140052140052140052140036140194
60204140038104800000000100101400361394301293682570104401002000410000301002000010000126398566940221431259111400271400511400351317973132399601003020010000200006020010000300001400351400511150201100991004010010000100000100100000110000000010000101000032101126111395464000001010100001000040100140042140058140058140058140058
602041400571049100100001001114002713940612936325701004010020002100003010020000100001264020669373414310939114002714005114003513179731323826010030200100002042760200100003000014005114003511502011009910040100100001000011001000211100010001100001111100321011142113956840000101010100001000040100140054140402140052140036140052
6020414009710490000010010010140036139406129347257010040100200021000030100200001000012639586693734143109391140012140051140051131797313238260100302001000020000602001000030000140051140051115020110099100401001000010000110010000001000000001000000100003210117311139559400000010100001000040100140036140052140036140052140055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0047

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 24 | 3a | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d2 | l1i cache miss demand (d3) | d5 | map dispatch bubble (d6) | d9 | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
60025140047104900101021001140038139400129353257001240010200041000030010200001000012645076693830143264371014001701400531400411318213132436600103002010000200006002010000300001400531400531150021109104001010000100000101000231100020111100001111003140000411302213957240000666100001000040010140054140054140054140042140054
6002414010810491011001100114003813940012936825700144001020004100003001020000100001264543669383014326437101400290140053140053131821313243660010300201000020000600201000030000140053140053115002110910400101000010000010100022110002011100000111203140000211302213956040000606100001000040010140042140054140054140042140042
600241400531049110100210001401431394001293642570014400102000210000300102000010000126450766984481432908910140029014004114009513183650313349660010301811005320108619661000030324140244140141115002110910400101000010000010100024110002017100031101103140000211302213956040008066100001000040010140054140054140054140054140054
6002414005310491111002100114003913940012936425700144001020002100003001020000100001264507669383014326437101400290140053140054131821313254960010305031007520216606621000030000140056140053115002110910400101000010000010100012110002304100000111103140000211302213957240000666100001000040010140057140045140058140054140054
60024140053104911111114100114008413940012936725700144001020004100003001020000100001266852669589414326437001400290140053140053131821313243660010300201000020000600201000030000140053140053115002110910400101000010000110100022110002011100001111203140000211302213957240000666100001000040010140054140054140054140054140054
6002414005310491111002100114003813940012936425700144001020004100003001020000100561264507669383014326437101400290140053140053131821313243660010300201000020000600201000030000140053140041115002110910400101000010000010100011110001011100001111103140000211302213956040000666100001000040010140054140054140054140054140054
6002414004110481100002000014003813940012936425700144001020004100003001020000100001264507669383014326437101400290140054140041131821313243660010300201006420000600201000030000140041140053115002110910400101000010000010100022110002011100000111003140000211302213957240000666100001000040010140054140054140054140054140044
6002414004110491100001000114003813938812936425700144001020004100003016020000100001264507669383014326437001400290140053140053131821313243660010300201000020000600201000030000140053140053115002110910400101000010000010100022010002011100001111003140000211302213957240000006100001000040010140054140054140054140054140054
6002414005310491111002100114003813940012936425700144001020004100003001020000100001264507669383014326437001400170140053140053131821313243660010300201000020000600201000030000140053140041115002110910400101000010000010100032110002001100001111003140000211302213957240000666100001000040010140054140042140054140054140054
6002414005310491101002100114003813940012936425700144001020004100003001020000100001264507669383014326437001400170140053140041131821313243660010300201006220000600201000030000140056140053115002110910400101000010000110100033110003014100001111003177000211302213957240000666100001000040010140054140054140054140054140042

Test 3: throughput

Count: 8

Code:

  ld1 { v0.h }[1], [x6]
  ld1 { v0.h }[1], [x6]
  ld1 { v0.h }[1], [x6]
  ld1 { v0.h }[1], [x6]
  ld1 { v0.h }[1], [x6]
  ld1 { v0.h }[1], [x6]
  ld1 { v0.h }[1], [x6]
  ld1 { v0.h }[1], [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 2.0007

retire uop (01) | cycle (02) | 03 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 61 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160205160056119900045000016004521211596900251601021008000280133100800008000050042204122939575151600411600401600601396753140018160100200800008000020080000160000160060160056118020110099100100800008000001008000008003800080168613944000511051161116003714078000080000100160061160061160057160057160061
160204160056119900000101160025212015968402516010010080000800001008000080000500421017229395471516004116004016005613969531400191601002008000080000200800001600001600601600561180201100991001008000080000010080000438000010080038013944000511051161116005314078000080000100160061160061160061160041160061
1602041600401199000440001160045211159685172516010210080004800001008000080000500421017229395471516004116006016006013969531399981601002008000080000200800001600001600401600561180201100991001008000080000010080000438003800080038000430005110511611160053141048000080000100160057160061160041160041160041
160204160040121000000101160045211159690025160244100800028000010080000800005004222042293957515160042160060160060139695314001816010020080000800002008000016000016004016004011802011009910010080000800000100800000800000008003861380000511051161116005701478000080000100160061160061160061160061160041
160204160060119900044000116004101121596841625160100100800028000010080000800005004222042293994715160041160040160040139695313999816010020080000800002008000016000016004016005611802011009910010080000800000100800000800000008003860044000511051161116005710078000080000100160061160041160061160061160061
1602041600401199000440000160045210159690162516010010080002800001008000080000500422226229395591516004116006016011013969531400181601002008000080000200800001600001600561600561180201100991001008000080000010080000080000003880000603900005110511611160057101048000080000100160057160061160061160061160061
1602041600561199000001011600252012159684162516010210080000800001008000080000500422232229395591516004116006016006013969531400181601002008000080000200800001600001600601600561180201100991001008000080000110080000438005400080038610440005110511611160037141408000080000100160061160041160061160041160061
16020416006011980000010116004500115968502516010210080000800001008000080000500422232229381471516002116006016006013967531400181601002008000080000200800001600001600401600561180201100991001008000080000010080000438003810388003860000005137511611160037141008000080000100160057160041160057160041160041
160204160056119800044010016004120121596841625160102100800028000010080000800005004210292293994715160041160060160056139691314001816010020080000800002008000016000016004016005611802011009910010080000800000100800004380039000800386139430005110511611160053141478000080000100160061160061160061160061160061
160204160040119900044010116004521121596731925160102100800028000010080000800005004222042293814715160041160060160040139695313999816010020080000800002008000016000016006016005611802011009910010080000800000100800004380000000800006039440005110511611160037141078000080000100160041160057160061160061160041

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 2.0008

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | branch cond mispred nonspec (c5) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160025160061119910100210021600462181815968502516001410800028000010800008000050421532229400440160042016006116011113971831400411600102080000800002080000160000160061160110118002110910108000080000010800192000800560015980000615701910050201016046160058092800008000010160062160062160062160062160062
160024160061119910100650031600462018159687162516001410800048000010800008000050422410229400440160042016004616004613971831400411600102080000800002080000160000160096160061118002110910108000080000010800191942080057100598000001570192005020616056160058992800008000010160047160062160062160062160047
1600241601091199111006510316004621818159685152516001410800048000010800008000050422384229400150160042016006116006113971831400261600102080000800002080000160000160046160046118002110910108000080000010800201942118800582002180000015742190005020616056160058902800008000010160047160062160062160062160062
160024160061119810000651011600310181815968802516001210800048000010800008000050422388229400440160042016004616006113970331400261600102080000800002080000160340160061160046118002110910108000080000110800191900800551005980038001942192005020716064160058992800008000010160047160047160047160062160062
160025160046119911000650021600462181815968516251600121080004800001080000800005042152122938681016002701600461600611397183140041160010208000080000208000016000016006116006111800211091010800008000001080020190080019000218000061570191005020616066160058992800008000010160062160047160047160047160062
1600251600461199111006500216004621818159685162516001410800048000010800008000050421533229400310160081016006116004613971831400411600102080000800002080000160000160061160046118002110910108000080000110800201900800571002180038605742191005020716066160043992800008000010160062160062160047160062160047
1600241600611198111002100316004601801596851525160012108000480000108000080000504215142294004401600660160061160046139703314004116001020800008000020800001600001600461600611180021109101080000800000108002019420800191115980000615742191005020416046160043902800008000010160062160062160062160062160062
16002416006111991100019900216009420181596851525160014108000480000108000080000504223952293869301600420160061160046139718314004116001020800008000020800001600001600461600611180021109101080000800000108002020420800571005980000611942190005020616064160043092800008000010160108160062160062160047160047
160024160061119910111220011600462018159479162516001412800048000010800008032150421532229400440160027016004616004613969247140052160010208000080000208000016000016006116004611800211091010800008000001080020200080057100598000060570181005020416064160058992800008000010160062160047160047160062160062
16002416004611991110065101160046018181596851625160014108000480000108000080000504215322294002301600270160061160061139718314004116001020801708000020800001600001600611600611180021109101080000800000108002019420800570012180000605742191015020516078160043092800008000010160047160047160062160062160062