Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD4 (multiple, 8B)

Test 1: uops

Code:

  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 6.000

Issues: 6.008

Integer unit issues: 0.000

Load/store unit issues: 2.000

SIMD/FP unit issues: 4.008

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | mmu table walk data (08) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 24 | 3a | 3f | 43 | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6b | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
66005290382252000004100514128234222159706000401220004000200010000475003230350283162833231060002000400020008000281862816711610011000100002000042004142002224138831005272583430151192273435381817383828020142981214612921200040002821028289282002828328353
66004283262130001004100517128262022159956012401220004000200010000475742230550282302839431060002000400020008000284162812811610011000100002000042000002002044135041004971343392040190983296381513443627975141491230413252200040002851128497284532825928281
66004283492120000000100493928247200158336008401220004000200010000475524230140281332832531060002000400020008000284322827211610011000100002000042000042004224140451022871433293139191173397382110424327852146301190813227200040002843028361283662842528387
66004283302140000006100505028005222157986012401220004000200010000475844230320280482829831060002000400020008000282892825811610011000100012000042004042004224136551000870713399042193223312382011473927831143191202413679200040002838928471281752833228413
6600428362213000000810050832812422215922601240122000400020001000047556423064028250284313106000200040002000800028339282021161001100010001200004200404200204013863100807117331804119274338538209374028077145411204213446200040002825728421282542845828263
6600428368213001100410049822803702215949600840002000400020001000047528423061028264282203106000200040002000800028291283921161001100010000200004200200200424413579997871923448043193273347381513444027924141281190213162200040002836128356284242833428231
6600428303212100000410048952809422215874600840122000400020001000047578623079028212283683106000200040002000800028359283411161001100010000200004200217200422413936985370653376044191203250382413404128052147431212113700200040002843128524284372832228372
66004283392132000000100495828248020158256012401220004000200010000475526230310281682832131060002000400020008000283682831611610011000100002000042002022002224136141015771623392038191703409381515403727951145491194312967200040002836228303282562839828339
660042829421200000041005000281590221614860124000200040002000100004752682303802823428246310600020004000200080002816528305116100110001000020000420020220002441363798367210319404519152331338138434227954144291187613259200040002829528308284142838528408
66004282722131000000100514728296022160766012401220004000200010000475407229930282502843331060002000400020008000284032830611610011000100002000042004022002244137661003670683442039191913313381214354328025149291229613624200040002846428409282142828428318

Test 2: throughput

Count: 8

Code:

  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 1.0009

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4e | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480205800725991100100058002800542555025480172100320072160000100320000160000500800048544002808005008006980069335148010020016000032000020016000064000080047800691180201100991001008000080000010016001315430160053115616003961524312051091171180066131341600003200001008007080049800708007080070
4802048006959911110000571008005405050254801241003200721600001003200001600005008011871088075208002808006980047035148010020016000032000020016000064000080069800471180201100991001008000080000010016001313430160052015116000061514313251091171180066131301600003200001008004880048800488007080070
4802048006960010000000581008003205500254801721003200241600001003200001600005008011871088075218002808006980047332948010020016000032000020016000064000080069800691180201100991001008000080000010016001214430160054025216000001514313051091171180066131301600003200001008007080070800708004880048
48020480070600100000005810280032055502548016410032006416000010032000016000050080004210880752080050080047800690329480100200160000320000200160000640000800478006911802011009910010080000800000100160015140016005202521600390051431205109117118006601351600003200001008007080070800708007080049
480204800695991100000058102800542555025480172100320072160000100320000160000500801179108808880800500800698004733514801002001600003200002001600006400008004780069118020110099100100800008000001001600141343016005201521600006152431325109117118004401341600003200001008007080070800708004880070
480204800475991110000012102800322055025480172100320072160000100320000160000500800042108807520800500800698006903294801002001600003200002001600006400008006980069118020110099100100800008000001001600141243016001302551600000152431305109117118006601351600003200001008007080070800708007080048
48020480069599100100002110380054205502548016410032007216000010032000016000050080117954400320800500800698006933524805142001600003200002001600006400008006980069118020110099100100800008000001001600121443016001212521600396112012051091171180044131301600003200001008007080048800708004880048
480204800695991011000073102800540055025480164100320064160000100320000160000500801181108807520800280800718006933534801002001600003200002001600006400008006980069118020110099100100800008000001001600151400160053001316003900524313051091171180066131301600003200001008007080048800708007080048
480204800696001010000058102800542555025480172100320024160000100320000160000500800048108807520800280800698006933514801002001600003200002001600006400008006980047118020110099100100800008000001001600141443016005201511600396112431305109117118004401351600003200001008007080048800708004880070
48020480069600111111005810280032205502548012410032007216013810032000016000050080004310880752080028080047800690329480100200160000320000200160000640000800488004711802011009910010080000800000100160015134301600540152160039615201215109117118006601301600003200001008007080070800488007080070

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 1.0007

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 24 | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | branch mispred nonspec (cb) | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | e7 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
480025800565991010005701080041212120254800701032006016000010320000160000508002228320000080037080056800560033848001020160000320000201600006400008005680056118002110910108000080000110160000027016002302416002461242705019121723800531660160000320000108005780057800428005780042
48002480056600000000300108004100120254800701032006016000010320000160000508000003840000080022080056800410033848001020160000320000201600006400008005680041118002110910108000080000010160000000160000001600006124270501961723800530660160000320000108005780057803378050580057
480024800565990000003241640080501212120594800701032006016000010320000160000508000008320000080022080338800560056394800102016026032026020160000640000800568005621800211091010800008000001016050422701600240241600246024270501922623807081660160000320000108005780057800578005780057
480024800565990001013000087128012013001724493610103321001667541033332016745250979235648734408681508864188422125470233848001020160000320000201600006400008004180044118002110910108000080000010160000027016002402416000001000501921762800381660160000320000108005780057800428013880057
4800248005660000000030010800412121202548007010320000160000103200001600005080021883200000800370800568005600338480010201600003200002016000064000080056800561180021109101080000800000101600000001600240241600240124270501921722800531660160000320000108005780057800578005780042
48002480056599000000300108004120002548007010320060160000103200001600005080022183200000800370800568005600338480010201600003200002016000064000080041800561180021109101080000800000101600000270160024001600246124270501921726800380060160000320000108005780057800428005780057
48002480056600000000756010800410121202548006610320060160000103200001600005080000083200000800370800418005600338480010201600003200002016000064000080041800411180021109101080000800000101600000270160000024160000602400501921722800531660160000320000108005780057800578004280057
480024800566000000003001080041212002548007010320060160000103200001600005080021938400000800370801168005600338480010201600003200002016000064000080056800561180021109101080000800000101600000270160024001600246124270501921726800531060160000320000108004280057800578004280057
4800248005660000000030010800412121202548007010320076160000103200001600005080000083200000800370800568005600338480010201600003200002016000064000080056800561180021109101080000800000101600000270160024024160024612400501961762800531060160000320000108004280057800578005780042
4800248005659900000084300080026012002548001010320060160000103200001600005080021983200000800370800418005600338480010201600003200002016000064000080056800561180021109101080000800000101600000270160000001600246124270505021726800531660160000320000108005780057800578010780057