Apple Microarchitecture Research by Dougall Johnson

M1/A14 P-core (Firestorm): Overview | Base Instructions | SIMD and FP Instructions
M1/A14 E-core (Icestorm):  Overview | Base Instructions | SIMD and FP Instructions

LD1R (2D)

Test 1: uops

Code:

  ld1r { v0.2d }, [x6]
  nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop ; nop
  mov x0, 1
  mov x1, 2
  mov x8, 0

(no loop instructions)

1000 unrolls and 1 iteration

Retires (minus 60 nops): 2.000

Issues: 2.003

Integer unit issues: 0.000

Load/store unit issues: 1.000

SIMD/FP unit issues: 1.003

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk instruction (07) | mmu table walk data (08) | 09 | l2 tlb miss instruction (0a) | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 22 | 24 | 3a | 3f | 46 | 49 | 51 | schedule uop (52) | schedule simd uop (54) | schedule ldst uop (55) | dispatch simd uop (57) | dispatch ldst uop (58) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 5f | 69 | 6d | 6e | map rewind (75) | map stall (76) | dispatch uop (78) | map ldst uop (7d) | map simd uop (7e) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst simd load (98) | inst ldst (9b) | 9d | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cf | d0 | d2 | l1i cache miss demand (d3) | l1i tlb miss demand (d4) | d5 | map dispatch bubble (d6) | d9 | da | db | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | f5 | f6 | f7 | f8 | fd
620052940622101900170100011046212876911171962003100310001000100050001193352261429052292603102000100010001000100029073290631161001100010000100133100202410001130012988924268423060544206053147381713414228382161941386315121100010002927229240292362926429271
620042924622011411110000041046642878400171722004100410001000100050001195102259429011292703102000100010001000100029136291191161001100010000100133100211710003121012848918668623076134020630308838186374128338161991391315077100010002917229234292532926829252
6200429215218115111710000510455928738001724320041006100010001000500011943222608290662921731020001000100010001000291982912711610011000100001001221003202281000313111291092556915310464520575314338178454528337162861394214808100010002930229212292712919929269
62004292742191161117100004004616287520017308200410041000100010005000119493226252899529288310200010001000100010012910429137116100110001000010011210021711010003131112996911668333109645206153062381211384228387163091374115072100010002935229246292092927129255
62004292492191171119100004004622288010017258200310041000100010005000119371225952909929221310200010001000100010002909029097116100110001000010013310021111000313111297292106870309664020570312438194424228352163891394514980100010002933029195293112928629200
620042916721911511161000016104619287290017284200310041000100010005000119524226442899229138310200010001000100010002914629115116100110001000010014210030011000312121289291546837306054120599307438189394428320162851374515043100010002896929280292792921029248
6200429185218118101610000510465228721001712720031004100010001000500011942322609290592923931020001000100010001000290582906911610011000100001001103100128171000212111307791966820306654320627310338155424428372161001376315050100010002926929184293532921729299
6200429191219118111110000110467528839001722520041004100010001000500011948522708290192917631020001000100010001000291532907511610011000100001001441002230129710002131112947922170143082542205903086381711444028337162151386215162100010002939829176292552926729218
62004292382191120012111004410459228724001724620021003100010001000500011926022646290352922031020001000100010001000291322913111610011000100001001321000730410002130012835913668643052840206183081381612454428308163741378514836100010002919529274292442925029198
6200429290219018001401000310458128781111723120031003100010001000500012027122632290492927731020001000100010001000291132915311610011000100001001131001506310002020012906937468693066845206163072381313444528375162461378814911100010002926929326292822928029233

Test 2: Latency 1->2 roundtrip

Chain cycles: 3

Code:

  ld1r { v0.2d }, [x6]
  fmov x0, d0
  eor x8, x8, x0
  eor x8, x8, x0
  add x6, x6, x8
  mov x0, 1
  mov x1, 2
  mov x8, 0

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0054

retire uop (01) | cycle (02) | 03 | mmu table walk data (08) | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 19 | 1e | 1f | 22 | 23 | 24 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6b | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d0 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
602051400571049000000010000140020139406129347257010040100200001000030100200001000012640206692947143109390140011014005414003513180003132389601003020010000200006020010213200001400511400511150201100991004010010000100000100100000010000000010000101000321002139611395684000013010100001000040100140055140036140036140055140055
6020414003510490000000100001400391394271293472570102401002000210000301002000010000126380366938781431093901400113140035140054131800031323996010030200100002000060200100002000014043114005111502011009910040100100001000001001000000100000000100001000003210011271113956540000131313100001000040100140055140055140036140055140055
602041400541049000000010100140039139406129365257010040100200021000030100200001000012640206693235143185560140030014028414005713179303132389601003020010000200006020010000200001400541400351150201100991004010010000100000100100000110001000010000101000321001139111395654000001313100001000040100140055140055140055140036140036
602041400351049100000010100140039139427129365257010240100200001000030100200001000012638036692947143122820140030014003514003513179703132399601003020010000200006020010211200001400541400351150201100991004010010000100000100100000110000000010000101000321001139111395654000013013100001000040100140036140055140055140103140055
60204140054104900000001010014002013940612936325701024010020002100003010020000100001264020669387814310939114001101400511400351318000313238260100302001000020000602001000020000140054140035115020110099100401001000010000010010000001000000001000010100032100113911139559400000010100001000040100140055140057140060140432140055
602041400541049000000010000140039139411129363257010040100200021000030100200001000012640206693734143122820140030014003714005413179303132382601003020010000200006020010000200001400541400511150201100991004010010000100000100100000110000230481000010100032100113911139565400000013100001000040100140055140036140055140055140055
602041400541049000000000100140036139427129347257010240100200001000030100200001000012639586692947143122821140030014005414005413180003132399601003020010000200006020010000200001400351400351150201100991004010010000100000100100000110000000010000001000321001139111395654000013130100001000040100140052140052140428140055140058
6020414003510490000000403520001400391394271293652570102401002000210000301002000010000126380366938781430870101400300140051140051131797031323996010030200100002000060200100002000014003514005111502011009910040100100001000001001000000100000300100001010003210011271113955940000101310100001000040100140055140055140036140052140055
60204140054104900000001010014003913942712936525701024010020002100003010020000100001263803669387814308701014001101400541400351318000313238260100302001000020000602001000020000140054140051115020110099100401001000010000010010000011000000001000000100032100113911139565400001300100001000040100140055140055140055140036140055
602041400541049000000082010014003913942712936525701024010020000100003010020000100001263803669373414308701014003001400351400601318000313260560100302001000020000602001000020000140055140423115020110099100401001000010000010010000011000000001000010100032100113911139546400000010100001000040100140055140036140055140052140055

1000 unrolls and 10 iterations

Result (median cycles for code, minus 3 chain cycles): 11.0050

retire uop (01) | cycle (02) | 03 | 09 | l2 tlb miss data (0b) | 0e | 0f | 18 | 1e | 22 | 23 | 3f | 4d | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | int prf full (71) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d cache miss ld (a3) | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | l1d cache miss ld nonspec (bf) | branch cond mispred nonspec (c5) | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ? int output thing (e9) | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
6002514004710481000011014002013939412935925700104001020002100003001020000100001264443669353814325933114001314005214005013181803132433600103002010000200006002010000200001400471400351150021109104001010000100000101000011000050100001100314031113313955440000960100001000040010140051140036140048140036140051
6002414004710490000011014003213939712936225700104001020002100003001020000100001264443669352214325829014002614005014003513180303132435600103002010062200006002010000200001400501400471150021109104001010000100000101000001000010100001000314031133313956640000969100001000040010140051140036140067140036140048
6002414003510480001001014003513939712934725700124001020000100003001020000100001264477669368514325829014002614005014004713181503132506600103002010000200006002010000200001400501400471150021109104001010000100000101000001000000100000100314031133313956940000669100001000040010140036140036140036140051140036
6002414003510490000000014003513939712936225700124001020002100003001020000100001264477669294714325829014001114005014005013180303132436600103002010000200006002010000200001400351400471150021109104001010000100000101000011000000100001100314031113313956640000999100001000040010140048140051140050140049140037
60024140108104901000181014003513940512936225700354001020002100003001020000100001264477669368514325829014003114005014003513181803132438600103002010000200006002010000200001400501400471150021109104001010000100000101000011000000100001100314031133313956940000669100001000040010140052140038140053140423140036
6002414005010490000061014003213939412936225700104001020002100003001020000100001264477669368514325829114002614003514003513180303132433600103002010000200006002010000200001400471400471150021109104001010000100000101000011000000100001100314021112313956640000969100001000040010140051140036140036140051140051
6002414005010490000011014002013939712936225700124001020002100003001020000100001264477669368514321442014002614008214005013180303132478600103002010000200006002010000200001400501400471150021109104001010000100000101000011000000100001000314031133213955440000006100001000040010140036140051140051140036140051
6002414005010490000011014007913939412936225700104001020002100003001020000100001264429669368514321442014001114003514005013180303132462600103002010000200006002010000200001400351400471150021109104001010000100000101000001000000100001100317731113313956940000960100001000040010140048140048140048140036140048
6002414005010490000061014004013939712936225700104001020002100003001020000100001264477669368514325829114002614005014005013181803132466600103002010000200006002010000200001400351400471150021109104001010000100000101000001000000100001100314021133313956840000099100001000040010140051140051140036140082140048
6002414003510490000011014003513939412936225700124001020000100003001020000100001264477669368514326555114001114005514003513181803132478600103002010000200006002010000200001400501400351150021109104001010000100001101000011000010100000100314031113213956940000009100001000040010140051140051140051140048140036

Test 3: throughput

Count: 8

Code:

  ld1r { v0.2d }, [x6]
  ld1r { v0.2d }, [x6]
  ld1r { v0.2d }, [x6]
  ld1r { v0.2d }, [x6]
  ld1r { v0.2d }, [x6]
  ld1r { v0.2d }, [x6]
  ld1r { v0.2d }, [x6]
  ld1r { v0.2d }, [x6]
  mov x7, x6
  mov x8, x6
  mov x9, x6
  mov x10, x6
  mov x11, x6
  mov x12, x6
  mov x13, x6

(fused SUBS/B.cc loop)

100 unrolls and 100 iterations

Result (median cycles for code divided by count): 0.3342

retire uop (01) | cycle (02) | 03 | l1d tlb fill (05) | mmu table walk data (08) | l2 tlb miss data (0b) | 0e | 0f | 1e | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | ld unit uop (a6) | st unit uop (a7) | l1d cache writeback (a8) | a9 | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | l1d tlb miss nonspec (c1) | c2 | cd | cf | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ec | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
160205267372001011010200326722207025160165100800198000010080000800005001176870187829412669726737267156659366951601002008000080000200800008000026737267371180201100991001008000080000010080019194380019102218003961044000511031633267291410080000800001002673326709267332670926741
16020426708200000014510026713000192516014410080044800001008000080000500116888018840111267132673226708663036690160100200800008000020080000800002670826728118020110099100100800008000001008000004380039020548000061394400051103162326725140380000800001002673326729267362673326733
1602042672820000011440002671720019251601441008004480000100800008000050011696231880202126715267202673266303666616010020080000800002008000080000267322672811802011009910010080000800000100800000438000000039800006139000051103163326705140080000800001002673326733267332673326709
160204267282000001144000267170120192516010010080000800001008000080000500117462818870671267132673226732665436690160100200800008000020080000800002673226728118020110099100100800008000001008000000800000000800000104300051103162326729014380000800001002670926709267332673326733
160204267282000001023003267003701925160165100800648000010080000800005001169295188353212672626737267376659366951601002008000080000200800008000026737267371180201100991001008000080000010080019194380058000608000060580192051103163326734013080000800001002673826716267382671626716
1602042671520011010440012671721200251601441008000080000100800008000050011688801887334126712267082670866503666616010020080000800002008000080000267082672811802011009910010080000800000100800000080038000388003861043000511031633267051414380000800001002673326709267292673326709
160204267282000001044001266932011925160144100800008000010080000800005001169085188521412671826732267086630366901601002008000080000200800008000026708267281180201100991001008000080000010080000044800390003880038600440005110316332670500380000800001002670926733267292673326733
16020426708201000114400126717012102516014410080044800001008000080000500116888018835731267132673226732665036666160100200800008000020080000800002670826728118020110099100100800008000001008000004380038000438003801044000511231633267051410080000800001002670926733267332670926709
160204267322000001044101266930111925160144100800008000010080000800005001174887188722812672726737267376637366731601002008000080000200800008000026715267371180201100991001008000080000010080019200800611012180000605843190051103162326712130280000800001002671626716267382673826738
1602042673720011111670002670020702516016510080019800001008000080000500117017918871601267222672826708663036666160100200800008000020080000800002673226728118020110099100100800008000001008000004380038000388003801043000511031633267291414080000800001002672926709267332670926709

1000 unrolls and 10 iterations

Result (median cycles for code divided by count): 0.3340

retire uop (01) | cycle (02) | 03 | l1i tlb fill (04) | 09 | l2 tlb miss data (0b) | 0e | 0f | 1e | 1f | 22 | 23 | 3a | 3f | 43 | 46 | 49 | 4f | 51 | schedule uop (52) | schedule int uop (53) | schedule simd uop (54) | schedule ldst uop (55) | dispatch int uop (56) | dispatch simd uop (57) | dispatch ldst uop (58) | int uops in schedulers (59) | simd uops in schedulers (5a) | ldst uops in schedulers (5b) | 60 | 69 | 6d | 6e | map stall dispatch (70) | simd prf full (72) | map rewind (75) | map stall (76) | dispatch uop (78) | map int uop (7c) | map ldst uop (7d) | map simd uop (7e) | map int uop inputs (7f) | map ldst uop inputs (80) | map simd uop inputs (81) | 82 | 83 | flush restart other nonspec (84) | 85 | inst all (8c) | inst branch (8d) | inst branch taken (90) | inst branch cond (94) | inst int alu (97) | inst simd load (98) | inst ldst (9b) | 9d | 9f | l1d tlb access (a0) | l1d tlb miss (a1) | l1d cache miss ld (a3) | a5 | ld unit uop (a6) | l1d cache writeback (a8) | ac | af | b5 | b6 | bb | l1d cache miss ld nonspec (bf) | c2 | branch cond mispred nonspec (c5) | cf | d2 | d5 | map dispatch bubble (d6) | dd | fetch restart (de) | e0 | ea | eb | ? ldst retires (ed) | ? simd retires (ee) | ? int retires (ef) | f5 | f6 | f7 | f8 | fd
1600252672520011011410101267080181812251600521080041800001080000800005011701071884807026706267292672366680367031600102080000800002080000800002672326723118002110910108000080000010800000390800000358003561353900502003165526720068000080000102672426709267242672426724
1600242672320000011410101267082181812251600511080041800001080000800005011701071884807026704267082672366530367031600102080000800002080000800002670826723118002110910108000080000010800000390800350358003561353900502002163526705608000080000102672426724267242672426724
160024267082000001041000126708218181125160051108000080000108000080000501170107188480702671126723267236653036703160010208000080000208000080000267232670811800211091010800008000001080000039080035135800356135000502002162326720608000080000102672426724267242672426724
1600242672320000011410101266932181812251600511080041800001080000800005011698441884807026715267292670866680367031600102080000800002080000800002672327130118002110910108000080000010800000008000003580035010000502005165526720668000080000102672426709267242672426709
16002426708200000104101002670821818122516005110800008000010800008000050116888018848070267042672326708666803668816001020800008000020800008000026708267231180021109101080000800000108000000080035035800350103900502003165626720668000080000102670926709267242670926709
160024267232000001041010126708201811251600511080000800001080000800005011701071884807026704267232672366530367031600102080000800002080000800002672326708118002110910108000080000010800000390800000358003561353900502005163326720008000080000102672426724267242672426709
160024267232000001041010126708018012251600101080000800001080000800005011688801884807026712267232672366680367031600102080000800002080000800002670826708118002110910108000080000010800000390800350358003561353900502003163226720608000080000102672426709267092670926724
160024267232000001000101267082181802516005110800418000010800008000050117010718848070267122672326723665303670316001020800008000020800008000026708267231180021109101080000800000108000003908003503580000610000502003163226705068000080000102672426724267242672426709
1600242672320000010001012670820183251600511080041800001080000800005011701071884807026712267232672366530367031600102080000800002080000800002672326708118002110910108000080000010800000390800350358003560353900502006165526720068000080000102672426724267242672426724
16002426708200000104101012670801818025160052108004180000108000080000501170107188480712669726723267236668036703160010208000080000208000080000267262672311800211091010800008000001080000039080035135800350103900502003163226720608000080000102670926709267092671226724