Skip to content

Commit

Permalink
Merge branch 'dev/3.0' into feature/profiling
Browse files Browse the repository at this point in the history
  • Loading branch information
guodongliang committed Dec 30, 2024
2 parents 3dec40c + 8529f12 commit 6760e68
Show file tree
Hide file tree
Showing 22 changed files with 259 additions and 77 deletions.
19 changes: 13 additions & 6 deletions modules/Nncase.Modules.CPU/Passes/Distributed/AutoDistributed.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,17 @@ public sealed partial class AutoDistributedPass : FunctionPass
{
private readonly CompileOptions _compileOptions;

public AutoDistributedPass(CompileOptions compileOptions)
private readonly string _moduleKind;

public AutoDistributedPass(CompileOptions compileOptions, string moduleKind = "cpu")
{
_compileOptions = compileOptions;
_moduleKind = moduleKind;
}

protected override Task<BaseFunction> RunCoreAsync(BaseFunction input, RunPassContext context)
{
var rewriter = new AutoDistributedRewriter(_compileOptions, _compileOptions.TargetOptions is CpuTargetOptions options ? options : new CpuTargetOptions());
var rewriter = new AutoDistributedRewriter(_compileOptions, _compileOptions.TargetOptions is CpuTargetOptions options ? options : new CpuTargetOptions(), _moduleKind);
return Task.FromResult(rewriter.Rewirte(input));
}
}
Expand All @@ -54,19 +57,23 @@ internal sealed class AutoDistributedRewriter : ExprVisitor<Dictionary<IRType, L
{
private readonly Dictionary<Expr, IEquality> _equalMemo = new();

public AutoDistributedRewriter(CompileOptions compileOptions, CpuTargetOptions targetOptions)
private readonly string _moduleKind;

public AutoDistributedRewriter(CompileOptions compileOptions, CpuTargetOptions targetOptions, string moduleKind = "cpu")
{
Placements = targetOptions.Hierarchies.Select(h => new Placement(h, targetOptions.HierarchyNames)).ToArray();
Placements = targetOptions.Hierarchies.Select(h => new Placement(h, targetOptions.HierarchyNames, targetOptions.HierarchyKind)).ToArray();
CompileOptions = compileOptions;
TargetOptions = targetOptions;
if (Path.Exists(TargetOptions.DistributedScheme) && System.Text.Json.JsonSerializer.Deserialize<DistributedScheme>(File.ReadAllText(TargetOptions.DistributedScheme)) is DistributedScheme scheme)
{
Scheme = scheme.Outputs.ToDictionary(n => n.Name, n => (new IRArray<SBP>(n.NdSBP), new Placement(n.Hierarchy, n.HierarchyName)));
Scheme = scheme.Outputs.ToDictionary(n => n.Name, n => (new IRArray<SBP>(n.NdSBP), new Placement(n.Hierarchy, n.HierarchyName, targetOptions.HierarchyKind)));
}
else
{
Scheme = new Dictionary<string, (IRArray<SBP> NdSBP, Placement Placement)>();
}

_moduleKind = moduleKind;
}

public IRArray<Placement> Placements { get; }
Expand Down Expand Up @@ -332,7 +339,7 @@ protected override Dictionary<IRType, List<Expr>> VisitLeafCall(Call expr)
return new Dictionary<IRType, List<Expr>> { { expr.CheckedType, new() { expr } } };
}

var isSupported = PassUtility.IsCpuSupported(op, expr, expr.Arguments.ToArray());
var isSupported = PassUtility.IsCpuSupported(op, expr, expr.Arguments.ToArray(), _moduleKind);
foreach (var param in op.Parameters)
{
VisitLeafArgument(param.ParameterKind, expr.Arguments[param.Index], isSupported);
Expand Down
9 changes: 8 additions & 1 deletion modules/Nncase.Modules.CPU/Passes/PassUtility.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public static bool IsCpuSupported(Op op)
return op is IR.Math.Unary or IR.Math.Binary { BinaryOp: BinaryOp.Add or BinaryOp.Sub or BinaryOp.Mul or BinaryOp.Div } or IR.Math.MatMul or IR.NN.Conv2D { PadMode: PadMode.Constant } or IR.NN.Softmax or IR.NN.LayerNorm or IR.NN.InstanceNormalization or IR.Imaging.ResizeImage { IsTFResize: false } or IR.Tensors.Unsqueeze or IR.Tensors.Reshape or IR.Tensors.Slice or IR.Tensors.Concat or IR.Tensors.Transpose or IR.NN.Swish or IR.Tensors.Gather or IR.NN.Pad { PadMode: PadMode.Constant } or IR.Math.Reduce or IR.Math.ReduceArg or IR.Math.Clamp or IR.NN.Erf or IR.Tensors.Cast or IR.Tensors.Expand or IR.Tensors.Where or IR.Math.Compare or IR.Tensors.ScatterND;
}

public static bool IsCpuSupported(Op op, Expr expr, IEnumerable<Expr> arguments)
public static bool IsCpuSupported(Op op, Expr expr, IEnumerable<Expr> arguments, string moduleKind = "cpu")
{
if (!IsCpuSupported(op))
{
Expand Down Expand Up @@ -110,6 +110,13 @@ public static bool IsCpuSupported(Op op, Expr expr, IEnumerable<Expr> arguments)
return false;
}

break;
case IR.Tensors.Gather gather:
if (moduleKind == "xpu")
{
return false;
}

break;
default:
break;
Expand Down
30 changes: 30 additions & 0 deletions modules/Nncase.Modules.CPU/Passes/Rules/CPU/FoldMatmulReduce.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,33 @@ public sealed partial class FoldPackedMatmulReduce : IRewriteRule
return null;
}
}

/// <summary>
/// Rewrites <c>Boxing(Unpack(PackedMatMul(lhs, rhs)))</c> into
/// <c>Unpack(Boxing(PackedMatMul(lhs, rhs)))</c> when the packed matmul's checked type
/// is a distributed type that still contains a partial-sum SBP.
/// </summary>
[RuleGenerator]
public sealed partial class SwapUnpackReduce : IRewriteRule
{
    /// <summary>
    /// Gets the pattern: a boxing whose target type is a <see cref="DistributedType"/> with no
    /// <c>SBP.P</c> entry, applied to an unpack of a packed matmul over two arbitrary operands.
    /// </summary>
    public IPattern Pattern { get; } =
        IsBoxing(
            target_name: "boxing",
            op => op.NewType is DistributedType dt && dt.NdSBP.All(s => s != SBP.P),
            IsUnpack(
                target_name: "unpack",
                _ => true,
                IsPackedMatMul(
                    "mm",
                    "call",
                    _ => true,
                    IsWildcard("lhs"),
                    IsWildcard("rhs"))));

    /// <summary>
    /// Builds the replacement expression for a matched sub-graph.
    /// </summary>
    /// <param name="call">The matched packed matmul call.</param>
    /// <param name="boxing">The matched boxing op; only its <c>IsReshape</c> flag is reused.</param>
    /// <param name="unpack">The matched unpack op; its lanes and axes are reused unchanged.</param>
    /// <returns>
    /// The unpack of a new boxing applied directly to <paramref name="call"/>, or <c>null</c> when the
    /// call's checked type is not a distributed type containing <c>SBP.P</c>.
    /// </returns>
    public Expr? GetReplace(Call call, Boxing boxing, Unpack unpack)
    {
        // Nothing to do unless the matmul result is distributed and has a partial-sum entry.
        if (call.CheckedType is not DistributedType matmulType || !matmulType.NdSBP.Any(sbp => sbp == SBP.P))
        {
            return null;
        }

        // Same tensor type and placement as the matmul result, with every partial-sum entry replaced by broadcast.
        var reducedSbp = matmulType.NdSBP.Select(sbp => sbp is SBPPartialSum ? SBP.B : sbp).ToArray();
        var reducedType = new DistributedType(matmulType.TensorType, reducedSbp, matmulType.Placement);

        // Box the packed matmul output first, then unpack with the original lanes/axes.
        var reboxed = IR.F.CPU.Boxing(call, reducedType, boxing.IsReshape);
        return IR.F.CPU.Unpack(reboxed, [.. unpack.Lanes], [.. unpack.Axes]);
    }
}
4 changes: 2 additions & 2 deletions modules/Nncase.Modules.CPU/Passes/Rules/CPU/GraphPartition.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public CPUOutputBoxingFusion(string moduleKind)

private Call? GetReplace(Call call, Op op, Boxing boxing, IReadOnlyList<Expr> callParams)
{
if (!PassUtility.IsCpuSupported(op, call, callParams))
if (!PassUtility.IsCpuSupported(op, call, callParams, ModuleKind))
{
return null;
}
Expand Down Expand Up @@ -142,7 +142,7 @@ public CPUSingleFusion(string moduleKind)

private Call? GetReplace(Call call, Op op, IReadOnlyList<Expr> callParams)
{
if (!PassUtility.IsCpuSupported(op, call, callParams))
if (!PassUtility.IsCpuSupported(op, call, callParams, ModuleKind))
{
return null;
}
Expand Down
7 changes: 7 additions & 0 deletions modules/Nncase.Modules.CPU/Targets/CPUTargetOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Nncase.IR;

namespace Nncase.Targets;

Expand Down Expand Up @@ -58,6 +59,12 @@ public class CpuTargetOptions : ICpuTargetOptions
[CommandLine.FromAmong(NocArchitecture.Mesh, NocArchitecture.CrossBar)]
public NocArchitecture NocArch { get; set; } = NocArchitecture.Mesh;

/// <summary>
/// Gets or sets the hierarchy kind, surfaced as the <c>--hierarchy-kind</c> option.
/// Allowed values are <c>Parallel</c> (the default) and <c>SMT</c>.
/// </summary>
[DisplayName("--hierarchy-kind")]
[Description("Hierarchy Kind.")]
[DefaultValue(HierarchyKind.Parallel)]
[CommandLine.FromAmong(HierarchyKind.Parallel, HierarchyKind.SMT)]
public HierarchyKind HierarchyKind { get; set; } = HierarchyKind.Parallel;

[DisplayName("--hierarchies")]
[Description("the distributed hierarchies of hardware. eg. `8,4 4,8` for dynamic cluster search or `4` for fixed hardware.")]
[DefaultValue("() => new int[][] { new int[] { 1 } }")]
Expand Down
11 changes: 10 additions & 1 deletion modules/Nncase.Modules.CPU/Targets/CPUTargetOptionsCommand.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright (c) Canaan Inc. All rights reserved.
// Licensed under the Apache license. See LICENSE file in the project root for full license information.
/* This file is generated by tools/stackvm_gen/CApiGen at 10/25/2024 6:12:16 PM +08:00. */
/* This file is generated by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */

using System;
using System.Collections.Generic;
Expand All @@ -13,6 +13,7 @@
using System.Threading.Tasks;
using Nncase;
using Nncase.CommandLine;
using Nncase.IR;

namespace Nncase.Targets;

Expand Down Expand Up @@ -46,6 +47,11 @@ public CpuTargetOptionsCommand(string name)
description: "Noc Architecture.",
getDefaultValue: () => NocArchitecture.Mesh);
Add(NocArchOption);
HierarchyKindOption = new Option<HierarchyKind>(
name: "--hierarchy-kind",
description: "Hierarchy Kind.",
getDefaultValue: () => HierarchyKind.Parallel);
Add(HierarchyKindOption);
HierarchiesOption = new Option<IEnumerable<int[]>>(
name: "--hierarchies",
description: "the distributed hierarchies of hardware. eg. `8,4 4,8` for dynamic cluster search or `4` for fixed hardware.",
Expand Down Expand Up @@ -105,6 +111,8 @@ public CpuTargetOptionsCommand(string name)

public Option<NocArchitecture> NocArchOption { get; }

public Option<HierarchyKind> HierarchyKindOption { get; }

public Option<IEnumerable<int[]>> HierarchiesOption { get; }

public Option<string> HierarchyNamesOption { get; }
Expand Down Expand Up @@ -138,6 +146,7 @@ public CpuTargetOptions GetBoundValue(InvocationContext context)
UnifiedMemoryArch = context.ParseResult.GetValueForOption(_cmd.UnifiedMemoryArchOption)!,
MemoryAccessArch = context.ParseResult.GetValueForOption(_cmd.MemoryAccessArchOption)!,
NocArch = context.ParseResult.GetValueForOption(_cmd.NocArchOption)!,
HierarchyKind = context.ParseResult.GetValueForOption(_cmd.HierarchyKindOption)!,
Hierarchies = context.ParseResult.GetValueForOption(_cmd.HierarchiesOption)!.ToArray(),
HierarchyNames = context.ParseResult.GetValueForOption(_cmd.HierarchyNamesOption)!,
HierarchySizes = context.ParseResult.GetValueForOption(_cmd.HierarchySizesOption)!.ToArray(),
Expand Down
14 changes: 10 additions & 4 deletions python/_nncase.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ from typing import Any, List, BinaryIO, Enum
import numpy


""" This block is generated by tools/stackvm_gen/CApiGen at 10/25/2024 6:12:16 PM +08:00. """
""" This block is generated by tools/stackvm_gen/CApiGen at 12/20/2024 5:27:07 PM +08:00. """


class MemoryAccessArchitecture(Enum):
Expand All @@ -16,9 +16,14 @@ class NocArchitecture(Enum):
CrossBar = 1


""" end the auto generated block by tools/stackvm_gen/CApiGen at 10/25/2024 6:12:16 PM +08:00. """
class HierarchyKind(Enum):
Parallel = 0
SMT = 1

""" This block is generated by tools/stackvm_gen/CApiGen at 10/25/2024 6:12:16 PM +08:00. """

""" end the auto generated block by tools/stackvm_gen/CApiGen at 12/20/2024 5:27:07 PM +08:00. """

""" This block is generated by tools/stackvm_gen/CApiGen at 12/20/2024 5:27:07 PM +08:00. """


class CpuTargetOptions:
Expand All @@ -28,6 +33,7 @@ class CpuTargetOptions:
UnifiedMemoryArch: bool
MemoryAccessArch: MemoryAccessArchitecture
NocArch: NocArchitecture
HierarchyKind: HierarchyKind
Hierarchies: List[List[int]]
HierarchyNames: str
HierarchySizes: List[int]
Expand All @@ -37,7 +43,7 @@ class CpuTargetOptions:
CustomOpScheme: str


""" end the auto generated block by tools/stackvm_gen/CApiGen at 10/25/2024 6:12:16 PM +08:00. """
""" end the auto generated block by tools/stackvm_gen/CApiGen at 12/20/2024 5:27:07 PM +08:00. """


class CompileOptions:
Expand Down
2 changes: 1 addition & 1 deletion python/nncase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import _nncase
from _nncase import RuntimeTensor, TensorDesc, Simulator, CpuTargetOptions, NocArchitecture, MemoryAccessArchitecture
from _nncase import RuntimeTensor, TensorDesc, Simulator, CpuTargetOptions, NocArchitecture, HierarchyKind, MemoryAccessArchitecture


def _initialize():
Expand Down
16 changes: 12 additions & 4 deletions python/nncase/native/ffi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ PYBIND11_MODULE(_nncase, m) {
&shape_bucket_options::fix_var_map));

// clang-format off
/* This block is generated by tools/stackvm_gen/CApiGen at 10/25/2024 6:12:16 PM +08:00. */
/* This block is generated by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */

py::enum_<memory_access_architecture_t>(m, "MemoryAccessArchitecture")
.value("UMA", memory_access_architecture_uma)
Expand All @@ -245,6 +245,10 @@ PYBIND11_MODULE(_nncase, m) {
.value("Mesh", noc_architecture_mesh)
.value("CrossBar", noc_architecture_cross_bar);

py::enum_<hierarchy_kind_t>(m, "HierarchyKind")
.value("Parallel", hierarchy_kind_parallel)
.value("SMT", hierarchy_kind_smt);


py::class_<cpu_target_options>(m, "CpuTargetOptions")
.def(py::init())
Expand All @@ -262,12 +266,16 @@ PYBIND11_MODULE(_nncase, m) {
py::overload_cast<bool>(&cpu_target_options::unified_memory_arch))
.def_property(
"MemoryAccessArch",
[]() {},
py::overload_cast<>(&cpu_target_options::memory_access_arch),
py::overload_cast<memory_access_architecture_t>(&cpu_target_options::memory_access_arch))
.def_property(
"NocArch",
[]() {},
py::overload_cast<>(&cpu_target_options::noc_arch),
py::overload_cast<noc_architecture_t>(&cpu_target_options::noc_arch))
.def_property(
"HierarchyKind",
py::overload_cast<>(&cpu_target_options::hierarchy_kind),
py::overload_cast<hierarchy_kind_t>(&cpu_target_options::hierarchy_kind))
.def_property(
"Hierarchies",
[]() {},
Expand Down Expand Up @@ -297,7 +305,7 @@ PYBIND11_MODULE(_nncase, m) {
[]() {},
py::overload_cast<std::string_view>(&cpu_target_options::custom_op_scheme)) ;

/* end the auto generated block by tools/stackvm_gen/CApiGen at 10/25/2024 6:12:16 PM +08:00. */
/* end the auto generated block by tools/stackvm_gen/CApiGen at 12/20/2024 3:41:05 PM +08:00. */
// clang-format on

py::class_<calibration_dataset_provider>(m, "CalibrationDatasetProvider")
Expand Down
Loading

0 comments on commit 6760e68

Please sign in to comment.