diff --git a/.github/actions/prereqs/action.yaml b/.github/actions/prereqs/action.yaml index 13961ff5..d19710c5 100644 --- a/.github/actions/prereqs/action.yaml +++ b/.github/actions/prereqs/action.yaml @@ -20,5 +20,5 @@ runs: python3 -m venv ${{ inputs.env_name }} source ${{ inputs.env_name }}/bin/activate pip install --upgrade pip - env MLIR_PYTHON_EXTRAS_SET_VERSION="0.0.8.3" HOST_MLIR_PYTHON_PACKAGE_PREFIX="aie" pip install -r requirements.txt + pip install -r requirements.txt echo "Prerequisites installed into ${{ inputs.env_name }}" diff --git a/.github/actions/test/action.yaml b/.github/actions/test/action.yaml index ea3e774f..d8d4e910 100644 --- a/.github/actions/test/action.yaml +++ b/.github/actions/test/action.yaml @@ -30,10 +30,10 @@ runs: case "${{ inputs.test_suite }}" in operators) - TEST_PATH="operators/" + TEST_PATH="iron/operators/" ;; applications) - TEST_PATH="applications/" + TEST_PATH="iron/applications/" ;; *) echo "Invalid test_suite: ${{ inputs.test_suite }}" @@ -46,4 +46,3 @@ runs: else pytest -m "not extensive" $TEST_PATH --csv-output=tests_latest.csv ${{ inputs.test_flags }} fi - diff --git a/README.md b/README.md index 2eeffec0..c833eb40 100755 --- a/README.md +++ b/README.md @@ -35,31 +35,31 @@ The IRON Python API for Ryzenβ„’ AI NPUs is described in the following paper: | Section | Description | Datatype | AIE2 | AIE2P | Status | Design Example | |:--------|:------------|:---------|:-----|:------|:-------|:-------------| -| [Element-wise Add](./aie_kernels/generic/add.cc) | Element-wise addition kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/elementwise_add/](./operators/elementwise_add/) | -| [Element-wise Mul](./aie_kernels/generic/mul.cc) | Element-wise multiplication kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/elementwise_mul/](./operators/elementwise_mul/) | -| [GEMM](./aie_kernels/aie2p/mm.cc) | General Matrix Multiplication kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/gemm/](./operators/gemm/) | -| [GEMV](./aie_kernels/generic/mv.cc) | General Matrix-Vector Multiplication kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/gemv/](./operators/gemv/) | -| [GQA](./aie_kernels/aie2p/mha.cc) | Grouped Query Attention kernel (Single pipeline) | bfloat16 | | βœ“ | 🟒 | [operators/mha/](./operators/mha/) | -| [MHA](./aie_kernels/aie2p/mha.cc) | Multi-Head Attention kernel & Grouped Query Attention | bfloat16 | | βœ“ | 🟒 | [operators/mha/](./operators/mha/) | -| [RMSNorm](./aie_kernels/aie2/rms_norm.cc) | RMSNorm kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/rms_norm/](./operators/rms_norm/) | -| [RoPE](./aie_kernels/generic/rope.cc) | Rotary Positional Embedding kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/rope/](./operators/rope/) | -| [SiLU](./aie_kernels/aie2/silu.cc) | Sigmoid Linear Unit activation kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/silu/](./operators/silu/) | -| [Softmax](./aie_kernels/aie2/softmax.cc) | Softmax kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/softmax/](./operators/softmax/) | -| [Weighted RMSNorm](./aie_kernels/aie2/rms_norm.cc) | Weighted RMSNorm kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/rms_norm/](./operators/rms_norm/) | -| [Copy](./aie_kernels/generic/passThrough.cc) | Copy | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/mem_copy/](./operators/mem_copy/) | -| [Transpose](./aie_kernels/generic/transpose.cc) | Transpose | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/transpose/](./operators/transpose/) | -| [AXPY](./aie_kernels/generic/axpy.cc) | AXPY | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/axpy/](./operators/axpy/) | +| [Element-wise Add](./aie_kernels/generic/add.cc) | Element-wise addition kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/elementwise_add/](./iron/operators/elementwise_add/) | +| [Element-wise Mul](./aie_kernels/generic/mul.cc) | Element-wise multiplication kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/elementwise_mul/](./iron/operators/elementwise_mul/) | +| [GEMM](./aie_kernels/aie2p/mm.cc) | General Matrix Multiplication kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/gemm/](./iron/operators/gemm/) | +| [GEMV](./aie_kernels/generic/mv.cc) | General Matrix-Vector Multiplication kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/gemv/](./iron/operators/gemv/) | +| [GQA](./aie_kernels/aie2p/mha.cc) | Grouped Query Attention kernel (Single pipeline) | bfloat16 | | βœ“ | 🟒 | [iron/operators/mha/](./iron/operators/mha/) | +| [MHA](./aie_kernels/aie2p/mha.cc) | Multi-Head Attention kernel & Grouped Query Attention | bfloat16 | | βœ“ | 🟒 | [iron/operators/mha/](./iron/operators/mha/) | +| [RMSNorm](./aie_kernels/aie2/rms_norm.cc) | RMSNorm kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/rms_norm/](./iron/operators/rms_norm/) | +| [RoPE](./aie_kernels/generic/rope.cc) | Rotary Positional Embedding kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/rope/](./iron/operators/rope/) | +| [SiLU](./aie_kernels/aie2/silu.cc) | Sigmoid Linear Unit activation kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/silu/](./iron/operators/silu/) | +| [Softmax](./aie_kernels/aie2/softmax.cc) | Softmax kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/softmax/](./iron/operators/softmax/) | +| [Weighted RMSNorm](./aie_kernels/aie2/rms_norm.cc) | Weighted RMSNorm kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/rms_norm/](./iron/operators/rms_norm/) | +| [Copy](./aie_kernels/generic/passThrough.cc) | Copy | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/mem_copy/](./iron/operators/mem_copy/) | +| [Transpose](./aie_kernels/generic/transpose.cc) | Transpose | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/transpose/](./iron/operators/transpose/) | +| [AXPY](./aie_kernels/generic/axpy.cc) | AXPY | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/axpy/](./iron/operators/axpy/) | | [Reduction]() | Reduction | bfloat16 | | | 🟑 | | -| [Dequant](./aie_kernels/generic/expand.cc) | Dequant Q4NX from [AWQ](https://github.com/mit-han-lab/llm-awq) to bfloat16 | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/dequant/](./operators/dequant/) | -| [RELU](./aie_kernels/aie2/relu.cc) | RELU | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/relu/](./operators/relu/) | -| [Leaky RELU](./aie_kernels/aie2p/leaky_relu.cc) (WIP) | Leaky RELU kernel | bfloat16 | | βœ“ | βšͺ | [operators/leaky_relu/](./operators/leaky_relu/) | -| [GELU](./aie_kernels/aie2/gelu.cc) | GELU | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/gelu/](./operators/gelu/) | -| [LayerNorm](./aie_kernels/aie2/layer_norm.cc) | LayerNorm | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/layer_norm/](./operators/layer_norm/) | +| [Dequant](./aie_kernels/generic/expand.cc) | Dequant Q4NX from [AWQ](https://github.com/mit-han-lab/llm-awq) to bfloat16 | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/dequant/](./iron/operators/dequant/) | +| [RELU](./aie_kernels/aie2/relu.cc) | RELU | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/relu/](./iron/operators/relu/) | +| [Leaky RELU](./aie_kernels/aie2p/leaky_relu.cc) (WIP) | Leaky RELU kernel | bfloat16 | | βœ“ | βšͺ | [iron/operators/leaky_relu/](./iron/operators/leaky_relu/) | +| [GELU](./aie_kernels/aie2/gelu.cc) | GELU | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/gelu/](./iron/operators/gelu/) | +| [LayerNorm](./aie_kernels/aie2/layer_norm.cc) | LayerNorm | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/layer_norm/](./iron/operators/layer_norm/) | | [Convolution]() | Convolution | bfloat16 | | | 🟑 | | | [MaxPool]() | MaxPool | bfloat16 | | | βšͺ | | | [AveragePool]() | AveragePool | bfloat16 | | | βšͺ | | -| [Tanh](./aie_kernels/aie2/tanh.cc) | Tanh kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/tanh/](./operators/tanh/) | -| [Sigmoid](./aie_kernels/aie2/sigmoid.cc) | Sigmoid kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [operators/sigmoid/](./operators/sigmoid/) | +| [Tanh](./aie_kernels/aie2/tanh.cc) | Tanh kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/tanh/](./iron/operators/tanh/) | +| [Sigmoid](./aie_kernels/aie2/sigmoid.cc) | Sigmoid kernel | bfloat16 | βœ“ | βœ“ | 🟒 | [iron/operators/sigmoid/](./iron/operators/sigmoid/) | > Use this dashboard to quickly check the status of each kernel and locate relevant setup, build, and usage information. @@ -114,17 +114,17 @@ If starting from `Ubuntu 24.04` you may need to update the Linux kernel to 6.11+ 1. Install required Python packages (from requirements.txt): ```bash - MLIR_PYTHON_EXTRAS_SET_VERSION="0.0.8.3" HOST_MLIR_PYTHON_PACKAGE_PREFIX="aie" pip install -r requirements.txt + pip install -r requirements.txt ``` 1. To test your installation, you can try to build and run the example below: ```bash - ./operators/axpy/test.py + ./iron/operators/axpy/test.py ``` ### Building/Using & Testing Operators -All available operators can be found in `operators`. These each contain: +All available operators can be found in `iron/operators`. These each contain: * `op.py`: The Python operator interface -- an easy access point to integrate operators into your project that prescribes how to compile the operator (build artifacts) and how to call it at runtime (buffer sizes, etc.) * `design.py`: The implementation of the operator's NPU code. Often references a kernel in `aie_kernels` for the compute core code and describes the data movement using ObjectFIFOs. @@ -137,17 +137,17 @@ All available operators can be found in `operators`. These each contain: To build and test all the operators: ``` bash -pytest operators/ -m "not extensive" +pytest iron/operators/ -m "not extensive" ``` To run the extensive test suite: ``` bash -pytest operators/ +pytest iron/operators/ ``` To run a specific operator's tests: ``` bash -pytest operators/axpy/ +pytest iron/operators/axpy/ ``` ### Git Hooks (Optional but Recommended) diff --git a/REUSE.toml b/REUSE.toml index 2e0fcd7a..dbc082d1 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -4,7 +4,7 @@ SPDX-PackageSupplier = "Advanced Micro Devices, Inc." SPDX-PackageDownloadLocation = "https://github.com/AARInternal/ironclad" [[annotations]] -path = "applications/llama_3.2_1b/prompt.txt" +path = "iron/applications/llama_3.2_1b/prompt.txt" precedence = "closest" SPDX-FileCopyrightText = "Public Domain" SPDX-License-Identifier = "CC0-1.0" diff --git a/applications/llama_3.2_1b/src/aie_device_manager.py b/applications/llama_3.2_1b/src/aie_device_manager.py deleted file mode 100644 index a06d957a..00000000 --- a/applications/llama_3.2_1b/src/aie_device_manager.py +++ /dev/null @@ -1,100 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -""" -Global AIE Device Manager for resource sharing and cleanup -""" - -import logging -import os -import sys -from pathlib import Path -from typing import Dict, Optional, Any -import pyxrt -from aie.iron.hostruntime.config import detect_npu_device -from aie.iron.device import NPU1, NPU2 - - -class AIEDeviceManager: - """Singleton manager for AIE XRT resources""" - - _instance = None - - def __new__(cls): - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def __init__(self): - self.device = pyxrt.device(0) - self.device_type = detect_npu_device() - self.contexts = {} # xclbin_path -> (context, xclbin) - self.kernels = {} # (xclbin_path, kernel_name) -> kernel - - def get_context_and_kernel( - self, xclbin_path: str, kernel_name: str | None = None - ) -> (pyxrt.hw_context, pyxrt.kernel): - """Get or create hardware context and kernel for xclbin""" - # Check if we already have a context for this xclbin - - if xclbin_path not in self.contexts: - xclbin = pyxrt.xclbin(xclbin_path) - self.device.register_xclbin(xclbin) - xclbin_uuid = xclbin.get_uuid() - context = pyxrt.hw_context(self.device, xclbin_uuid) - self.contexts[xclbin_path] = (context, xclbin) - logging.debug(f"Created new context for {Path(xclbin_path).name}") - else: - context, xclbin = self.contexts[xclbin_path] - logging.debug(f"Reusing context for {Path(xclbin_path).name}") - - # Get kernel name if not provided - if kernel_name is None: - kernels = xclbin.get_kernels() - if not kernels: - raise RuntimeError("No kernels found in xclbin") - kernel_name = kernels[0].get_name() - - # Check if we already have the kernel - kernel_key = (xclbin_path, kernel_name) - if kernel_key not in self.kernels: - self.kernels[kernel_key] = pyxrt.kernel(context, kernel_name) - logging.debug( - f"Created new kernel {kernel_name} from xclbin {Path(xclbin_path).name}" - ) - else: - logging.debug( - f"Reusing kernel: {kernel_name} from xclbin {Path(xclbin_path).name}" - ) - - return context, self.kernels[kernel_key] - - def device_str(self) -> str: - return self.device_type.resolve().name - - def cleanup(self): - """Clean up all XRT resources""" - self.kernels.clear() - - # Clear contexts - for xclbin_path, (context, xclbin) in self.contexts.items(): - try: - del context - except: - pass - self.contexts.clear() - - # Clear device - if self.device is not None: - try: - del self.device - except: - pass - self.device = None - - logging.debug("Cleaned up AIE device manager") - - def reset(self): - """Reset the device manager (for debugging)""" - self.cleanup() - AIEDeviceManager._instance = None diff --git a/applications/llama_3.2_1b/src/compilation.py b/applications/llama_3.2_1b/src/compilation.py deleted file mode 100644 index c6716441..00000000 --- a/applications/llama_3.2_1b/src/compilation.py +++ /dev/null @@ -1,535 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -""" -This file implements a simple Python-based build system. You specify what you -want to compile (*artifacts*) through subclasses of `CompilationArtifact`. -Each artifact can have a list of depenencies of other artifacts that it relies -on. Each artifact corresponds to exactly one file. If a file with a matching -name already exists, and all its dependencies are built and older than the file, -then the existing file will be reused. - -For each file name, artifacts are singletons. You create artifacts by calling -the `new` class method of the appropriate class. This ensures that artifact -objects are uniqued, i.e., calling `new` twice with the same file name will -return the same object. - -There is a special artifact for source files that do not need to get generated, -`SourceArtifact`. It is likely that in your compilation dependency graph, -the leaf nodes will be `SourceArtifact`s. - -You specify how to generate (compile) an artifact through *rules*, which are -expressed as subclasses of `CompilationRule`. This class requires you to -implement two methods: `matches` and `compile`. During compilation, we will -call `matches` on the set of remaining artifacts to see if the given rule is -able to produce any of the artifacts not available yet. If this function -returns `True`, we will call `compile` on the rule to generate the artifact. -`compile` returns a new list of artifacts, which may be the same one as -before; however, if `matches()==True`, at least one of the artifacts in the -list must be made available after calling `compile()`. -""" - -from abc import ABC, abstractmethod -from pathlib import Path -import os.path -import zlib -import logging -import subprocess -import importlib.util -from contextlib import nullcontext -from aie.extras.context import mlir_mod_ctx - - -# Compilation Artifacts -# -------------------------------------------------------------------------- - - -class CompilationArtifact(ABC): - _instances = {} - - @classmethod - def new(cls, path, *args, **kwargs): - """Uniques artifacts based on absolute file path; any two artifacts with the same absolute path will be represented by the same object.""" - path = Path(path) - abs_path = path.absolute() - if abs_path not in cls._instances: - cls._instances[abs_path] = None - instance = cls(path, *args, **kwargs) - cls._instances[abs_path] = instance - else: - assert ( - type(cls._instances[abs_path]) == cls - ), f"Artifact with path {abs_path} is already registered with a different type" - return cls._instances[abs_path] - - def __init__(self, path, depends=None): - abs_path = path.absolute() - assert ( - abs_path in self._instances - ), "do not construct artifact objects directly; call the get() class method instead for uniquing" - self.path: Path = path - self.depends: list[CompilationArtifact] = depends if depends is not None else [] - self.users: list[CompilationArtifact] = ( - [] - ) # List of ancestor artifacts that depend on this artifact - for dependency in self.depends: - dependency.users.append(self) - - def __repr__(self): - return f"{self.__class__.__name__}(path={self.path}, depends={self.depends})" - - def set_path(self, new_path): - old_abs_path = self.path.absolute() - new_path = Path(new_path) - abs_path = new_path.absolute() - self.path = new_path - del CompilationArtifact._instances[old_abs_path] - CompilationArtifact._instances[abs_path] = self - - def is_available(self): - if not self.path.exists(): - return False - for dependency in self.depends: - # If any of our dependencies' dependencies are outdated, this artifact is also outdated - if not dependency.is_available(): - return False - # If any of our direct dependencies are newer than this artifact, this artifact is invalid - if dependency.is_newer_than(os.path.getmtime(str(self.path))): - return False - return True - - def is_newer_than(self, time): - return os.path.getmtime(str(self.path)) > time - - def delete(self): - for user in self.users: - user.depends.remove(self) - del self._instances[self.path.absolute()] - return self.users - - -class SourceArtifact(CompilationArtifact): - pass - - -class XclbinArtifact(CompilationArtifact): - def __init__( - self, path, depends, kernel_name="MLIR_AIE", extra_flags=None, xclbin_input=None - ): - super().__init__(path, depends) - self.kernel_name = kernel_name - self.extra_flags = extra_flags if extra_flags is not None else [] - self.xclbin_input = xclbin_input - - -class InstsBinArtifact(CompilationArtifact): - def __init__(self, path, depends, extra_flags=None): - super().__init__(path, depends) - self.extra_flags = extra_flags if extra_flags is not None else [] - - -class KernelObjectArtifact(CompilationArtifact): - def __init__(self, path, depends, extra_flags=None, rename_symbols=None): - super().__init__(path, depends) - self.extra_flags = extra_flags if extra_flags is not None else [] - self.rename_symbols = rename_symbols if rename_symbols is not None else {} - - -class KernelArchiveArtifact(CompilationArtifact): - pass - - -class PythonGeneratedMLIRArtifact(CompilationArtifact): - def __init__( - self, - path, - import_path, - callback_fn, - callback_args=None, - callback_kwargs=None, - requires_context=False, - ): - self.import_path = import_path - self.callback_fn = callback_fn - self.callback_args = callback_args if callback_args is not None else [] - self.callback_kwargs = callback_kwargs if callback_kwargs is not None else {} - self.requires_context = requires_context - super().__init__(path) - - def is_available(self): - is_available = super().is_available() - if is_available: - # Force regeneration if the Python source is changed - return os.path.getmtime(str(self.path)) >= os.path.getmtime( - self.import_path - ) - return is_available - - -# Compilation Rules -# -------------------------------------------------------------------------- - - -class CompilationRule(ABC): - @abstractmethod - def matches(self, artifact: list[CompilationArtifact]) -> bool: - pass - - @abstractmethod - def compile( - self, artifacts: list[CompilationArtifact] - ) -> list[CompilationArtifact]: - pass - - -class GenerateMLIRFromPythonCompilationRule(CompilationRule): - def matches(self, artifacts): - return any( - isinstance(artifact, PythonGeneratedMLIRArtifact) - and len(artifact.depends) == 0 - for artifact in artifacts - ) - - def compile(self, artifacts): - """Generate MLIR from a Python callback that uses the MLIR bindings""" - for i, artifact in enumerate(artifacts): - if not isinstance(artifact, PythonGeneratedMLIRArtifact): - continue - if not all(dependency.is_available() for dependency in artifact.depends): - continue - - # Import the Python source file - spec = importlib.util.spec_from_file_location( - Path(artifact.import_path).name, artifact.import_path - ) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - # We only initiate an MLIR context if requested; otherwise, it is expected that the callback creates the context - ctx_callback = lambda: ( - mlir_mod_ctx() if artifact.requires_context else nullcontext() - ) - with ctx_callback() as ctx: - callback_function = getattr(module, artifact.callback_fn) - mlir_code = callback_function( - *artifact.callback_args, **artifact.callback_kwargs - ) - # Stringify the generated MLIR - if artifact.requires_context: - mlir_code = str(ctx.module) - else: - mlir_code = str(mlir_code) - - with open(artifact.path, "w") as f: - f.write(mlir_code) - - # Now that the artifact is generated, replace this artifact with the MLIR source code file - old_users = artifact.delete() - new_artifact = SourceArtifact.new(artifact.path) - for user in old_users: - user.depends.append(new_artifact) - artifacts[i] = new_artifact - logging.debug(f"Created MLIR source string for {artifact.path.name}") - - return artifacts - - -class AieccCompilationRule(CompilationRule): - def __init__(self, build_dir, peano_dir, mlir_aie_dir, *args, **kwargs): - self.build_dir = build_dir - self.aiecc_path = Path(mlir_aie_dir) / "bin" / "aiecc.py" - self.peano_dir = peano_dir - super().__init__(*args, **kwargs) - - def matches(self, artifacts): - return any( - isinstance(artifact, (XclbinArtifact, InstsBinArtifact)) - and all(dependency.is_available() for dependency in artifact.depends) - for artifact in artifacts - ) - - def compile(self, artifacts): - # If there are both xclbin and insts.bin targets based on the same source MLIR code, we can combine them into one single `aiecc.py` invocation. - mlir_sources = set() - mlir_sources_to_xclbins = {} - mlir_sources_to_insts_bins = {} - for artifact in artifacts: - if not isinstance(artifact, (XclbinArtifact, InstsBinArtifact)): - continue - if not all(dependency.is_available() for dependency in artifact.depends): - continue - mlir_dependencies = [ - d - for d in artifact.depends - if isinstance(d, (SourceArtifact, PythonGeneratedMLIRArtifact)) - ] - if len(mlir_dependencies) != 1: - raise RuntimeError( - f"Expected exactly one dependency of {artifact.path} to be SourceArtifact or PythonGeneratedMLIRArtifact, got: {', '.join(str(dep.path) for dep in artifact.depends)}" - ) - mlir_dependency = mlir_dependencies[0] - mlir_sources.add(mlir_dependency) - if isinstance(artifact, XclbinArtifact): - mlir_sources_to_xclbins.setdefault(mlir_dependency, []).append(artifact) - elif isinstance(artifact, InstsBinArtifact): - mlir_sources_to_insts_bins.setdefault(mlir_dependency, []).append( - artifact - ) - - # Now we know for each mlir source if we need to generate an xclbin, an insts.bin or both for it - for mlir_source in mlir_sources: - # Build aiecc command using Peano - compile_cmd = [ - "python", - str(self.aiecc_path), - "--no-compile-host", - "--no-xchesscc", - "--no-xbridge", - "--peano", - str(self.peano_dir), - ] - do_compile_xclbin = mlir_source in mlir_sources_to_xclbins - do_compile_insts_bin = mlir_source in mlir_sources_to_insts_bins - if do_compile_xclbin: - first_xclbin = mlir_sources_to_xclbins[mlir_source][ - 0 - ] # FIXME: this does not handle the case of multiple xclbins with different kernel names or flags from the same MLIR - compile_cmd += first_xclbin.extra_flags + [ - "--aie-generate-xclbin", - "--xclbin-name=" + str(first_xclbin.path), - "--xclbin-kernel-name=" + first_xclbin.kernel_name, - ] - if first_xclbin.xclbin_input is not None: - compile_cmd += [ - "--xclbin-input=" + str(first_xclbin.xclbin_input.path) - ] - if do_compile_insts_bin: - first_insts_bin = mlir_sources_to_insts_bins[mlir_source][ - 0 - ] # FIXME: this does not handle the case of multiple insts.bins with different flags from the same MLIR - if not do_compile_xclbin: - compile_cmd += ["--no-compile"] - compile_cmd += first_insts_bin.extra_flags + [ - "--aie-generate-npu", - "--npu-insts-name=" + str(first_insts_bin.path), - ] - compile_cmd += [str(mlir_source.path)] - - env = os.environ.copy() - logging.debug(f"Compiling MLIR with command: {' '.join(compile_cmd)}") - result = subprocess.run( - compile_cmd, - cwd=str(self.build_dir), - capture_output=True, - text=True, - timeout=300, - env=env, - ) - if result.returncode == 0: - logging.debug( - f"Successfully compiled {mlir_source.path} to {', '.join([str(first_xclbin.path)] if do_compile_xclbin else [] + [str(first_insts_bin.path)] if do_compile_insts_bin else [])}" - ) - else: - raise RuntimeError( - f"MLIR compilation for {mlir_source.path} failed: {result.stderr}" - ) - - # There may be multiple targets that require an xclbin/insts.bin from the same MLIR with different names; copy them - for sources_to in [mlir_sources_to_xclbins, mlir_sources_to_insts_bins]: - if sources_to.get(mlir_source, [])[1:]: - copy_src = sources_to[mlir_source][0] - for copy_dest in sources_to[mlir_source][1:]: - shutil.copy(copy_src.path, copy_dest.path) - - # With the newly generated files, is_available() should now return True on the Xclbin and InstsBin targets - return artifacts - - -class PeanoCompilationRule(CompilationRule): - def __init__(self, peano_dir, mlir_aie_dir, *args, **kwargs): - self.peano_dir = peano_dir - self.mlir_aie_dir = mlir_aie_dir - super().__init__(*args, **kwargs) - - def matches(self, artifacts): - return any( - isinstance(artifact, KernelObjectArtifact) - and all( - isinstance(dependency, SourceArtifact) and dependency.is_available() - for dependency in artifact.depends - ) - for artifact in artifacts - ) - - def compile(self, artifacts): - clang_path = Path(self.peano_dir) / "bin" / "clang++" - include_path = Path(self.mlir_aie_dir) / "include" - - for artifact in artifacts: - if not isinstance(artifact, KernelObjectArtifact): - continue - - if len(artifact.depends) != 1: - raise RuntimeError( - "Expected exactly one dependency (the C source code) for KernelObjectArtifact" - ) - source_file = artifact.depends[0] - if not isinstance(source_file, SourceArtifact): - raise RuntimeError( - "Expected KernelObject dependency to be a C source file" - ) - - cmd = ( - [ - str(clang_path), - "-O2", - "-std=c++20", - "--target=aie2p-none-unknown-elf", - "-Wno-parentheses", - "-Wno-attributes", - "-Wno-macro-redefined", - "-Wno-empty-body", - "-Wno-missing-template-arg-list-after-template-kw", - f"-I{str(include_path)}", - ] - + artifact.extra_flags - + ["-c", str(source_file.path), "-o", str(artifact.path)] - ) - logging.debug(f"Running compilation command: {' '.join(cmd)}") - result = subprocess.run(cmd, capture_output=True, text=True) - if result.returncode != 0: - raise RuntimeError(f"Compilation failed: {result.stderr}") - logging.debug(f"Successfully compiled: {artifact.path.name}") - - if artifact.rename_symbols: - self._rename_symbols(artifact) - - return artifacts - - def _rename_symbols(self, artifact): - objcopy_path = "llvm-objcopy-18" - cmd = [ - objcopy_path, - ] - for old_sym, new_sym in artifact.rename_symbols.items(): - cmd += [ - "--redefine-sym", - f"{old_sym}={new_sym}", - ] - cmd += [str(artifact.path)] - - logging.debug(f"Running renaming command: {' '.join(cmd)}") - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode == 0: - logging.debug(f"Successfully renamed symbols in: {artifact.path.name}") - else: - raise RuntimeError(f"Symbol renaming failed: {result.stderr}") - - -class ArchiveCompilationRule(CompilationRule): - def __init__(self, peano_dir, *args, **kwargs): - self.peano_dir = peano_dir - super().__init__(*args, **kwargs) - - def matches(self, artifacts): - return any( - isinstance(artifact, KernelArchiveArtifact) and len(artifact.depends) > 0 - for artifact in artifacts - ) - - def compile(self, artifacts): - """Create an archive (.a) from compiled object files""" - for artifact in artifacts: - if not isinstance(artifact, KernelArchiveArtifact): - continue - - # Get archive filename from method - archive_path = str(artifact.path) - object_files = [ - str(dep.path) - for dep in artifact.depends - if isinstance(dep, KernelObjectArtifact) - ] - - # Try to find ar tool from PEANO, then system - ar_path = None - - if self.peano_dir: - # Peano has llvm-ar for archiving - peano_ar = Path(self.peano_dir) / "bin" / "llvm-ar" - if os.path.exists(peano_ar): - ar_path = peano_ar - - if ar_path is None: - raise RuntimeError( - "Could not find 'ar' tool in PEANO installation or system PATH" - ) - - cmd = [str(ar_path), "rcs", archive_path] + object_files - - result = subprocess.run(cmd, capture_output=True, text=True) - - if result.returncode == 0: - logging.debug( - f"Successfully created archive: {Path(archive_path).name}" - ) - else: - raise RuntimeError(f"Archive creation failed: {result.stderr}") - - return artifacts - - -# Global Functions -# -------------------------------------------------------------------------- - - -def apply_rules(rules, artifacts): - for rule in rules: - if rule.matches(artifacts): - logging.debug(f"Applying rule: {rule.__class__.__name__}") - artifacts = rule.compile(artifacts) - break - else: - # None of the rules matched - return False, artifacts - - return True, artifacts - - -def compile(rules, artifacts): - # While some artifacts remain to be compiled (not all are available) - while not all(artifact.is_available() for artifact in artifacts): - remaining = [artifact for artifact in artifacts if not artifact.is_available()] - success, artifacts = apply_rules(rules, remaining) - if not success: - raise RuntimeError( - f"No matching rule to compile target(s): {', '.join(str(artifact.path.name) for artifact in artifacts if not artifact.is_available())}" - ) - return artifacts - - -def get_work_list(artifacts): - """ - Return a flattened artifact creation worklist in reverse topological order from dependencies. - The returned list will start with leaf nodes (artifacts with no dependencies), and any following artifacts will only contain artifacts from earlier in the list. - """ - work_list = [] - todo = list(artifacts) - visited = set() - - def dfs_visit(artifact): - if artifact in visited: - # Thanks to uniquing of artifact objects, this avoids duplicate creation of the same artifacts - return - visited.add(artifact) - # First visit all dependencies, so put leaves first (post-order) ... - for dep in artifact.depends: - dfs_visit(dep) - # ... then put parent - if not artifact.is_available(): - work_list.append(artifact) - - for artifact in todo: - dfs_visit(artifact) - - return work_list diff --git a/conftest.py b/conftest.py index f3140421..2f4ab726 100644 --- a/conftest.py +++ b/conftest.py @@ -10,8 +10,7 @@ import sys import statistics -sys.path.insert(0, str(Path(__file__).parent)) -from operators.common import AIEContext +from iron.common import AIEContext @pytest.fixture @@ -124,7 +123,7 @@ def pytest_runtest_makereport(item, call): csv_reporter = item.session.config._csv_reporter if csv_reporter: # The pytest nodeid looks like this: - # operators/dequant/test.py::test_dequant[iter0-dequant_8_cols_2_channels_2048_tile_128] + # iron/operators/dequant/test.py::test_dequant[iter0-dequant_8_cols_2_channels_2048_tile_128] # Extract only the stem out of that. nodeid_components = re.match( r"^(.+?)::(.+?)\[(iter\d+-)?(.+?)\]$", item.nodeid diff --git a/iron/__init__.py b/iron/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/applications/llama_3.2_1b/README.md b/iron/applications/llama_3.2_1b/README.md similarity index 100% rename from applications/llama_3.2_1b/README.md rename to iron/applications/llama_3.2_1b/README.md diff --git a/applications/llama_3.2_1b/analyze_profile.py b/iron/applications/llama_3.2_1b/analyze_profile.py similarity index 100% rename from applications/llama_3.2_1b/analyze_profile.py rename to iron/applications/llama_3.2_1b/analyze_profile.py diff --git a/applications/llama_3.2_1b/configs/llama32_1b.json b/iron/applications/llama_3.2_1b/configs/llama32_1b.json similarity index 100% rename from applications/llama_3.2_1b/configs/llama32_1b.json rename to iron/applications/llama_3.2_1b/configs/llama32_1b.json diff --git a/applications/llama_3.2_1b/configs/llama32_1b.json.license b/iron/applications/llama_3.2_1b/configs/llama32_1b.json.license similarity index 100% rename from applications/llama_3.2_1b/configs/llama32_1b.json.license rename to iron/applications/llama_3.2_1b/configs/llama32_1b.json.license diff --git a/applications/llama_3.2_1b/inference.py b/iron/applications/llama_3.2_1b/inference.py similarity index 98% rename from applications/llama_3.2_1b/inference.py rename to iron/applications/llama_3.2_1b/inference.py index a2408d47..8109c543 100755 --- a/applications/llama_3.2_1b/inference.py +++ b/iron/applications/llama_3.2_1b/inference.py @@ -10,10 +10,6 @@ import sys from pathlib import Path -# Add IRON repository root to Python path -repo_root = Path(__file__).parent.parent.parent -sys.path.insert(0, str(repo_root)) - import argparse import time import torch @@ -27,7 +23,7 @@ import logging from collections import deque -from operators.common import AIEOperatorBase +from iron.common import AIEOperatorBase from src.utils import ( model_memory_size, load_weights_into_llama, diff --git a/applications/llama_3.2_1b/prompt.txt b/iron/applications/llama_3.2_1b/prompt.txt similarity index 100% rename from applications/llama_3.2_1b/prompt.txt rename to iron/applications/llama_3.2_1b/prompt.txt diff --git a/applications/llama_3.2_1b/src/block/feed_forward.py b/iron/applications/llama_3.2_1b/src/block/feed_forward.py similarity index 99% rename from applications/llama_3.2_1b/src/block/feed_forward.py rename to iron/applications/llama_3.2_1b/src/block/feed_forward.py index 85305ad3..8bae36ec 100644 --- a/applications/llama_3.2_1b/src/block/feed_forward.py +++ b/iron/applications/llama_3.2_1b/src/block/feed_forward.py @@ -9,7 +9,7 @@ import torch import torch.nn as nn from ..utils import assign -from operators import ( +from iron.operators import ( AIEElementwiseMul, AIEGEMM, AIEGEMV, diff --git a/applications/llama_3.2_1b/src/block/gqa.py b/iron/applications/llama_3.2_1b/src/block/gqa.py similarity index 99% rename from applications/llama_3.2_1b/src/block/gqa.py rename to iron/applications/llama_3.2_1b/src/block/gqa.py index 05566814..1a712ff9 100644 --- a/applications/llama_3.2_1b/src/block/gqa.py +++ b/iron/applications/llama_3.2_1b/src/block/gqa.py @@ -10,8 +10,8 @@ import torch import torch.nn as nn -from operators import AIERope, AIESoftmax, AIEMHA, AIEGEMM, AIEGEMV -from operators.rope.rope_utils import apply_rope +from iron.operators import AIERope, AIESoftmax, AIEMHA, AIEGEMM, AIEGEMV +from iron.operators.rope.rope_utils import apply_rope from torchtune.modules import KVCache diff --git a/applications/llama_3.2_1b/src/block/transformer.py b/iron/applications/llama_3.2_1b/src/block/transformer.py similarity index 99% rename from applications/llama_3.2_1b/src/block/transformer.py rename to iron/applications/llama_3.2_1b/src/block/transformer.py index 42a48146..f2b46cdf 100644 --- a/applications/llama_3.2_1b/src/block/transformer.py +++ b/iron/applications/llama_3.2_1b/src/block/transformer.py @@ -11,7 +11,7 @@ from ..utils import assign from src.block.gqa import GroupedQueryAttention from src.block.feed_forward import FeedForward -from operators import AIERMSNorm, AIEElementwiseAdd +from iron.operators import AIERMSNorm, AIEElementwiseAdd class TransformerBlock(nn.Module): diff --git a/applications/llama_3.2_1b/src/model_with_json.py b/iron/applications/llama_3.2_1b/src/model_with_json.py similarity index 99% rename from applications/llama_3.2_1b/src/model_with_json.py rename to iron/applications/llama_3.2_1b/src/model_with_json.py index 9984e0a3..856fb048 100644 --- a/applications/llama_3.2_1b/src/model_with_json.py +++ b/iron/applications/llama_3.2_1b/src/model_with_json.py @@ -11,8 +11,8 @@ import json from pathlib import Path from src.block.transformer import TransformerBlock -from operators.rope.rope_utils import compute_rope_params -from operators import AIERMSNorm, AIEGEMM, AIEGEMV +from iron.operators.rope.rope_utils import compute_rope_params +from iron.operators import AIERMSNorm, AIEGEMM, AIEGEMV from rich.console import Console from rich.text import Text diff --git a/applications/llama_3.2_1b/src/tokenizer.py b/iron/applications/llama_3.2_1b/src/tokenizer.py similarity index 100% rename from applications/llama_3.2_1b/src/tokenizer.py rename to iron/applications/llama_3.2_1b/src/tokenizer.py diff --git a/applications/llama_3.2_1b/src/utils.py b/iron/applications/llama_3.2_1b/src/utils.py similarity index 100% rename from applications/llama_3.2_1b/src/utils.py rename to iron/applications/llama_3.2_1b/src/utils.py diff --git a/applications/llama_3.2_1b/test.py b/iron/applications/llama_3.2_1b/test.py similarity index 100% rename from applications/llama_3.2_1b/test.py rename to iron/applications/llama_3.2_1b/test.py diff --git a/applications/llama_3.2_1b/torch_to_npy.py b/iron/applications/llama_3.2_1b/torch_to_npy.py similarity index 100% rename from applications/llama_3.2_1b/torch_to_npy.py rename to iron/applications/llama_3.2_1b/torch_to_npy.py diff --git a/operators/common/__init__.py b/iron/common/__init__.py similarity index 100% rename from operators/common/__init__.py rename to iron/common/__init__.py diff --git a/operators/common/aie_base.py b/iron/common/aie_base.py similarity index 100% rename from operators/common/aie_base.py rename to iron/common/aie_base.py diff --git a/operators/common/aie_context.py b/iron/common/aie_context.py similarity index 89% rename from operators/common/aie_context.py rename to iron/common/aie_context.py index 0a76132a..804499f6 100644 --- a/operators/common/aie_context.py +++ b/iron/common/aie_context.py @@ -75,26 +75,14 @@ def prepare_runtime(self): # Set up kernels for kernel_name, (xclbin, xclbin_kernel_name, insts) in op.kernels.items(): - context, xrt_kernel = self.device_manager.get_context_and_kernel( - str(xclbin.path), xclbin_kernel_name + handle = self.device_manager.get_kernel_handle( + str(xclbin.path), xclbin_kernel_name, str(insts.path) ) - with open(str(insts.path), "rb") as f: - instructions = np.frombuffer(f.read(), dtype=np.uint32) - logging.debug( - f"Allocating instruction buffer for {len(instructions)} instructions." - ) - insts_bo = pyxrt.bo( - self.device_manager.device, - instructions.nbytes, - pyxrt.bo.cacheable, - xrt_kernel.group_id(1), - ) - insts_bo.write(instructions.view(np.uint8), 0) op.xrt_kernels[kernel_name] = ( - context, - xrt_kernel, - insts_bo, - len(instructions), + handle.context, + handle.kernel, + handle.insts_bo, + len(handle.insts), ) # If multiple buffers (of the same binned size) are used in the @@ -174,12 +162,13 @@ def prepare_runtime(self): op.buffer_bos[buffer_name] = bo_pools[alloc_pool][alloc_idx] # Setup runlist - _, (first_xclbin, first_xclbin_kernel_name, _) = next( + _, (first_xclbin, first_xclbin_kernel_name, first_insts) = next( iter(op.kernels.items()) ) - context, _ = self.device_manager.get_context_and_kernel( - str(first_xclbin.path), first_xclbin_kernel_name + handle = self.device_manager.get_kernel_handle( + str(first_xclbin.path), first_xclbin_kernel_name, str(first_insts.path) ) + context = handle.context if self.use_runlist: op.xrt_runlist = pyxrt.runlist(context) for i, (kernel_name, *buffer_args) in enumerate(op.runlist): diff --git a/iron/common/aie_device_manager.py b/iron/common/aie_device_manager.py new file mode 100644 index 00000000..fda4d0cb --- /dev/null +++ b/iron/common/aie_device_manager.py @@ -0,0 +1,53 @@ +# SPDX-FileCopyrightText: Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Global AIE Device Manager for resource sharing and cleanup +""" + +import logging +import os +import sys +from pathlib import Path +from typing import Dict, Optional, Any +import pyxrt +from aie.utils import DefaultNPURuntime +from aie.utils.npukernel import NPUKernel +from aie.iron.device import NPU1, NPU2 + + +class AIEDeviceManager: + """Singleton manager for AIE XRT resources""" + + _instance = None + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self): + self.runtime = DefaultNPURuntime + # Expose device for AIEContext buffer allocation + # Accessing protected member _device as AIEContext needs pyxrt.device + self.device = self.runtime._device + self.device_type = self.runtime.device() + + def get_kernel_handle(self, xclbin_path: str, kernel_name: str, insts_path: str): + """Get kernel handle using HostRuntime""" + npu_kernel = NPUKernel( + xclbin_path=xclbin_path, insts_path=insts_path, kernel_name=kernel_name + ) + return self.runtime.load(npu_kernel) + + def device_str(self) -> str: + return self.device_type.resolve().name + + def cleanup(self): + """Clean up all XRT resources""" + # HostRuntime handles cleanup + pass + + def reset(self): + """Reset the device manager (for debugging)""" + pass diff --git a/operators/common/compilation.py b/iron/common/compilation.py similarity index 99% rename from operators/common/compilation.py rename to iron/common/compilation.py index f3f51344..2cbaa916 100644 --- a/operators/common/compilation.py +++ b/iron/common/compilation.py @@ -39,7 +39,6 @@ from contextlib import nullcontext from aie.extras.context import mlir_mod_ctx - # Compilation Artifacts # -------------------------------------------------------------------------- diff --git a/operators/common/test_utils.py b/iron/common/test_utils.py similarity index 100% rename from operators/common/test_utils.py rename to iron/common/test_utils.py diff --git a/operators/common/utils.py b/iron/common/utils.py similarity index 99% rename from operators/common/utils.py rename to iron/common/utils.py index f7834e7f..9037fbd8 100644 --- a/operators/common/utils.py +++ b/iron/common/utils.py @@ -11,7 +11,6 @@ import numpy as np from ml_dtypes import bfloat16 - torch_dtype_map = { "bf16": torch.bfloat16, "f32": torch.float32, diff --git a/operators/__init__.py b/iron/operators/__init__.py similarity index 100% rename from operators/__init__.py rename to iron/operators/__init__.py diff --git a/operators/axpy/design.py b/iron/operators/axpy/design.py similarity index 100% rename from operators/axpy/design.py rename to iron/operators/axpy/design.py diff --git a/operators/axpy/op.py b/iron/operators/axpy/op.py similarity index 99% rename from operators/axpy/op.py rename to iron/operators/axpy/op.py index a64c8b85..ce1702c6 100644 --- a/operators/axpy/op.py +++ b/iron/operators/axpy/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/axpy/reference.py b/iron/operators/axpy/reference.py similarity index 92% rename from operators/axpy/reference.py rename to iron/operators/axpy/reference.py index 18de139b..c8ade0f0 100644 --- a/operators/axpy/reference.py +++ b/iron/operators/axpy/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(input_length: int, scalar=3.0, dtype="bf16", seed=42): diff --git a/operators/axpy/test.py b/iron/operators/axpy/test.py similarity index 92% rename from operators/axpy/test.py rename to iron/operators/axpy/test.py index 2962b504..b91e802f 100755 --- a/operators/axpy/test.py +++ b/iron/operators/axpy/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.axpy.op import AIEAXPY -from operators.axpy.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.axpy.op import AIEAXPY +from iron.operators.axpy.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/dequant/design.py b/iron/operators/dequant/design.py similarity index 100% rename from operators/dequant/design.py rename to iron/operators/dequant/design.py diff --git a/operators/dequant/op.py b/iron/operators/dequant/op.py similarity index 99% rename from operators/dequant/op.py rename to iron/operators/dequant/op.py index f235cbdd..d4aeab8a 100644 --- a/operators/dequant/op.py +++ b/iron/operators/dequant/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/dequant/reference.py b/iron/operators/dequant/reference.py similarity index 99% rename from operators/dequant/reference.py rename to iron/operators/dequant/reference.py index a0a853ce..69a736ef 100644 --- a/operators/dequant/reference.py +++ b/iron/operators/dequant/reference.py @@ -5,7 +5,6 @@ import numpy as np from ml_dtypes import bfloat16 - tensor_type_to_quant = {torch.uint8: torch.quint8} diff --git a/operators/dequant/test.py b/iron/operators/dequant/test.py similarity index 92% rename from operators/dequant/test.py rename to iron/operators/dequant/test.py index 8b33dcb8..03b037f4 100644 --- a/operators/dequant/test.py +++ b/iron/operators/dequant/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.dequant.op import AIEDequant -from operators.dequant.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.dequant.op import AIEDequant +from iron.operators.dequant.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/elementwise_add/design.py b/iron/operators/elementwise_add/design.py similarity index 100% rename from operators/elementwise_add/design.py rename to iron/operators/elementwise_add/design.py diff --git a/operators/elementwise_add/op.py b/iron/operators/elementwise_add/op.py similarity index 99% rename from operators/elementwise_add/op.py rename to iron/operators/elementwise_add/op.py index 3521c3c2..d1963723 100644 --- a/operators/elementwise_add/op.py +++ b/iron/operators/elementwise_add/op.py @@ -7,7 +7,7 @@ import logging from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/elementwise_add/reference.py b/iron/operators/elementwise_add/reference.py similarity index 91% rename from operators/elementwise_add/reference.py rename to iron/operators/elementwise_add/reference.py index 8b5e7bfd..32d995b7 100644 --- a/operators/elementwise_add/reference.py +++ b/iron/operators/elementwise_add/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(input_length: int, dtype="bf16", seed=42): diff --git a/operators/elementwise_add/test.py b/iron/operators/elementwise_add/test.py similarity index 90% rename from operators/elementwise_add/test.py rename to iron/operators/elementwise_add/test.py index 1fdec179..781265f5 100755 --- a/operators/elementwise_add/test.py +++ b/iron/operators/elementwise_add/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.elementwise_add.op import AIEElementwiseAdd -from operators.elementwise_add.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.elementwise_add.op import AIEElementwiseAdd +from iron.operators.elementwise_add.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/elementwise_mul/design.py b/iron/operators/elementwise_mul/design.py similarity index 100% rename from operators/elementwise_mul/design.py rename to iron/operators/elementwise_mul/design.py diff --git a/operators/elementwise_mul/op.py b/iron/operators/elementwise_mul/op.py similarity index 99% rename from operators/elementwise_mul/op.py rename to iron/operators/elementwise_mul/op.py index 954391b8..60113341 100644 --- a/operators/elementwise_mul/op.py +++ b/iron/operators/elementwise_mul/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/elementwise_mul/reference.py b/iron/operators/elementwise_mul/reference.py similarity index 91% rename from operators/elementwise_mul/reference.py rename to iron/operators/elementwise_mul/reference.py index ee2f154a..76778096 100644 --- a/operators/elementwise_mul/reference.py +++ b/iron/operators/elementwise_mul/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(input_length: int, dtype="bf16", seed=42): diff --git a/operators/elementwise_mul/test.py b/iron/operators/elementwise_mul/test.py similarity index 91% rename from operators/elementwise_mul/test.py rename to iron/operators/elementwise_mul/test.py index 1a3f762d..2c92d288 100755 --- a/operators/elementwise_mul/test.py +++ b/iron/operators/elementwise_mul/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.elementwise_mul.op import AIEElementwiseMul -from operators.elementwise_mul.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.elementwise_mul.op import AIEElementwiseMul +from iron.operators.elementwise_mul.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/gelu/design.py b/iron/operators/gelu/design.py similarity index 100% rename from operators/gelu/design.py rename to iron/operators/gelu/design.py diff --git a/operators/gelu/op.py b/iron/operators/gelu/op.py similarity index 99% rename from operators/gelu/op.py rename to iron/operators/gelu/op.py index 8217017b..86fea435 100644 --- a/operators/gelu/op.py +++ b/iron/operators/gelu/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/gelu/reference.py b/iron/operators/gelu/reference.py similarity index 90% rename from operators/gelu/reference.py rename to iron/operators/gelu/reference.py index 359a8660..a77cce60 100644 --- a/operators/gelu/reference.py +++ b/iron/operators/gelu/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(input_length: int, dtype="bf16", seed=42): diff --git a/operators/gelu/test.py b/iron/operators/gelu/test.py similarity index 92% rename from operators/gelu/test.py rename to iron/operators/gelu/test.py index 4f6f0972..d91a9e7a 100755 --- a/operators/gelu/test.py +++ b/iron/operators/gelu/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.gelu.op import AIEGELU -from operators.gelu.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.gelu.op import AIEGELU +from iron.operators.gelu.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/gemm/design.py b/iron/operators/gemm/design.py similarity index 99% rename from operators/gemm/design.py rename to iron/operators/gemm/design.py index 432474cf..6ea439d5 100644 --- a/operators/gemm/design.py +++ b/iron/operators/gemm/design.py @@ -23,7 +23,6 @@ from aie.helpers.taplib import TensorAccessSequence, TensorTiler2D, TensorAccessPattern from aie.iron.controlflow import range_ - microkernel_mac_dim_map = { "npu": { "bf16": (4, 8, 4), diff --git a/operators/gemm/op.py b/iron/operators/gemm/op.py similarity index 99% rename from operators/gemm/op.py rename to iron/operators/gemm/op.py index 9201eb2c..007e46b3 100644 --- a/operators/gemm/op.py +++ b/iron/operators/gemm/op.py @@ -7,7 +7,7 @@ import logging from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, @@ -18,7 +18,7 @@ PythonGeneratedMLIRArtifact, ) -from operators.common.utils import torch_to_numpy, numpy_to_torch +from iron.common.utils import torch_to_numpy, numpy_to_torch class AIEGEMM(AIEOperatorBase): diff --git a/operators/gemm/reference.py b/iron/operators/gemm/reference.py similarity index 97% rename from operators/gemm/reference.py rename to iron/operators/gemm/reference.py index 093f3ab8..2cb15ab5 100644 --- a/operators/gemm/reference.py +++ b/iron/operators/gemm/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference( diff --git a/operators/gemm/test.py b/iron/operators/gemm/test.py similarity index 96% rename from operators/gemm/test.py rename to iron/operators/gemm/test.py index 4c4062a7..6480aeff 100755 --- a/operators/gemm/test.py +++ b/iron/operators/gemm/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.gemm.op import AIEGEMM -from operators.gemm.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.gemm.op import AIEGEMM +from iron.operators.gemm.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/gemv/design.py b/iron/operators/gemv/design.py similarity index 99% rename from operators/gemv/design.py rename to iron/operators/gemv/design.py index c4354518..0a153364 100644 --- a/operators/gemv/design.py +++ b/iron/operators/gemv/design.py @@ -12,13 +12,12 @@ import aie.dialects.memref as memref from aie.dialects.aie import * from aie.dialects.aiex import * -from aie.helpers.dialects.ext.scf import _for as range_ +from aie.helpers.dialects.scf import _for as range_ from aie.helpers.util import try_convert_np_type_to_mlir_type from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 - """ Matrix-vector design diff --git a/operators/gemv/op.py b/iron/operators/gemv/op.py similarity index 98% rename from operators/gemv/op.py rename to iron/operators/gemv/op.py index b86088d4..6ed5a9fe 100644 --- a/operators/gemv/op.py +++ b/iron/operators/gemv/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, @@ -16,7 +16,7 @@ SourceArtifact, PythonGeneratedMLIRArtifact, ) -from operators.common.utils import torch_to_numpy +from iron.common.utils import torch_to_numpy class AIEGEMV(AIEOperatorBase): diff --git a/operators/gemv/reference.py b/iron/operators/gemv/reference.py similarity index 100% rename from operators/gemv/reference.py rename to iron/operators/gemv/reference.py diff --git a/operators/gemv/test.py b/iron/operators/gemv/test.py similarity index 92% rename from operators/gemv/test.py rename to iron/operators/gemv/test.py index 1c3f07e4..2dd4a8e6 100755 --- a/operators/gemv/test.py +++ b/iron/operators/gemv/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.gemv.op import AIEGEMV -from operators.gemv.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.gemv.op import AIEGEMV +from iron.operators.gemv.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/layer_norm/design.py b/iron/operators/layer_norm/design.py similarity index 100% rename from operators/layer_norm/design.py rename to iron/operators/layer_norm/design.py diff --git a/operators/layer_norm/op.py b/iron/operators/layer_norm/op.py similarity index 99% rename from operators/layer_norm/op.py rename to iron/operators/layer_norm/op.py index 36fb5256..cc3c1aa2 100644 --- a/operators/layer_norm/op.py +++ b/iron/operators/layer_norm/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/layer_norm/reference.py b/iron/operators/layer_norm/reference.py similarity index 91% rename from operators/layer_norm/reference.py rename to iron/operators/layer_norm/reference.py index 5ea77533..2d1627a8 100644 --- a/operators/layer_norm/reference.py +++ b/iron/operators/layer_norm/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(rows: int, cols: int, dtype="bf16", seed=42): diff --git a/operators/layer_norm/test.py b/iron/operators/layer_norm/test.py similarity index 92% rename from operators/layer_norm/test.py rename to iron/operators/layer_norm/test.py index 106ddf1a..2b14641c 100755 --- a/operators/layer_norm/test.py +++ b/iron/operators/layer_norm/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.layer_norm.op import AIELayerNorm -from operators.layer_norm.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.layer_norm.op import AIELayerNorm +from iron.operators.layer_norm.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/leaky_relu/design.py b/iron/operators/leaky_relu/design.py similarity index 100% rename from operators/leaky_relu/design.py rename to iron/operators/leaky_relu/design.py diff --git a/operators/leaky_relu/op.py b/iron/operators/leaky_relu/op.py similarity index 99% rename from operators/leaky_relu/op.py rename to iron/operators/leaky_relu/op.py index e9cbc413..e26fc368 100644 --- a/operators/leaky_relu/op.py +++ b/iron/operators/leaky_relu/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/leaky_relu/reference.py b/iron/operators/leaky_relu/reference.py similarity index 91% rename from operators/leaky_relu/reference.py rename to iron/operators/leaky_relu/reference.py index f225577f..ed0c5807 100644 --- a/operators/leaky_relu/reference.py +++ b/iron/operators/leaky_relu/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(input_length: int, alpha=0.01, dtype="bf16", seed=42): diff --git a/operators/leaky_relu/test.py b/iron/operators/leaky_relu/test.py similarity index 89% rename from operators/leaky_relu/test.py rename to iron/operators/leaky_relu/test.py index 09a50af1..cac577ad 100755 --- a/operators/leaky_relu/test.py +++ b/iron/operators/leaky_relu/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.leaky_relu.op import AIELeakyReLU -from operators.leaky_relu.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.leaky_relu.op import AIELeakyReLU +from iron.operators.leaky_relu.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/mem_copy/design.py b/iron/operators/mem_copy/design.py similarity index 100% rename from operators/mem_copy/design.py rename to iron/operators/mem_copy/design.py diff --git a/operators/mem_copy/op.py b/iron/operators/mem_copy/op.py similarity index 99% rename from operators/mem_copy/op.py rename to iron/operators/mem_copy/op.py index 80bda8be..c5c9f14e 100644 --- a/operators/mem_copy/op.py +++ b/iron/operators/mem_copy/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/mem_copy/reference.py b/iron/operators/mem_copy/reference.py similarity index 100% rename from operators/mem_copy/reference.py rename to iron/operators/mem_copy/reference.py diff --git a/operators/mem_copy/test.py b/iron/operators/mem_copy/test.py similarity index 93% rename from operators/mem_copy/test.py rename to iron/operators/mem_copy/test.py index 94672f3b..afd7f540 100644 --- a/operators/mem_copy/test.py +++ b/iron/operators/mem_copy/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.mem_copy.op import AIEMemCopy -from operators.mem_copy.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.mem_copy.op import AIEMemCopy +from iron.operators.mem_copy.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/mha/design.py b/iron/operators/mha/design.py similarity index 99% rename from operators/mha/design.py rename to iron/operators/mha/design.py index 0f9fdf4e..d11e4ed4 100644 --- a/operators/mha/design.py +++ b/iron/operators/mha/design.py @@ -24,7 +24,7 @@ from aie.iron.device import NPU1Col1, NPU2, Tile from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorTiler2D, TensorAccessSequence, TensorAccessPattern -from aie.helpers.dialects.ext.scf import if_, else_ +from aie.helpers.dialects.scf import if_, else_ base_dir = Path(__file__).parent diff --git a/operators/mha/op.py b/iron/operators/mha/op.py similarity index 98% rename from operators/mha/op.py rename to iron/operators/mha/op.py index 463a3062..58864519 100644 --- a/operators/mha/op.py +++ b/iron/operators/mha/op.py @@ -7,7 +7,7 @@ from pathlib import Path from typing import Dict, List -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, @@ -17,7 +17,7 @@ SourceArtifact, PythonGeneratedMLIRArtifact, ) -from operators.common.utils import torch_to_numpy, numpy_to_torch +from iron.common.utils import torch_to_numpy, numpy_to_torch class AIEMHA(AIEOperatorBase): diff --git a/operators/mha/reference.py b/iron/operators/mha/reference.py similarity index 100% rename from operators/mha/reference.py rename to iron/operators/mha/reference.py diff --git a/operators/mha/test.py b/iron/operators/mha/test.py similarity index 91% rename from operators/mha/test.py rename to iron/operators/mha/test.py index 11a4140c..35c5087f 100755 --- a/operators/mha/test.py +++ b/iron/operators/mha/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.mha.op import AIEMHA -from operators.mha.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.mha.op import AIEMHA +from iron.operators.mha.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/relu/design.py b/iron/operators/relu/design.py similarity index 100% rename from operators/relu/design.py rename to iron/operators/relu/design.py diff --git a/operators/relu/op.py b/iron/operators/relu/op.py similarity index 99% rename from operators/relu/op.py rename to iron/operators/relu/op.py index dfab584b..8b1f54e8 100644 --- a/operators/relu/op.py +++ b/iron/operators/relu/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/relu/reference.py b/iron/operators/relu/reference.py similarity index 90% rename from operators/relu/reference.py rename to iron/operators/relu/reference.py index bdd18d5e..c2e748b8 100644 --- a/operators/relu/reference.py +++ b/iron/operators/relu/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(input_length: int, dtype="bf16", seed=42): diff --git a/operators/relu/test.py b/iron/operators/relu/test.py similarity index 92% rename from operators/relu/test.py rename to iron/operators/relu/test.py index 2981991c..3194c8c0 100755 --- a/operators/relu/test.py +++ b/iron/operators/relu/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.relu.op import AIEReLU -from operators.relu.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.relu.op import AIEReLU +from iron.operators.relu.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/rms_norm/design.py b/iron/operators/rms_norm/design.py similarity index 100% rename from operators/rms_norm/design.py rename to iron/operators/rms_norm/design.py diff --git a/operators/rms_norm/design_weighted.py b/iron/operators/rms_norm/design_weighted.py similarity index 100% rename from operators/rms_norm/design_weighted.py rename to iron/operators/rms_norm/design_weighted.py diff --git a/operators/rms_norm/op.py b/iron/operators/rms_norm/op.py similarity index 98% rename from operators/rms_norm/op.py rename to iron/operators/rms_norm/op.py index 4553fa6a..1ba38d92 100644 --- a/operators/rms_norm/op.py +++ b/iron/operators/rms_norm/op.py @@ -7,7 +7,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, @@ -17,7 +17,7 @@ SourceArtifact, PythonGeneratedMLIRArtifact, ) -from operators.common.utils import torch_to_numpy +from iron.common.utils import torch_to_numpy class AIERMSNorm(AIEOperatorBase): diff --git a/operators/rms_norm/reference.py b/iron/operators/rms_norm/reference.py similarity index 93% rename from operators/rms_norm/reference.py rename to iron/operators/rms_norm/reference.py index ca6dbbab..5014fbae 100644 --- a/operators/rms_norm/reference.py +++ b/iron/operators/rms_norm/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference( diff --git a/operators/rms_norm/test.py b/iron/operators/rms_norm/test.py similarity index 94% rename from operators/rms_norm/test.py rename to iron/operators/rms_norm/test.py index 672e42fc..e6dd012d 100755 --- a/operators/rms_norm/test.py +++ b/iron/operators/rms_norm/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.rms_norm.op import AIERMSNorm -from operators.rms_norm.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.rms_norm.op import AIERMSNorm +from iron.operators.rms_norm.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/rope/README.md b/iron/operators/rope/README.md similarity index 100% rename from operators/rope/README.md rename to iron/operators/rope/README.md diff --git a/operators/rope/design.py b/iron/operators/rope/design.py similarity index 99% rename from operators/rope/design.py rename to iron/operators/rope/design.py index 780e52fa..f1082bdd 100644 --- a/operators/rope/design.py +++ b/iron/operators/rope/design.py @@ -11,10 +11,9 @@ from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern -from aie.helpers.dialects.ext.scf import _for as range_ +from aie.helpers.dialects.scf import _for as range_ from ml_dtypes import bfloat16 - """ Rotary Positional Encoding (RoPE) design diff --git a/operators/rope/op.py b/iron/operators/rope/op.py similarity index 99% rename from operators/rope/op.py rename to iron/operators/rope/op.py index 7bd0f091..be8e7f95 100644 --- a/operators/rope/op.py +++ b/iron/operators/rope/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/rope/reference.py b/iron/operators/rope/reference.py similarity index 100% rename from operators/rope/reference.py rename to iron/operators/rope/reference.py diff --git a/operators/rope/rope_utils.py b/iron/operators/rope/rope_utils.py similarity index 100% rename from operators/rope/rope_utils.py rename to iron/operators/rope/rope_utils.py diff --git a/operators/rope/test.py b/iron/operators/rope/test.py similarity index 94% rename from operators/rope/test.py rename to iron/operators/rope/test.py index 7399f78a..095a8cc3 100755 --- a/operators/rope/test.py +++ b/iron/operators/rope/test.py @@ -5,12 +5,11 @@ import sys from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) import pytest -from operators.rope.op import AIERope -from operators.rope.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.rope.op import AIERope +from iron.operators.rope.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/sigmoid/design.py b/iron/operators/sigmoid/design.py similarity index 100% rename from operators/sigmoid/design.py rename to iron/operators/sigmoid/design.py diff --git a/operators/sigmoid/op.py b/iron/operators/sigmoid/op.py similarity index 99% rename from operators/sigmoid/op.py rename to iron/operators/sigmoid/op.py index 33702eb4..a24d051d 100644 --- a/operators/sigmoid/op.py +++ b/iron/operators/sigmoid/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/sigmoid/reference.py b/iron/operators/sigmoid/reference.py similarity index 90% rename from operators/sigmoid/reference.py rename to iron/operators/sigmoid/reference.py index 05bf648c..5eba46f5 100644 --- a/operators/sigmoid/reference.py +++ b/iron/operators/sigmoid/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(input_length: int, dtype="bf16", seed=42): diff --git a/operators/sigmoid/test.py b/iron/operators/sigmoid/test.py similarity index 91% rename from operators/sigmoid/test.py rename to iron/operators/sigmoid/test.py index 21d5a8bb..1dc5b99d 100755 --- a/operators/sigmoid/test.py +++ b/iron/operators/sigmoid/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.sigmoid.op import AIESigmoid -from operators.sigmoid.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.sigmoid.op import AIESigmoid +from iron.operators.sigmoid.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/silu/design.py b/iron/operators/silu/design.py similarity index 100% rename from operators/silu/design.py rename to iron/operators/silu/design.py diff --git a/operators/silu/op.py b/iron/operators/silu/op.py similarity index 99% rename from operators/silu/op.py rename to iron/operators/silu/op.py index c8a89d05..3583868c 100644 --- a/operators/silu/op.py +++ b/iron/operators/silu/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/silu/reference.py b/iron/operators/silu/reference.py similarity index 90% rename from operators/silu/reference.py rename to iron/operators/silu/reference.py index 7bfec312..26ebeef3 100644 --- a/operators/silu/reference.py +++ b/iron/operators/silu/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(input_length: int, dtype="bf16", seed=42): diff --git a/operators/silu/test.py b/iron/operators/silu/test.py similarity index 92% rename from operators/silu/test.py rename to iron/operators/silu/test.py index e9ac0fb4..4dc52ba0 100755 --- a/operators/silu/test.py +++ b/iron/operators/silu/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.silu.op import AIESiLU -from operators.silu.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.silu.op import AIESiLU +from iron.operators.silu.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/softmax/design.py b/iron/operators/softmax/design.py similarity index 99% rename from operators/softmax/design.py rename to iron/operators/softmax/design.py index cdac226c..981312be 100644 --- a/operators/softmax/design.py +++ b/iron/operators/softmax/design.py @@ -11,7 +11,7 @@ from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern -from aie.helpers.dialects.ext.scf import _for as range_ +from aie.helpers.dialects.scf import _for as range_ from ml_dtypes import bfloat16 diff --git a/operators/softmax/op.py b/iron/operators/softmax/op.py similarity index 99% rename from operators/softmax/op.py rename to iron/operators/softmax/op.py index 7c4cef71..106f0415 100644 --- a/operators/softmax/op.py +++ b/iron/operators/softmax/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/softmax/reference.py b/iron/operators/softmax/reference.py similarity index 92% rename from operators/softmax/reference.py rename to iron/operators/softmax/reference.py index 0ae9ca31..d0322776 100644 --- a/operators/softmax/reference.py +++ b/iron/operators/softmax/reference.py @@ -4,7 +4,7 @@ """Golden reference generator for softmax operator.""" import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(rows: int, cols: int, dtype="bf16", seed=42): diff --git a/operators/softmax/test.py b/iron/operators/softmax/test.py similarity index 93% rename from operators/softmax/test.py rename to iron/operators/softmax/test.py index dd2c297e..1ad613d9 100755 --- a/operators/softmax/test.py +++ b/iron/operators/softmax/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.softmax.op import AIESoftmax -from operators.softmax.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.softmax.op import AIESoftmax +from iron.operators.softmax.reference import generate_golden_reference +from iron.common.test_utils import run_test def get_optimal_columns_channels(input_length, tile_size): diff --git a/operators/swiglu_decode/op.py b/iron/operators/swiglu_decode/op.py similarity index 96% rename from operators/swiglu_decode/op.py rename to iron/operators/swiglu_decode/op.py index 7ddf8d6d..869493c9 100644 --- a/operators/swiglu_decode/op.py +++ b/iron/operators/swiglu_decode/op.py @@ -6,7 +6,7 @@ import numpy as np from ml_dtypes import bfloat16 -from operators.common import ( +from iron.common import ( AIEOperatorBase, XclbinArtifact, InstsBinArtifact, @@ -15,10 +15,10 @@ SourceArtifact, PythonGeneratedMLIRArtifact, ) -from operators.gemv.op import AIEGEMV -from operators.silu.op import AIESiLU -from operators.elementwise_mul.op import AIEElementwiseMul -from operators.common.utils import torch_to_numpy +from iron.operators.gemv.op import AIEGEMV +from iron.operators.silu.op import AIESiLU +from iron.operators.elementwise_mul.op import AIEElementwiseMul +from iron.common.utils import torch_to_numpy class AIESwiGLUDecode(AIEOperatorBase): diff --git a/operators/swiglu_decode/reference.py b/iron/operators/swiglu_decode/reference.py similarity index 100% rename from operators/swiglu_decode/reference.py rename to iron/operators/swiglu_decode/reference.py diff --git a/operators/swiglu_decode/test.py b/iron/operators/swiglu_decode/test.py similarity index 92% rename from operators/swiglu_decode/test.py rename to iron/operators/swiglu_decode/test.py index 9cb372c1..11b35fa2 100755 --- a/operators/swiglu_decode/test.py +++ b/iron/operators/swiglu_decode/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.swiglu_decode.op import AIESwiGLUDecode -from operators.swiglu_decode.reference import generate_golden_reference -from operators.common.test_utils import run_test, verify_buffer +from iron.operators.swiglu_decode.op import AIESwiGLUDecode +from iron.operators.swiglu_decode.reference import generate_golden_reference +from iron.common.test_utils import run_test, verify_buffer def generate_test_params(extensive=False): diff --git a/operators/swiglu_prefill/op.py b/iron/operators/swiglu_prefill/op.py similarity index 97% rename from operators/swiglu_prefill/op.py rename to iron/operators/swiglu_prefill/op.py index 6a0d0c2b..2b2aa341 100644 --- a/operators/swiglu_prefill/op.py +++ b/iron/operators/swiglu_prefill/op.py @@ -6,7 +6,7 @@ import numpy as np from ml_dtypes import bfloat16 -from operators.common import ( +from iron.common import ( AIEOperatorBase, XclbinArtifact, InstsBinArtifact, @@ -15,10 +15,10 @@ SourceArtifact, PythonGeneratedMLIRArtifact, ) -from operators.gemm.op import AIEGEMM -from operators.silu.op import AIESiLU -from operators.elementwise_mul.op import AIEElementwiseMul -from operators.common.utils import torch_to_numpy +from iron.operators.gemm.op import AIEGEMM +from iron.operators.silu.op import AIESiLU +from iron.operators.elementwise_mul.op import AIEElementwiseMul +from iron.common.utils import torch_to_numpy class AIESwiGLUPrefill(AIEOperatorBase): diff --git a/operators/swiglu_prefill/test.py b/iron/operators/swiglu_prefill/test.py similarity index 91% rename from operators/swiglu_prefill/test.py rename to iron/operators/swiglu_prefill/test.py index 872c9442..75510d63 100755 --- a/operators/swiglu_prefill/test.py +++ b/iron/operators/swiglu_prefill/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.swiglu_prefill.op import AIESwiGLUPrefill -from operators.swiglu_decode.reference import generate_golden_reference -from operators.common.test_utils import run_test, verify_buffer +from iron.operators.swiglu_prefill.op import AIESwiGLUPrefill +from iron.operators.swiglu_decode.reference import generate_golden_reference +from iron.common.test_utils import run_test, verify_buffer def generate_test_params(extensive=False): diff --git a/operators/tanh/design.py b/iron/operators/tanh/design.py similarity index 100% rename from operators/tanh/design.py rename to iron/operators/tanh/design.py diff --git a/operators/tanh/op.py b/iron/operators/tanh/op.py similarity index 99% rename from operators/tanh/op.py rename to iron/operators/tanh/op.py index 8133ae17..5bccad5e 100644 --- a/operators/tanh/op.py +++ b/iron/operators/tanh/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/tanh/reference.py b/iron/operators/tanh/reference.py similarity index 90% rename from operators/tanh/reference.py rename to iron/operators/tanh/reference.py index b38c6fcc..a87a85c0 100644 --- a/operators/tanh/reference.py +++ b/iron/operators/tanh/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(input_length: int, dtype="bf16", seed=42): diff --git a/operators/tanh/test.py b/iron/operators/tanh/test.py similarity index 92% rename from operators/tanh/test.py rename to iron/operators/tanh/test.py index 548e6371..f9986bb3 100755 --- a/operators/tanh/test.py +++ b/iron/operators/tanh/test.py @@ -6,11 +6,10 @@ import pytest from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) -from operators.tanh.op import AIETanh -from operators.tanh.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.tanh.op import AIETanh +from iron.operators.tanh.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/transpose/design.py b/iron/operators/transpose/design.py similarity index 100% rename from operators/transpose/design.py rename to iron/operators/transpose/design.py diff --git a/operators/transpose/op.py b/iron/operators/transpose/op.py similarity index 99% rename from operators/transpose/op.py rename to iron/operators/transpose/op.py index b71e14f8..7963fd06 100644 --- a/operators/transpose/op.py +++ b/iron/operators/transpose/op.py @@ -6,7 +6,7 @@ from ml_dtypes import bfloat16 from pathlib import Path -from operators.common import ( +from iron.common import ( AIEOperatorBase, AIEOperatorConstraintError, XclbinArtifact, diff --git a/operators/transpose/reference.py b/iron/operators/transpose/reference.py similarity index 90% rename from operators/transpose/reference.py rename to iron/operators/transpose/reference.py index 1c250bdf..96c8f5a1 100644 --- a/operators/transpose/reference.py +++ b/iron/operators/transpose/reference.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import torch -from operators.common.utils import torch_dtype_map +from iron.common.utils import torch_dtype_map def generate_golden_reference(rows: int, cols: int, dtype="bf16", seed=42): diff --git a/operators/transpose/test.py b/iron/operators/transpose/test.py similarity index 92% rename from operators/transpose/test.py rename to iron/operators/transpose/test.py index 740590da..8f0d9981 100755 --- a/operators/transpose/test.py +++ b/iron/operators/transpose/test.py @@ -5,12 +5,11 @@ import sys from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) import pytest -from operators.transpose.op import AIETranspose -from operators.transpose.reference import generate_golden_reference -from operators.common.test_utils import run_test +from iron.operators.transpose.op import AIETranspose +from iron.operators.transpose.reference import generate_golden_reference +from iron.common.test_utils import run_test def generate_test_params(extensive=False): diff --git a/operators/common/aie_device_manager.py b/operators/common/aie_device_manager.py deleted file mode 100644 index a06d957a..00000000 --- a/operators/common/aie_device_manager.py +++ /dev/null @@ -1,100 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -""" -Global AIE Device Manager for resource sharing and cleanup -""" - -import logging -import os -import sys -from pathlib import Path -from typing import Dict, Optional, Any -import pyxrt -from aie.iron.hostruntime.config import detect_npu_device -from aie.iron.device import NPU1, NPU2 - - -class AIEDeviceManager: - """Singleton manager for AIE XRT resources""" - - _instance = None - - def __new__(cls): - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def __init__(self): - self.device = pyxrt.device(0) - self.device_type = detect_npu_device() - self.contexts = {} # xclbin_path -> (context, xclbin) - self.kernels = {} # (xclbin_path, kernel_name) -> kernel - - def get_context_and_kernel( - self, xclbin_path: str, kernel_name: str | None = None - ) -> (pyxrt.hw_context, pyxrt.kernel): - """Get or create hardware context and kernel for xclbin""" - # Check if we already have a context for this xclbin - - if xclbin_path not in self.contexts: - xclbin = pyxrt.xclbin(xclbin_path) - self.device.register_xclbin(xclbin) - xclbin_uuid = xclbin.get_uuid() - context = pyxrt.hw_context(self.device, xclbin_uuid) - self.contexts[xclbin_path] = (context, xclbin) - logging.debug(f"Created new context for {Path(xclbin_path).name}") - else: - context, xclbin = self.contexts[xclbin_path] - logging.debug(f"Reusing context for {Path(xclbin_path).name}") - - # Get kernel name if not provided - if kernel_name is None: - kernels = xclbin.get_kernels() - if not kernels: - raise RuntimeError("No kernels found in xclbin") - kernel_name = kernels[0].get_name() - - # Check if we already have the kernel - kernel_key = (xclbin_path, kernel_name) - if kernel_key not in self.kernels: - self.kernels[kernel_key] = pyxrt.kernel(context, kernel_name) - logging.debug( - f"Created new kernel {kernel_name} from xclbin {Path(xclbin_path).name}" - ) - else: - logging.debug( - f"Reusing kernel: {kernel_name} from xclbin {Path(xclbin_path).name}" - ) - - return context, self.kernels[kernel_key] - - def device_str(self) -> str: - return self.device_type.resolve().name - - def cleanup(self): - """Clean up all XRT resources""" - self.kernels.clear() - - # Clear contexts - for xclbin_path, (context, xclbin) in self.contexts.items(): - try: - del context - except: - pass - self.contexts.clear() - - # Clear device - if self.device is not None: - try: - del self.device - except: - pass - self.device = None - - logging.debug("Cleaned up AIE device manager") - - def reset(self): - """Reset the device manager (for debugging)""" - self.cleanup() - AIEDeviceManager._instance = None diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..7c92f047 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "iron" +version = "0.1.0" +description = "IRON: Unlocking the Full Potential of NPUs" +readme = "README.md" +requires-python = ">=3.10" +license = {text = "Apache-2.0"} +dependencies = [ + "numpy", + "torch", + "ml_dtypes", +] + +[tool.setuptools.packages.find] +include = ["iron*"] diff --git a/pytest.ini b/pytest.ini index 8e1d69d8..a0c80a27 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 [pytest] -testpaths = operators applications +testpaths = iron/operators iron/applications python_files = test.py python_classes = Test* python_functions = test_* diff --git a/requirements.txt b/requirements.txt index b0541ad2..0072a859 100755 --- a/requirements.txt +++ b/requirements.txt @@ -6,11 +6,11 @@ # version of torch (don't need CUDA), so we give this index precedence over the # main PyPI. These indices are consulted in order of precedence by pip. --index-url https://download.pytorch.org/whl/cpu ---extra-index-url https://github.com/Xilinx/mlir-aie/releases/expanded_assets/v1.1.4 +--extra-index-url https://github.com/Xilinx/mlir-aie/releases/expanded_assets/v1.2.0 --extra-index-url https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly --extra-index-url https://pypi.org/simple -mlir_aie==1.1.4 +mlir_aie==v1.2.0 llvm-aie black @@ -19,9 +19,5 @@ torch pytest pytest-xdist -# MLIR-extras (Python bindings/helpers for MLIR) -# For installing this, the following environment variables must be set: -# MLIR_PYTHON_EXTRAS_SET_VERSION="0.0.8.3" -# HOST_MLIR_PYTHON_PACKAGE_PREFIX="aie" -git+https://github.com/erwei-xilinx/mlir-python-extras@a801853ac0eef50a0f2779cfbbd7dabc931806ee --f https://github.com/llvm/eudsl/releases/expanded_assets/latest +# Install the local python code as the package "iron" +-e .