doc: add multidevice algorithm example

BillHuang2001 · BillHuang2001 · commit 092db70a740f · 2024-11-19T15:09:26.000+08:00
diff --git a/docs/source/guide/experimental/multidevice_algorithm.ipynb b/docs/source/guide/experimental/multidevice_algorithm.ipynb
@@ -0,0 +1,272 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from typing import Optional\n",
+    "\n",
+    "import jax\n",
+    "import jax.numpy as jnp\n",
+    "\n",
+    "from evox import Algorithm, dataclass, pytree_field, problems, workflows, monitors, use_state\n",
+    "from evox.core.distributed import ShardingType\n",
+    "from evox.utils import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In this example, we consider the following simple setup:\n",
+    "```\n",
+    "    Node1\n",
+    "      |\n",
+    " +----+----+\n",
+    " |    |    |\n",
+    "GPU  GPU  GPU\n",
+    "```\n",
+    "Here we have a single node with multiple GPUs, and the GPUs communicate with each other over PCIe or NVLink.\n",
+    "When running in such a multi-device setup, we need to decide how to place the data on these GPUs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
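+    "# The only change needed for multi-device execution is the sharding metadata on the state fields:\n",
+    "# fields marked with ShardingType.SHARED_FIRST_DIM are partitioned along their first dimension across devices; the other fields are left unpartitioned.\n",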
+    "@dataclass\n",
+    "class SpecialPSOState:\n",
+    "    population: jax.Array = pytree_field(sharding=ShardingType.SHARED_FIRST_DIM)\n",
+    "    velocity: jax.Array = pytree_field(sharding=ShardingType.SHARED_FIRST_DIM)\n",
+    "    fitness: jax.Array = pytree_field(sharding=ShardingType.SHARED_FIRST_DIM)\n",
+    "    local_best_location: jax.Array = pytree_field(sharding=ShardingType.SHARED_FIRST_DIM)\n",
+    "    local_best_fitness: jax.Array = pytree_field(sharding=ShardingType.SHARED_FIRST_DIM)\n",
+    "    global_best_location: jax.Array\n",
+    "    global_best_fitness: jax.Array\n",
+    "    key: jax.random.PRNGKey\n",
+    "\n",
+    "\n",
+    "@dataclass\n",
+    "class PSO(Algorithm):\n",
+    "    dim: jax.Array = pytree_field(static=True, init=False)\n",
+    "    lb: jax.Array\n",
+    "    ub: jax.Array\n",
+    "    pop_size: jax.Array = pytree_field(static=True)\n",
+    "    w: jax.Array = pytree_field(default=0.6)\n",
+    "    phi_p: jax.Array = pytree_field(default=2.5)\n",
+    "    phi_g: jax.Array = pytree_field(default=0.8)\n",
+    "    mean: Optional[jax.Array] = pytree_field(default=None)\n",
+    "    stdev: Optional[jax.Array] = pytree_field(default=None)\n",
+    "    bound_method: str = pytree_field(static=True, default=\"clip\")\n",
+    "\n",
+    "    def __post_init__(self):\n",
+    "        self.set_frozen_attr(\"dim\", self.lb.shape[0])\n",
+    "\n",
+    "    def setup(self, key):\n",
+    "        state_key, init_pop_key, init_v_key = jax.random.split(key, 3)\n",
+    "        if self.mean is not None and self.stdev is not None:\n",
+    "            population = self.stdev * jax.random.normal(\n",
+    "                init_pop_key, shape=(self.pop_size, self.dim)\n",
+    "            )\n",
+    "            population = jnp.clip(population, self.lb, self.ub)\n",
+    "            velocity = self.stdev * jax.random.normal(\n",
+    "                init_v_key, shape=(self.pop_size, self.dim)\n",
+    "            )\n",
+    "        else:\n",
+    "            length = self.ub - self.lb\n",
+    "            population = jax.random.uniform(\n",
+    "                init_pop_key, shape=(self.pop_size, self.dim)\n",
+    "            )\n",
+    "            population = population * length + self.lb\n",
+    "            velocity = jax.random.uniform(init_v_key, shape=(self.pop_size, self.dim))\n",
+    "            velocity = velocity * length * 2 - length\n",
+    "\n",
+    "        return SpecialPSOState(\n",
+    "            population=population,\n",
+    "            velocity=velocity,\n",
+    "            fitness=jnp.full((self.pop_size,), jnp.inf),\n",
+    "            local_best_location=population,\n",
+    "            local_best_fitness=jnp.full((self.pop_size,), jnp.inf),\n",
+    "            global_best_location=population[0],\n",
+    "            global_best_fitness=jnp.array([jnp.inf]),\n",
+    "            key=state_key,\n",
+    "        )\n",
+    "\n",
+    "    def ask(self, state):\n",
+    "        return state.population, state\n",
+    "\n",
+    "    def tell(self, state, fitness):\n",
+    "        key, rg_key, rp_key = jax.random.split(state.key, 3)\n",
+    "\n",
+    "        rg = jax.random.uniform(rg_key, shape=(self.pop_size, self.dim))\n",
+    "        rp = jax.random.uniform(rp_key, shape=(self.pop_size, self.dim))\n",
+    "\n",
+    "        compare = state.local_best_fitness > fitness\n",
+    "        local_best_location = jnp.where(\n",
+    "            compare[:, jnp.newaxis], state.population, state.local_best_location\n",
+    "        )\n",
+    "        local_best_fitness = jnp.minimum(state.local_best_fitness, fitness)\n",
+    "\n",
+    "        global_best_location, global_best_fitness = min_by(\n",
+    "            [state.global_best_location[jnp.newaxis, :], state.population],\n",
+    "            [state.global_best_fitness, fitness],\n",
+    "        )\n",
+    "\n",
+    "        global_best_fitness = jnp.atleast_1d(global_best_fitness)\n",
+    "\n",
+    "        velocity = (\n",
+    "            self.w * state.velocity\n",
+    "            + self.phi_p * rp * (local_best_location - state.population)\n",
+    "            + self.phi_g * rg * (global_best_location - state.population)\n",
+    "        )\n",
+    "        population = state.population + velocity\n",
+    "\n",
+    "        if self.bound_method == \"clip\":\n",
+    "            population = jnp.clip(population, self.lb, self.ub)\n",
+    "            velocity = jnp.clip(velocity, self.lb, self.ub)\n",
+    "        elif self.bound_method == \"reflect\":\n",
+    "            lower_bound_violation = population < self.lb\n",
+    "            upper_bound_violation = population > self.ub\n",
+    "\n",
+    "            population = jnp.where(\n",
+    "                lower_bound_violation, 2 * self.lb - population, population\n",
+    "            )\n",
+    "            population = jnp.where(\n",
+    "                upper_bound_violation, 2 * self.ub - population, population\n",
+    "            )\n",
+    "            velocity = jnp.where(\n",
+    "                lower_bound_violation | upper_bound_violation, -velocity, velocity\n",
+    "            )\n",
+    "            # enforce the bounds in case the reflected particles are still out of bounds\n",
+    "            population = jnp.clip(population, self.lb, self.ub)\n",
+    "            velocity = jnp.clip(velocity, self.lb, self.ub)\n",
+    "\n",
+    "        return state.replace(\n",
+    "            population=population,\n",
+    "            velocity=velocity,\n",
+    "            local_best_location=local_best_location,\n",
+    "            local_best_fitness=local_best_fitness,\n",
+    "            global_best_location=global_best_location,\n",
+    "            global_best_fitness=global_best_fitness,\n",
+    "            key=key,\n",
+    "        )\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pso = PSO(\n",
+    "    lb=jnp.full(shape=(2,), fill_value=-32),\n",
+    "    ub=jnp.full(shape=(2,), fill_value=32),\n",
+    "    pop_size=100,\n",
+    ")\n",
+    "ackley = problems.numerical.Ackley()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "monitor = monitors.EvalMonitor()\n",
+    "workflow = workflows.StdWorkflow(\n",
+    "    pso,\n",
+    "    ackley,\n",
+    "    monitors=[monitor],\n",
+    ")\n",
+    "key = jax.random.PRNGKey(42)\n",
+    "state = workflow.init(key)\n",
+    "state = workflow.enable_multi_devices(state)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "State(StdWorkflowState(generation=NamedSharding(mesh=Mesh('POP': 2), spec=PartitionSpec(), memory_kind=device), first_step=True), {'algorithm': State(SpecialPSOState(population=NamedSharding(mesh=Mesh('POP': 2), spec=PartitionSpec('POP',), memory_kind=device), velocity=NamedSharding(mesh=Mesh('POP': 2), spec=PartitionSpec('POP',), memory_kind=device), fitness=NamedSharding(mesh=Mesh('POP': 2), spec=PartitionSpec('POP',), memory_kind=device), local_best_location=NamedSharding(mesh=Mesh('POP': 2), spec=PartitionSpec('POP',), memory_kind=device), local_best_fitness=NamedSharding(mesh=Mesh('POP': 2), spec=PartitionSpec('POP',), memory_kind=device), global_best_location=NamedSharding(mesh=Mesh('POP': 2), spec=PartitionSpec(), memory_kind=device), global_best_fitness=NamedSharding(mesh=Mesh('POP': 2), spec=PartitionSpec(), memory_kind=device), key=NamedSharding(mesh=Mesh('POP': 2), spec=PartitionSpec(), memory_kind=device)), {}),'monitors0': State(EvalMonitorState(first_step=True, latest_solution=None, latest_fitness=None, topk_solutions=None, topk_fitness=None), {}),'problem': State({}, {})})"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# check if the state is correctly sharded\n",
+    "jax.tree.map(lambda x: x.sharding, state)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# run the workflow for 50 steps\n",
+    "for i in range(50):\n",
+    "    state = workflow.step(state)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "best_solution, _state = use_state(monitor.get_best_solution)(state)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 0.0002041  -0.00019218]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(best_solution)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}