
Commit c2197b2

Various fixes to get planner to work
1 parent bfe6a81 commit c2197b2

File tree: 7 files changed, +33 -22 lines

scripts/pack_docker.sh
skyplane/api/dataplane.py
skyplane/api/pipeline.py
skyplane/api/transfer_job.py
skyplane/api/usage.py
skyplane/gateway/gateway_program.py
skyplane/planner/planner.py

scripts/pack_docker.sh

Lines changed: 4 additions & 4 deletions
@@ -12,15 +12,15 @@ set +e
 
 >&2 echo -e "${BGreen}Building docker image${NC}"
 set -e
->&2 sudo DOCKER_BUILDKIT=1 docker build -t skyplane --platform linux/x86_64 .
+>&2 DOCKER_BUILDKIT=1 docker build -t skyplane --platform linux/x86_64 .
 set +e
 
 DOCKER_URL="ghcr.io/$1/skyplane:local-$(openssl rand -hex 16)"
 >&2 echo -e "${BGreen}Uploading docker image to $DOCKER_URL${NC}"
 set -e
->&2 sudo docker tag skyplane $DOCKER_URL
->&2 sudo docker push $DOCKER_URL
->&2 sudo docker system prune -f
+>&2 docker tag skyplane $DOCKER_URL
+>&2 docker push $DOCKER_URL
+>&2 docker system prune -f
 set +e
 
 >&2 echo -e "${BGreen}SKYPLANE_DOCKER_IMAGE=$DOCKER_URL${NC}"
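
Dropping sudo assumes the invoking user can reach the Docker daemon directly (for example, through membership in the docker group). Purely as an illustration of the same build/tag/push flow, here is a hypothetical Python driver; the pack_and_push name and its argument are made up for this sketch and are not part of the commit.

# Hypothetical sketch mirroring pack_docker.sh: build, tag, and push the
# image without sudo. Assumes `docker` is on PATH and the current user can
# talk to the Docker daemon.
import os
import secrets
import subprocess


def pack_and_push(ghcr_user: str) -> str:
    env = {**os.environ, "DOCKER_BUILDKIT": "1"}
    subprocess.run(
        ["docker", "build", "-t", "skyplane", "--platform", "linux/x86_64", "."],
        check=True,
        env=env,
    )
    # mirrors DOCKER_URL="ghcr.io/$1/skyplane:local-$(openssl rand -hex 16)"
    docker_url = f"ghcr.io/{ghcr_user}/skyplane:local-{secrets.token_hex(16)}"
    subprocess.run(["docker", "tag", "skyplane", docker_url], check=True)
    subprocess.run(["docker", "push", docker_url], check=True)
    return docker_url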

skyplane/api/dataplane.py

Lines changed: 3 additions & 0 deletions
@@ -112,6 +112,7 @@ def _start_gateway(
 
         # write gateway programs
         gateway_program_filename = Path(f"{gateway_log_dir}/gateway_program_{gateway_node.gateway_id}.json")
+        print(gateway_program_filename)
         with open(gateway_program_filename, "w") as f:
             f.write(gateway_node.gateway_program.to_json())
 
@@ -233,6 +234,8 @@ def provision(
     def copy_gateway_logs(self):
         # copy logs from all gateways in parallel
         def copy_log(instance):
+            out_file = f"{self.transfer_dir}/gateway_{instance.uuid()}.stdout"
+            err_file = f"{self.transfer_dir}/gateway_{instance.uuid()}.stderr"
             typer.secho(f"Downloading log: {self.transfer_dir}/gateway_{instance.uuid()}.stdout", fg="bright_black")
             typer.secho(f"Downloading log: {self.transfer_dir}/gateway_{instance.uuid()}.stderr", fg="bright_black")
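
The new out_file / err_file variables precompute the local destinations for each gateway's stdout and stderr. As a rough illustration only, a hypothetical sketch of copying per-gateway logs in parallel; the Instance object, its uuid()/download_file() methods, and the remote log paths are stand-ins, not Skyplane's actual compute API.

# Hypothetical sketch: download each gateway's stdout/stderr in parallel.
# `instance.uuid()`, `instance.download_file()` and the remote paths are
# assumed stand-ins, not Skyplane's real API.
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Iterable, List, Tuple


def copy_gateway_logs(instances: Iterable, transfer_dir: Path) -> List[Tuple[Path, Path]]:
    transfer_dir.mkdir(parents=True, exist_ok=True)

    def copy_log(instance) -> Tuple[Path, Path]:
        out_file = transfer_dir / f"gateway_{instance.uuid()}.stdout"
        err_file = transfer_dir / f"gateway_{instance.uuid()}.stderr"
        # pull both log streams from the remote gateway to the local dir
        instance.download_file("/tmp/gateway.stdout", out_file)
        instance.download_file("/tmp/gateway.stderr", err_file)
        return out_file, err_file

    with ThreadPoolExecutor(max_workers=8) as pool:
        return list(pool.map(copy_log, instances))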

skyplane/api/pipeline.py

Lines changed: 9 additions & 8 deletions
@@ -53,7 +53,7 @@ def __init__(
         # self.cloud_regions = cloud_regions
         # TODO: set max instances with VM CPU limits and/or config
         self.max_instances = max_instances
-        self.n_connections = n_connections
+        self.n_connections = num_connections
         self.provisioner = provisioner
         self.transfer_config = transfer_config
         self.http_pool = urllib3.PoolManager(retries=urllib3.Retry(total=3))
@@ -71,19 +71,20 @@ def __init__(
         if self.planning_algorithm == "direct":
             # TODO: should find some ways to merge direct / Ndirect
             #self.planner = UnicastDirectPlanner(self.max_instances, num_connections)
-            self.planner = MulticastDirectPlanner(self.max_instances, self.n_connections, self.transfer_config)
+            #self.planner = MulticastDirectPlanner(self.max_instances, self.n_connections, self.transfer_config)
+            self.planner = MulticastDirectPlanner(self.max_instances, self.n_connections)
         #elif self.planning_algorithm == "Ndirect":
         #    self.planner = MulticastDirectPlanner(self.max_instances, num_connections)
         elif self.planning_algorithm == "MDST":
             self.planner = MulticastMDSTPlanner(self.max_instances, num_connections)
         elif self.planning_algorithm == "ILP":
-            self.planning_algorithm = MulticastILPPlanner(self.max_instances, num_connections)
+            self.planner = MulticastILPPlanner(self.max_instances, num_connections)
         elif self.planning_algorithm == "UnicastILP":
-            self.planning_algorithm = UnicastILPPlanner(self.max_instances, num_connections)
-        elif self.planning_algorithm == "src_one_sided":
-            self.planner = DirectPlannerSourceOneSided(self.max_instances, self.n_connections, self.transfer_config)
-        elif self.planning_algorithm == "dst_one_sided":
-            self.planner = DirectPlannerDestOneSided(self.max_instances, self.n_connections, self.transfer_config)
+            self.planner = UnicastILPPlanner(self.max_instances, num_connections)
+        #elif self.planning_algorithm == "src_one_sided":
+        #    self.planner = DirectPlannerSourceOneSided(self.max_instances, self.n_connections, self.transfer_config)
+        #elif self.planning_algorithm == "dst_one_sided":
+        #    self.planner = DirectPlannerDestOneSided(self.max_instances, self.n_connections, self.transfer_config)
         else:
             raise ValueError(f"No such planning algorithm {planning_algorithm}")
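
Two fixes here: the constructor now reads num_connections (the parameter actually passed in), and the ILP branches assign the constructed planner to self.planner instead of overwriting self.planning_algorithm. One way to rule out that class of typo is a table-driven dispatch; the sketch below is illustrative only, is not the code in this commit, and assumes every planner takes (max_instances, num_connections).

# Illustrative sketch (not this commit's code): table-driven planner dispatch.
# `planner_classes` would hold the planner constructors pipeline.py imports
# (MulticastDirectPlanner, MulticastMDSTPlanner, ...); passing them in keeps
# this sketch free of assumed import paths.
from typing import Callable, Dict


def make_planner(
    planning_algorithm: str,
    max_instances: int,
    num_connections: int,
    planner_classes: Dict[str, Callable],
):
    try:
        planner_cls = planner_classes[planning_algorithm]
    except KeyError:
        raise ValueError(f"No such planning algorithm {planning_algorithm}") from None
    # every branch goes through the same call, so a typo cannot silently
    # replace the algorithm-name string instead of setting the planner
    return planner_cls(max_instances, num_connections)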

skyplane/api/transfer_job.py

Lines changed: 8 additions & 3 deletions
@@ -133,6 +133,7 @@ def _run_multipart_chunk_thread(
             for _ in range(num_chunks):
                 file_size_bytes = min(chunk_size_bytes, src_object.size - offset)
                 assert file_size_bytes > 0, f"file size <= 0 {file_size_bytes}"
+                print("partition", part_num, self.num_partitions)
                 chunk = Chunk(
                     src_key=src_object.key,
                     dest_key=dest_key, # dest_object.key, # TODO: upload basename (no prefix)
@@ -350,6 +351,7 @@ def chunk(self, transfer_pair_generator: Generator[TransferPair, None, None]) ->
             multipart_chunk_threads.append(t)
 
         # begin chunking loop
+        part_num = 0
         for transfer_pair in transfer_pair_generator:
             # print("transfer_pair", transfer_pair.src_obj.key, transfer_pair.dst_objs)
             src_obj = transfer_pair.src_obj
@@ -365,9 +367,11 @@ def chunk(self, transfer_pair_generator: Generator[TransferPair, None, None]) ->
                         dest_key=transfer_pair.dst_key, # TODO: get rid of dest_key, and have write object have info on prefix (or have a map here)
                         chunk_id=uuid.uuid4().hex,
                         chunk_length_bytes=transfer_pair.src_obj.size,
-                        partition_id=str(0), # TODO: fix this to distribute across multiple partitions
+                        #partition_id=str(0), # TODO: fix this to distribute across multiple partitions
+                        partition_id=str(part_num % self.num_partitions),
                     )
                 )
+            part_num += 1
 
         if self.transfer_config.multipart_enabled:
             # drain multipart chunk queue and yield with updated chunk IDs
@@ -688,9 +692,10 @@ def chunk_request(server, chunk_batch, n_added):
             # send chunk requests to source gateways
             chunk_batch = [cr.chunk for cr in batch if cr.chunk is not None]
             # TODO: allow multiple partition ids per chunk
-            for chunk in chunk_batch: # assign job UUID as partition ID
-                chunk.partition_id = self.uuid
+            #for chunk in chunk_batch: # assign job UUID as partition ID
+            #    chunk.partition_id = self.uuid
             min_idx = queue_size.index(min(queue_size))
+            print([b.chunk.partition_id for b in batch if b.chunk])
            n_added = 0
             while n_added < len(chunk_batch):
                 # TODO: should update every source instance queue size
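
The partition_id change replaces the hard-coded str(0) with str(part_num % self.num_partitions), so chunks are spread round-robin across partitions instead of all landing in partition "0". A minimal sketch of that assignment pattern, with plain dicts standing in for Skyplane's Chunk objects:

# Minimal sketch of round-robin partition assignment; plain dicts stand in
# for Skyplane's Chunk objects.
from typing import Dict, Iterable, Iterator


def assign_partitions(keys: Iterable[str], num_partitions: int) -> Iterator[Dict[str, str]]:
    part_num = 0
    for key in keys:
        yield {"src_key": key, "partition_id": str(part_num % num_partitions)}
        part_num += 1


# Example: 5 chunks over 2 partitions -> partition ids "0", "1", "0", "1", "0"
print([c["partition_id"] for c in assign_partitions(["a", "b", "c", "d", "e"], 2)])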

skyplane/api/usage.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 
 import requests
 from rich import print as rprint
-from typing import Optional, Dict
+from typing import Optional, Dict, List
 
 import skyplane
 from skyplane.utils.definitions import tmp_log_dir

skyplane/gateway/gateway_program.py

Lines changed: 2 additions & 2 deletions
@@ -150,11 +150,11 @@ def to_dict(self):
            for p in program_all:
                if p["value"] == program: # equivalent partition exists
                    for pid in partition_id:
-                        p["partitions"].append(pid)
+                        p["partitions"].append(str(pid))
                    exists = True
                    break
            if not exists:
-                program_all.append({"value": program, "partitions": partition_id})
+                program_all.append({"value": program, "partitions": str(partition_id)})
 
        return program_all
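
to_dict now stringifies partition IDs before serialization, which keeps the gateway program's "partitions" field consistent with the string partition IDs that chunks now carry (partition_id=str(...) in transfer_job.py). Below is a simplified, standalone variant of the merge-by-equal-program logic with string partition IDs; the function name and dict layout are illustrative, not the repo's exact class.

# Simplified sketch of grouping gateway programs by value and normalizing
# partition ids to strings before serialization (not the repo's exact code).
import json
from typing import Dict, List, Tuple


def programs_to_list(plan: Dict[Tuple[int, ...], dict]) -> List[dict]:
    program_all: List[dict] = []
    for partition_ids, program in plan.items():
        for entry in program_all:
            if entry["value"] == program:  # equivalent program already present
                entry["partitions"].extend(str(pid) for pid in partition_ids)
                break
        else:
            program_all.append({"value": program, "partitions": [str(pid) for pid in partition_ids]})
    return program_all


plan = {(0, 1): {"op": "relay"}, (2,): {"op": "relay"}}
print(json.dumps(programs_to_list(plan)))  # partitions serialized as ["0", "1", "2"]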

skyplane/planner/planner.py

Lines changed: 6 additions & 4 deletions
@@ -64,7 +64,7 @@ def verify_job_src_dsts(self, jobs: List[TransferJob], multicast=False) -> Tuple
         return src_region_tag, dst_region_tags
 
     @functools.lru_cache(maxsize=None)
-    def make_nx_graph(self, tp_grid_path: Optional[Path] = files("skyplane.data") / "throughput.csv") -> nx.DiGraph:
+    def make_nx_graph(self, tp_grid_path: Optional[Path] = files("data") / "throughput.csv") -> nx.DiGraph:
         # create throughput / cost graph for all regions for planner
         G = nx.DiGraph()
         throughput = pd.read_csv(tp_grid_path)
@@ -101,6 +101,7 @@ def add_src_or_overlay_operator(
         :param bucket_info: tuple of (bucket_name, bucket_region) for object store
         :param dst_op: if None, then this is either the source node or a overlay node; otherwise, this is the destination overlay node
         """
+        g = solution_graph
         # partition_ids are set of ids that follow the same path from the out edges of the region
         any_id = partition_ids[0] - partition_offset
         next_regions = set([edge[1] for edge in g.out_edges(region, data=True) if str(any_id) in edge[-1]["partitions"]])
@@ -127,7 +128,7 @@ def add_src_or_overlay_operator(
             receive_op = dst_op
 
         # find set of regions to send to for all partitions in partition_ids
-        g = solution_graph
+
         region_to_id_map = {}
         for next_region in next_regions:
             region_to_id_map[next_region] = []
@@ -268,7 +269,7 @@ def logical_plan_to_topology_plan(self, jobs: List[TransferJob], solution_graph:
                     partitions,
                     partition_offset=i,
                     plan=plan,
-                    obj_store=(src_bucket, node),
+                    bucket_info=(src_bucket, node),
                 )
 
             # dst receive data, write to object store, forward data if needed
@@ -287,7 +288,7 @@ def logical_plan_to_topology_plan(self, jobs: List[TransferJob], solution_graph:
             # overlay node only forward data
             else:
                 self.add_src_or_overlay_operator(
-                    solution_graph, node_gateway_program, node, partitions, partition_offset=i, plan=plan, obj_store=None
+                    solution_graph, node_gateway_program, node, partitions, partition_offset=i, plan=plan, bucket_info=None
                 )
             region_to_gateway_program[node] = node_gateway_program
             assert len(region_to_gateway_program) > 0, f"Empty gateway program {node}"
@@ -381,6 +382,7 @@ def logical_plan(
         filter_edge: bool = False,
         solver_verbose: bool = False,
         save_lp_path: Optional[str] = None,
+        solver: Optional[str] = None,
     ) -> nx.DiGraph:
         import cvxpy as cp
