Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions MarcDecoderWin/lib/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@

def get_filename(params: Dict[str, Any]) -> str:
if params["save_best_only"]:
filename = "weights.keras"
filename = "weights.h5"
return filename

filename = f"weights-{{epoch}}-{{{params['monitor']}}}.keras"
filename = f"weights-{{epoch}}-{{{params['monitor']}}}.h5"
return filename


Expand All @@ -26,6 +26,7 @@ def get_callbacks(config: Config) -> Tuple[Callback]:
checkpoint_filename = get_filename(params["checkpoint"])
model_checkpoint = ModelCheckpoint(
filepath=str(config.checkpoint_dir / checkpoint_filename),
save_weights_only=True,
**params["checkpoint"],
)

Expand Down
47 changes: 36 additions & 11 deletions MarcDecoderWin/lib/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import itertools
import random
import numpy as np

from qrennd.configs import Config
from qrennd.layouts import Layout
Expand All @@ -12,8 +12,34 @@
from .sequences import Sequence


def _pad_and_concat(arrays: tuple[np.ndarray, ...]) -> np.ndarray:
"""Pad arrays along non-batch dimensions before concatenation.

Each array is assumed to have the same rank and its first dimension is the
batch dimension. The remaining dimensions are padded with zeros so that all
arrays share a common shape, allowing them to be concatenated along the
batch axis without shape mismatches.
"""

if not arrays:
return np.array([])

ndims = arrays[0].ndim
# Compute the target size for every non-batch dimension
max_shape = [max(arr.shape[i] for arr in arrays) for i in range(1, ndims)]

padded = []
for arr in arrays:
pad_width = [(0, 0)]
pad_width += [
(0, max_dim - arr.shape[i + 1]) for i, max_dim in enumerate(max_shape)
]
padded.append(np.pad(arr, pad_width, mode="constant"))

return np.concatenate(padded, axis=0)


def load_datasets(config: Config, layout: Layout, dataset_name: str):
batch_size = config.train["batch_size"]
experiment_name = config.dataset["folder_format_name"]

input_names = config.dataset["input_names"]
Expand All @@ -35,15 +61,14 @@ def load_datasets(config: Config, layout: Layout, dataset_name: str):
to_inputs(dataset, proj_matrix, input_names=input_names) for dataset in datasets
]

# Process for keras.model input
input = [to_model_input(*arrs, data_type=data_type) for arrs in processed]
#
# sequences = (Sequence(*tensors, batch_size) for tensors in input)
# sequences = ((b for b in sequence) for sequence in sequences)
# sequences_flattened = itertools.chain.from_iterable(sequences)
# sequences_flattened = list(sequences_flattened)

return input
# Process for keras.model input and concatenate for tf.data
model_inputs = [to_model_input(*arrs, data_type=data_type) for arrs in processed]
rec_inputs, eval_inputs, log_errors = zip(*model_inputs)
rec_inputs = _pad_and_concat(rec_inputs)
eval_inputs = _pad_and_concat(eval_inputs)
log_errors = np.concatenate(log_errors, axis=0)

return rec_inputs, eval_inputs, log_errors


def load_datasets_backup(
Expand Down
73 changes: 38 additions & 35 deletions MarcDecoderWin/train.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import gc
import pathlib
import random
import numpy as np
Expand All @@ -13,7 +12,6 @@

from lib.util import load_datasets
from lib.callbacks import get_callbacks
from lib.sequences import Sequence
# from tensorflow.compat.v1 import ConfigProto
# from tensorflow.compat.v1 import InteractiveSession

Expand Down Expand Up @@ -77,25 +75,37 @@
train_data = load_datasets(config=config, layout=layout, dataset_name="train")
print("completed")

# this is for model.fit to know that the num_rounds coordinate is not fixed
# build tf.data pipeline
batch_size = config.train["batch_size"]
tensor1, tensor2 = train_data[0], train_data[-1]
seq1, seq2 = Sequence(*tensor1, batch_size), Sequence(*tensor2, batch_size)
first_batch, second_batch = seq1[0], seq2[0]


def infinite_gen(inputs):
while True:
random.shuffle(train_data)
sequences = (Sequence(*tensors, batch_size) for tensors in inputs)
# this is for model.fit to know that the num_rounds coordinate is not fixed
yield first_batch
yield second_batch
def make_dataset(rec_input, eval_input, labels, training=False):
"""Construct a ``tf.data.Dataset`` from numpy arrays.

for k, sequence in enumerate(sequences):
# cannot do 'yield from sequence' because it has no end!
for i in range(sequence._num_batches):
yield sequence[i]
Using ``from_tensor_slices`` removes Python overhead from the input
pipeline, enabling TensorFlow to better overlap input processing with GPU
execution. When ``training`` is ``True`` the dataset is shuffled and
repeated to provide an infinite stream of data.
"""

dataset = tf.data.Dataset.from_tensor_slices(
({"rec_input": rec_input, "eval_input": eval_input}, labels)
)
dataset = dataset.cache()
if training:
dataset = dataset.shuffle(len(labels)).repeat()
dataset = dataset.batch(batch_size)

# Prefetch to GPU if available to hide host-to-device transfer latency
gpus = tf.config.list_physical_devices("GPU")
if gpus:
dataset = dataset.apply(
tf.data.experimental.copy_to_device("/GPU:0")
).prefetch(tf.data.AUTOTUNE)
else:
dataset = dataset.prefetch(tf.data.AUTOTUNE)

return dataset


# load model
Expand Down Expand Up @@ -125,27 +135,20 @@ def infinite_gen(inputs):


# train model
train = config.dataset["train"]
val = config.dataset["val"]
batch_size = config.train["batch_size"]
train_rec, train_eval, train_labels = train_data
val_rec, val_eval, val_labels = val_data

train_ds = make_dataset(train_rec, train_eval, train_labels, training=True)
val_ds = make_dataset(val_rec, val_eval, val_labels)

history = model.fit(
infinite_gen(train_data),
validation_data=infinite_gen(val_data),
# batch_size=config.train["batch_size"],
train_ds,
validation_data=val_ds,
epochs=config.train["epochs"],
callbacks=callbacks,
# shuffle=True,
verbose=1,
steps_per_epoch=train["shots"]
* len(train["rounds"])
* len(train["states"])
// batch_size
+ 2, # +2 is for model.fit to know that the num_rounds coordinate is not fixed
validation_steps=val["shots"]
* len(val["rounds"])
* len(val["states"])
// batch_size
+ 2, # +2 is for model.fit to know that the num_rounds coordinate is not fixed
steps_per_epoch=train_rec.shape[0] // batch_size,
validation_steps=val_rec.shape[0] // batch_size,
)
model.save(config.checkpoint_dir / "final_weights.keras")
model.save_weights(config.checkpoint_dir / "final_weights.h5")