# evaluate.py

import marimo

# marimo notebook boilerplate: the version string recorded for the tool that
# generated this file, and the App object every `@app.cell` function below is
# registered on.
__generated_with = "0.11.2"
app = marimo.App(width="medium")


@app.cell
def _():
    # Shared third-party / stdlib imports for the notebook, each imported once.
    # The statements keep the order in which each module first appeared so the
    # import side effects happen in the same sequence as before.
    import marimo as mo
    import torch
    import pathlib
    from easyfsl.methods import PrototypicalNetworks
    from torch import nn
    from pytorch_lightning import LightningDataModule
    import pandas as pd
    from torch.utils.data import DataLoader, Dataset
    import os

    from bacpipe.main import get_embeddings

    from sklearn.preprocessing import LabelEncoder
    from easyfsl.samplers import TaskSampler
    import numpy as np

    # Every imported name is returned so the other cells can request it.
    return (
        DataLoader,
        Dataset,
        LabelEncoder,
        LightningDataModule,
        PrototypicalNetworks,
        TaskSampler,
        get_embeddings,
        mo,
        nn,
        np,
        os,
        pathlib,
        pd,
        torch,
    )


@app.cell
def _():
    # Project-local pieces: the network class instantiated later in the
    # notebook and the ESC50 embedding dataset / data-module helpers.
    from src.protopipeline import SimpleNN
    from src.data_utils.esc50.embeddings_esc50 import (
        EmbeddingFileDataset,
        ClassicEmbeddingLoader,
        EmbeddingsMiniESC50DataModule,
    )

    return (
        ClassicEmbeddingLoader,
        EmbeddingFileDataset,
        EmbeddingsMiniESC50DataModule,
        SimpleNN,
    )


@app.cell
def _(pathlib):
    # Root folder of the miniESC50 dataset. Later cells read the
    # "audio/train", "audio/val", "audio/test" folders and the "meta/*.csv"
    # files beneath it. The path is machine-specific: change it when running
    # the notebook elsewhere.
    root_dir = pathlib.Path("/home/benjamin.cretois/data/esc50/miniESC50/")
    return (root_dir,)


@app.cell
def _(torch):
    def compute_global_prototypes(model, train_loader, device):
        """Average the model outputs per class to obtain one prototype each.

        Args:
            model: Module called on every batch of embeddings; it is put in
                eval mode first.
            train_loader: Iterable yielding ``(embeddings, labels)`` batches.
            device: Device both tensors of a batch are moved to before the
                forward pass.

        Returns:
            A dict mapping each label value (as returned by ``.item()``) to
            the mean, computed on CPU, of the outputs of all samples that
            carry that label.
        """
        model.eval()
        grouped = {}

        # Gradients are not needed to collect features.
        with torch.no_grad():
            for batch, targets in train_loader:
                batch = batch.to(device)
                targets = targets.to(device)
                outputs = model(batch)
                for vector, target in zip(outputs, targets):
                    # target is a 0-d tensor; .item() gives a hashable key
                    grouped.setdefault(target.item(), []).append(vector.cpu())

        return {
            class_id: torch.stack(vectors).mean(dim=0)
            for class_id, vectors in grouped.items()
        }

    return (compute_global_prototypes,)


@app.cell
def _(get_embeddings, root_dir):
    # Produce "birdnet" embeddings for each split and remember the folder the
    # returned loader reports for them (`embed_dir`). `loader` is reused for
    # every split, so after this cell it refers to the test-split loader.
    loader = get_embeddings(
        "birdnet",
        root_dir / "audio/train",
        check_if_primary_combination_exists=True,
    )
    train_embed_dir = loader.embed_dir

    loader = get_embeddings(
        "birdnet",
        root_dir / "audio/val",
        check_if_primary_combination_exists=True,
    )
    val_embed_dir = loader.embed_dir

    loader = get_embeddings(
        "birdnet",
        root_dir / "audio/test",
        check_if_primary_combination_exists=True,
    )
    test_embed_dir = loader.embed_dir

    return loader, test_embed_dir, train_embed_dir, val_embed_dir


@app.cell
def _(
    ClassicEmbeddingLoader,
    root_dir,
    test_embed_dir,
    train_embed_dir,
    val_embed_dir,
):
    # Data module pairing each split's embedding folder with its metadata CSV.
    # batch_size=1 means the loaders hand out one sample at a time.
    classic_data_module = ClassicEmbeddingLoader(
        embed_dir_train=str(train_embed_dir),
        embed_dir_val=str(val_embed_dir),
        embed_dir_test=str(test_embed_dir),
        csv_file_train=str(root_dir / "meta/esc50mini_train.csv"),
        csv_file_val=str(root_dir / "meta/esc50mini_val.csv"),
        csv_file_test=str(root_dir / "meta/esc50mini_test.csv"),
        batch_size=1,
    )
    return (classic_data_module,)


@app.cell
def _(classic_data_module):
    # train_loader: supplies the training samples from which the class
    #   prototypes are computed. No episodic training is needed here because
    #   the fully connected layer has already been trained.
    # test_loader: supplies the new samples to classify.
    train_loader, test_loader = (
        classic_data_module.train_dataloader(),
        classic_data_module.test_dataloader(),
    )
    return test_loader, train_loader


@app.cell
def _(SimpleNN, torch):
    # The embeddings were already produced upstream, so only the trained
    # SimpleNN head has to be instantiated and restored from its checkpoint.
    model = SimpleNN()

    # map_location="cpu": the model is never moved off the CPU in this
    # notebook, and this lets a checkpoint saved on a GPU machine load on a
    # CPU-only one.
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source.
    ckpt = torch.load(
        "/home/benjamin.cretois/Code/fewfox/lightning_logs/version_10/checkpoints/epoch=9-step=500.ckpt",
        map_location="cpu",
    )
    state_dict = ckpt["state_dict"]

    # Rebuild the state dict with the wrapper prefix removed from each key so
    # the names match the bare SimpleNN parameters. Only a *leading*
    # "backbone_model." or "model.backbone." is stripped; the same text
    # occurring later in a key is left untouched.
    new_state_dict = {}
    for key, value in state_dict.items():
        new_key = key
        for _prefix in ("backbone_model.", "model.backbone."):
            if key.startswith(_prefix):
                new_key = key[len(_prefix):]
                break
        new_state_dict[new_key] = value

    # Load the renamed weights and switch to inference mode.
    model.load_state_dict(new_state_dict)
    model.eval()
    return ckpt, key, model, new_key, new_state_dict, state_dict, value


@app.cell
def _(compute_global_prototypes, model, torch, train_loader):
    # One prototype per class, computed on CPU from the training samples.
    proto = compute_global_prototypes(model, train_loader, device="cpu")

    # Stack the prototypes in ascending label order, so row k of proto_tensor
    # belongs to the k-th smallest label.
    proto_tensor = torch.stack([proto[class_id] for class_id in sorted(proto)])
    return proto, proto_tensor


@app.cell
def _(model, np, proto_tensor, test_loader, torch):
    # Nearest-prototype classification of the test set.
    # Collects, per test sample: the model output (embeddings), the true label
    # (labels) and a 1/0 hit flag (accurracy).
    embeddings = []
    labels = []
    accurracy = []

    # Inference only: without no_grad every output would carry autograd
    # history that is never used.
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            tensor, label = data
            # Call the module itself rather than .forward() so hooks run.
            output = model(tensor)

            # dist[b, k] is the distance between sample b of the batch and
            # prototype k; the closest prototype per sample is the prediction.
            dist = torch.cdist(output, proto_tensor)
            indice = dist.argmin(dim=1)

            # NOTE(review): comparing a prototype row index with the label
            # assumes the labels are the contiguous integers 0..n_classes-1
            # (proto_tensor rows are in sorted-label order) — confirm.
            accurracy.extend((indice == label.reshape(-1)).int().tolist())

            # Works for any batch size: one entry per sample in the batch.
            embeddings.extend(output)
            labels.extend(label.reshape(-1).tolist())

    embeddings_t = torch.stack(embeddings, dim=0)
    labels = np.array(labels)
    return (
        accurracy,
        data,
        dist,
        embeddings,
        embeddings_t,
        i,
        indice,
        label,
        labels,
        output,
        tensor,
    )


@app.cell
def _(accurracy):
    # Accuracy: fraction of test samples flagged 1 (prediction matched label).
    # Kept as a bare expression, presumably so the notebook displays the value
    # as this cell's output. Raises ZeroDivisionError if no sample was evaluated.
    sum(accurracy) / len(accurracy)
    return


@app.cell
def _(embeddings_t, proto_tensor, torch):
    # Single matrix for the projection below: prototype rows first, then one
    # row per test embedding.
    proto_test = torch.cat((proto_tensor, embeddings_t), dim=0)
    return (proto_test,)


@app.cell
def _(labels, np, proto_test):
    from sklearn.manifold import TSNE
    import matplotlib.pyplot as plt
    import seaborn as sns

    # proto_test holds the prototypes followed by one row per test sample, and
    # `labels` has one entry per test sample, so the difference is the number
    # of prototype rows (previously hard-coded as 5).
    _n_proto = len(proto_test) - len(labels)

    # Hand scikit-learn a plain NumPy array instead of a torch tensor.
    features_2d = TSNE(n_components=2, perplexity=5).fit_transform(
        proto_test.detach().cpu().numpy()
    )

    query_2d = features_2d[_n_proto:]
    query_labels = labels

    proto_2d = features_2d[:_n_proto]
    # All prototypes share one pseudo-label so they are drawn in a single hue;
    # the value equals the prototype count (5 for five classes, as before).
    proto_labels = np.full(_n_proto, _n_proto)

    # Test samples as dots, prototypes as larger squares on the same axes.
    fig = sns.scatterplot(x=query_2d[:, 0], y=query_2d[:, 1], hue=query_labels, palette="deep")
    sns.scatterplot(x=proto_2d[:, 0], y=proto_2d[:, 1], hue=proto_labels, palette="deep", marker='s', s=100)

    # Put the legend outside the plot area.
    sns.move_legend(fig, "upper left", bbox_to_anchor=(1, 1))
    plt.show()
    return (
        TSNE,
        features_2d,
        fig,
        plt,
        proto_2d,
        proto_labels,
        query_2d,
        query_labels,
        sns,
    )


# Run the notebook through the marimo app when the file is executed as a script.
if __name__ == "__main__":
    app.run()