diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml deleted file mode 100644 index 29bfc57..0000000 --- a/.github/workflows/deploy.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: Deploy to GitHub Pages - -permissions: - contents: write - pages: write - -on: - push: - branches: [ "main", "master" ] - workflow_dispatch: -jobs: - deploy: - runs-on: ubuntu-latest - steps: [uses: fastai/workflows/quarto-ghp@master] diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml deleted file mode 100644 index 5608592..0000000 --- a/.github/workflows/test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -name: CI -on: [workflow_dispatch, pull_request, push] - -jobs: - test: - runs-on: ubuntu-latest - steps: [uses: fastai/workflows/nbdev-ci@master] diff --git a/.gitignore b/.gitignore index 900add7..8d86c7b 100644 --- a/.gitignore +++ b/.gitignore @@ -153,3 +153,9 @@ checklink/cookies.txt # Quarto .quarto + +checkpoints/ + +wandb/* + +*.parquet \ No newline at end of file diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 3b106e8..0000000 --- a/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2022, fastai - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 5c0e7ce..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,5 +0,0 @@ -include settings.ini -include LICENSE -include CONTRIBUTING.md -include README.md -recursive-exclude * __pycache__ diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/textbooks_A2YN/gpt_labeling.py b/examples/textbooks_A2YN/gpt_labeling.py deleted file mode 100644 index b19c72e..0000000 --- a/examples/textbooks_A2YN/gpt_labeling.py +++ /dev/null @@ -1,49 +0,0 @@ -import os - -from datasets import concatenate_datasets, load_dataset -from squeakily.helpers import LLMLabeler -from treasure_trove.core import label_dataset - -instruction = f"""Determine the following code's quality value for a software engineer whose goal is to improve their programming ability. 
-High quality code has the following: -* Readability: The code is written in a way that is easy to understand and follow, with consistent detailed comments, formatting, meaningful variable names, and appropriate code structure. -* Modularity: The code is organized into reusable and independent modules or functions, making it easier to comprehend and reuse in other projects. -* Detailed explanations: The code is accompanied by thorough explanations of the concepts and techniques used, providing learners with a deeper understanding of the underlying principles. -* Good design principles: The code follows best practices for software design, such as encapsulation, separation of concerns, and adhering to design patterns, making it easier to understand and maintain. -Medium quality code has the following: -* Readability: The code is reasonably well-structured and readable, but there may be occasional inconsistencies, some comments, or less descriptive variable names. -* Partial modularity: The code contains some reusable components, but not all parts of the code are organized into separate modules or functions. -* Some explanations: The code may have limited explanations or comments that provide a general understanding of the code's logic and purpose. -* Adequate design principles: The code follows basic design principles, such as separation of concerns, but may not fully adhere to advanced design patterns or best practices. -Low quality code has the following: -* Poor readability: The code is poorly structured and difficult to follow, with little to no comments, inconsistent formatting and unclear variable names. -* No modularity: The code is written in a monolithic style, lacking any organization into reusable or independent modules or functions. -* Limited explanations: The code provides minimal or no explanations, leaving learners with little guidance on its logic or purpose. -* Neglects design principles: The code shows a lack of consideration for design principles, making it harder to comprehend, maintain, and extend. 
- -Output nothing other than one of the following labels: -""" - -labels = ["high quality", "medium quality", "low quality"] -api_key = os.environ["OPENAI_KEY"] -labeler = LLMLabeler(instruction, labels, model_name="gpt-4", api_key=api_key) # gpt-3.5-turbo - -languages = ["python", "go", "java", "javascript", "c", "c++"] -subsets = [] -for lang in languages: - ds = load_dataset("bigcode/the-stack-smol", data_dir=f"data/{lang}")["train"] - sample = 50 / len(ds) - subset = label_dataset(ds, "content", labeler, labels, sample=sample, num_workers=8) - new_column = [lang] * len(subset) - subset = subset.add_column("language", new_column) - subsets.append(subset) - -labeled_ds = concatenate_datasets(subsets) - -# upload to huggingface -labeled_ds.push_to_hub("CarperAI/textbooks_A2YN_labeled_six_languages", private=True) - -# print number of each class -print(f"Number of {labels[0]}: {len(labeled_ds.filter(lambda x: x['label'] == 0))}") -print(f"Number of {labels[1]}: {len(labeled_ds.filter(lambda x: x['label'] == 1))}") -print(f"Number of {labels[2]}: {len(labeled_ds.filter(lambda x: x['label'] == 2))}") diff --git a/examples/textbooks_A2YN/train_labeler.py b/examples/textbooks_A2YN/train_labeler.py deleted file mode 100644 index 1249930..0000000 --- a/examples/textbooks_A2YN/train_labeler.py +++ /dev/null @@ -1,37 +0,0 @@ -from datasets import load_dataset -from transformers import pipeline, TrainingArguments -from treasure_trove.core import filter_dataset, label_dataset, train_labeler - - -ds = load_dataset("CarperAI/textbooks_A2YN_labeled")["train"] -batch_size = 32 -training_args = TrainingArguments( - output_dir="./code_edu", - num_train_epochs=3, - per_device_train_batch_size=batch_size, - per_device_eval_batch_size=batch_size, - warmup_steps=500, - weight_decay=0.01, - logging_dir="./logs", - logging_steps=10, - evaluation_strategy="epoch", - save_strategy="epoch", - load_best_model_at_end=True, - metric_for_best_model="accuracy", - greater_is_better=True, - seed=42, - push_to_hub=True, - hub_model_id="CarperAI/code_edu_classifier_py", - hub_private_repo=True, -) -base_model_name = "bigcode/starencoder" -model, tokenizer = train_labeler( - ds, - "content", - base_model_name, - n_labels=2, - training_args=training_args, - num_workers=4, - max_length=512, - push_to_hub=True, -) \ No newline at end of file diff --git a/generate_embeddings.py b/generate_embeddings.py new file mode 100644 index 0000000..0e814d5 --- /dev/null +++ b/generate_embeddings.py @@ -0,0 +1,192 @@ +from abc import ABC +from datasets import ( + load_dataset, +) +from dotenv import load_dotenv +import torch +from typing import Union, List, Dict + +from train_labeler import EncoderParams + +from transformers import ( + AutoModelForSequenceClassification, + AutoTokenizer, + AutoModel, +) + +load_dotenv(".env") + +# https://huggingface.co/bigcode/starencoder/discussions/3 +# https://github.com/bigcode-project/bigcode-encoder/blob/master/embedding_sandbox.ipynb + + +# https://github.com/bigcode-project/bigcode-encoder/blob/master/src/utils.py#L152 +def pooling(x: torch.Tensor, mask: torch.Tensor) -> torch.Tensor: + """Pools a batch of vector sequences into a batch of vector global representations. + It does so by taking the last vector in the sequence, as indicated by the mask. + + Args: + x (torch.Tensor): Batch of vector sequences with shape [B, T, F]. + mask (torch.Tensor): Batch of masks with shape [B, T]. + + Returns: + torch.Tensor: Pooled version of the input batch with shape [B, F]. 
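+
+        Example (editor's illustrative addition, not part of the original patch):
+            >>> x = torch.randn(2, 4, 8)                      # [B, T, F]
+            >>> mask = torch.tensor([[1, 1, 0, 0],
+            ...                      [1, 1, 1, 1]])           # [B, T]
+            >>> pooling(x, mask).shape                        # picks x[0, 1] and x[1, 3]
+            torch.Size([2, 8])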
+ """ + + eos_idx = mask.sum(1) - 1 + batch_idx = torch.arange(len(eos_idx), device=x.device) + + mu = x[batch_idx, eos_idx, :] + + return mu + + +# https://github.com/bigcode-project/bigcode-encoder/blob/master/src/utils.py#L121 +def pool_and_normalize( + features_sequence: torch.Tensor, + attention_masks: torch.Tensor, + return_norms: bool = False, +) -> Union[torch.Tensor, List[torch.Tensor]]: + """Temporal pooling of sequences of vectors and projection onto the unit sphere. + + Args: + features_sequence (torch.Tensor): Inpute features with shape [B, T, F]. + attention_masks (torch.Tensor): Pooling masks with shape [B, T, F]. + return_norms (bool, optional): Whether to additionally return the norms. Defaults to False. + + Returns: + Union[torch.Tensor, List[torch.Tensor]]: Pooled and normalized vectors with shape [B, F]. + """ + + pooled_embeddings = pooling(features_sequence, attention_masks) + embedding_norms = pooled_embeddings.norm(dim=1) + + normalizing_factor = torch.where( # Only normalize embeddings with norm > 1.0. + embedding_norms > 1.0, embedding_norms, torch.ones_like(embedding_norms) + ) + + pooled_normalized_embeddings = pooled_embeddings / normalizing_factor[:, None] + + if return_norms: + return pooled_normalized_embeddings, embedding_norms + else: + return pooled_normalized_embeddings + + +# https://github.com/bigcode-project/bigcode-encoder/blob/master/src/constants.py + + +def set_device(inputs: Dict[str, torch.Tensor], device: str) -> Dict[str, torch.Tensor]: + output_data = {} + for k, v in inputs.items(): + output_data[k] = v.to(device) + + return output_data + + +def prepare_tokenizer(tokenizer_path): + try: + tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) + except OSError: + tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, use_auth_token=True) + + tokenizer.add_special_tokens({"pad_token": EncoderParams.PAD_TOKEN}) + tokenizer.add_special_tokens({"sep_token": EncoderParams.SEPARATOR_TOKEN}) + tokenizer.add_special_tokens({"cls_token": EncoderParams.CLS_TOKEN}) + tokenizer.add_special_tokens({"mask_token": EncoderParams.MASK_TOKEN}) + return tokenizer + + +def truncate_sentences( + sentence_list: List[str], maximum_length: Union[int, float] +) -> List[str]: + truncated_sentences = [] + + for sentence in sentence_list: + truncated_sentences.append(sentence[:maximum_length]) + + return truncated_sentences + + +class StarEncoder(torch.nn.Module): + def __init__(self, device): + super().__init__() + + self.tokenizer = prepare_tokenizer(EncoderParams.base_model_name) + self.encoder = ( + AutoModel.from_pretrained( + EncoderParams.base_model_name, use_auth_token=True + ) + .to(device) + .eval() + ) + self.device = device + self.max_input_len = EncoderParams.max_input_length + self.maximum_token_len = EncoderParams.max_token_length + + def forward(self, input_sentences): + inputs = self.tokenizer( + [ + f"{EncoderParams.CLS_TOKEN}{sentence}{EncoderParams.SEPARATOR_TOKEN}" + for sentence in input_sentences + ], + padding="longest", + max_length=self.maximum_token_len, + truncation=True, + return_tensors="pt", + ) + + outputs = self.encoder(**set_device(inputs, self.device)) + embedding = pool_and_normalize(outputs.hidden_states[-1], inputs.attention_mask) + + return embedding + + def encode(self, input_sentences, batch_size=32, **kwargs): + truncated_input_sentences = truncate_sentences( + input_sentences, self.max_input_len + ) + + n_batches = len(truncated_input_sentences) // batch_size + int( + len(truncated_input_sentences) % batch_size > 0 + ) + 
+ embedding_batch_list = [] + + for i in range(n_batches): + start_idx = i * batch_size + end_idx = min((i + 1) * batch_size, len(truncated_input_sentences)) + + with torch.no_grad(): + embedding_batch_list.append( + self.forward(truncated_input_sentences[start_idx:end_idx]) + .detach() + .cpu() + ) + + input_sentences_embedding = torch.cat(embedding_batch_list) + + return input_sentences_embedding + + +tokenizer = AutoTokenizer.from_pretrained( + EncoderParams.base_model_name, max_length=EncoderParams.max_token_length +) +if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + +dataset = load_dataset("roborovski/phi-1") + +device = torch.device("cuda") +model = StarEncoder(device) + + +def process(x): + content = x["content"] + embedding = model.encode(content) + return {"embedding": embedding} + + +# process(dataset["train"][0]) + +processed_dataset = dataset.map(process, batched=True, batch_size=128) +processed_dataset.push_to_hub("roborovski/phi-2-embeddings") diff --git a/gpt_labeling.py b/gpt_labeling.py new file mode 100644 index 0000000..9f5eac1 --- /dev/null +++ b/gpt_labeling.py @@ -0,0 +1,105 @@ +import os +from pathlib import Path + +from datasets import ( + concatenate_datasets, + load_dataset, + IterableDataset, + Dataset, + ReadInstruction, +) +from dotenv import load_dotenv + +import time +from treasure_trove.core import ChatGPTLabeler, instruction + +load_dotenv(".env") +labels = ["high quality", "medium quality", "low quality"] +secondary_labels = ["high", "medium", "low"] +lang = "python" +max_chars = 4_096 +num_workers = 8 +labeler = ChatGPTLabeler( + instruction, + labels, + secondary_labels=secondary_labels, +) +dataset_chunks = [] + +buffer_size = 500 +num_chunks = 100 + +print("Loading dataset..") +print("Loaded dataset.") + +api_key = os.environ["OPENAI_KEY"] + +max_failures = 5 +failures = 0 + +ckpt_dir = "./checkpoints" +Path(ckpt_dir).mkdir(exist_ok=True) + + +def process(x): + failures = 0 + total_cost = 0 + label_idx, cost_info = 0, {} + while failures < max_failures: + try: + label_idx, cost_info = labeler(x["content"][:max_chars]) + time.sleep(1) + break + except Exception as e: + failures += 1 + print(e) + time.sleep(1) + if cost_info: + total_cost = cost_info["total_cost"] + print( + f"{label_idx} - tokens used: {cost_info['prompt_tokens']} | {cost_info['completion_tokens']} | {cost_info['total_cost']}" + ) + else: + print("row not classified.") + return {"label": label_idx, "cost": total_cost} + + +processed_chunk_datasets = [] + +first_save_idx = 8000 + +for i in range(num_chunks): + split = ReadInstruction( + "train", from_=i * buffer_size, to=(i + 1) * buffer_size, unit="abs" + ) + # if i < first_save_idx // buffer_size: + # print(f"skipping chunk {i}: {split}") + # continue + print(f"processing chunk {i}: {split}") + subset = load_dataset( + "parquet", split=split, data_files={"train": "data-00000-of-00144.parquet"} + ) + + # Label the subset + subset = subset.map(process, batched=False, num_proc=4) + + processed_chunk_datasets.append(subset) + + if i > first_save_idx // buffer_size: + all_datasets: Dataset = concatenate_datasets(processed_chunk_datasets) + try: + all_datasets.push_to_hub("roborovski/phi-1", private=True) + all_datasets.to_parquet(os.path.join(ckpt_dir, f"processed_{i}")) + except Exception as e: + print(e) + + # print number of each class + print( + f"Number of {labels[0]}: {len(all_datasets.filter(lambda x: x['label'] == 0))}" + ) + print( + f"Number of {labels[1]}: {len(all_datasets.filter(lambda x: x['label'] 
== 1))}" + ) + print( + f"Number of {labels[2]}: {len(all_datasets.filter(lambda x: x['label'] == 2))}" + ) diff --git a/llama_inference.py b/llama_inference.py new file mode 100644 index 0000000..27daa08 --- /dev/null +++ b/llama_inference.py @@ -0,0 +1,49 @@ +from transformers import AutoTokenizer +import transformers +import torch + +model = "../llama-7bf-hf" + +instruction_simple = f"""Determine the following code's quality value for a software engineer whose goal is to improve their programming ability. +High quality code has the following: +* Readability: The code is written in a way that is easy to understand and follow. +* Modularity: The code is organized into reusable and independent modules or functions. +* Detailed explanations: The code is accompanied by explanations of the concepts used. +* Good design principles: The code follows best practices for software design. +Medium quality code has the following: +* Readability: The code is reasonably well-structured and readable. +* Partial modularity: The code contains some reusable components. +* Some explanations: The code may have limited explanations or comments. +* Adequate design principles: The code follows basic design principles. +Low quality code has the following: +* Poor readability: The code is poorly structured and difficult to follow. +* No modularity: The code is written in a monolithic style. +* Limited explanations: The code provides minimal or no explanations. +* Neglects design principles: The code shows a lack of consideration for design principles. + +Output nothing other than one of the following labels: +High quality +Medium quality +Low quality +""" + + +tokenizer = AutoTokenizer.from_pretrained(model) +pipeline = transformers.pipeline( + "conversational", + model=model, + torch_dtype=torch.float16, + device_map="auto", +) + +sequences = pipeline( + instruction_simple, + do_sample=True, + top_k=10, + num_return_sequences=1, + eos_token_id=tokenizer.eos_token_id, + max_length=200, +) +for seq in sequences: + print(f"Result: {seq['generated_text']}") + diff --git a/llama_labeling.py b/llama_labeling.py new file mode 100644 index 0000000..0cffef9 --- /dev/null +++ b/llama_labeling.py @@ -0,0 +1,155 @@ +from typing import Optional, List + +import fire +import re + +from llama import Llama + + +instruction_simple = f"""Determine the following code's quality value for a software engineer whose goal is to improve their programming ability. +High quality code has the following: +* Readability: The code is written in a way that is easy to understand and follow. +* Modularity: The code is organized into reusable and independent modules or functions. +* Detailed explanations: The code is accompanied by explanations of the concepts used. +* Good design principles: The code follows best practices for software design. +Medium quality code has the following: +* Readability: The code is reasonably well-structured and readable. +* Partial modularity: The code contains some reusable components. +* Some explanations: The code may have limited explanations or comments. +* Adequate design principles: The code follows basic design principles. +Low quality code has the following: +* Poor readability: The code is poorly structured and difficult to follow. +* No modularity: The code is written in a monolithic style. +* Limited explanations: The code provides minimal or no explanations. +* Neglects design principles: The code shows a lack of consideration for design principles. 
+ +Output nothing other than one of the following labels: +High quality +Medium quality +Low quality +""" + + +def find_label(text: str, labels: List[str]): + for i, label in enumerate(labels): + pattern = re.compile(re.escape(label), re.IGNORECASE | re.MULTILINE) + match = re.search(pattern, text) + if bool(match): + return i + return None + + +import os +from pathlib import Path + +from datasets import ( + concatenate_datasets, + load_dataset, + IterableDataset, + Dataset, + ReadInstruction, +) +from dotenv import load_dotenv + +import time + +load_dotenv(".env") +labels = ["high quality", "medium quality", "low quality"] +secondary_labels = ["high", "medium", "low"] +lang = "python" +max_chars = 4_096 +num_workers = 8 +dataset_chunks = [] + +buffer_size = 500 +num_chunks = 100 + +print("Loading dataset..") +print("Loaded dataset.") + +max_failures = 5 +failures = 0 + +max_gen_len = 512 +max_seq_len = 1024 +temperature = 0.1 +top_p = 0.2 +max_batch_size = 4 + + +ckpt_dir = "../llama/7Bf" +tokenizer_path = "../llama/tokenizer.model" + +generator = Llama.build( + ckpt_dir=ckpt_dir, + tokenizer_path=tokenizer_path, + max_seq_len=max_seq_len, + max_batch_size=max_batch_size, +) + + +def process(x): + total_cost = 0 + label_idx = 0 + dialogs = [] + for i in range(len(x["content"])): + code_sample = x["content"][i][:max_gen_len] + dialogs.append( + [ + {"role": "system", "content": instruction_simple}, + {"role": "user", "content": code_sample}, + ] + ) + results = generator.chat_completion( + dialogs, # type: ignore + max_gen_len=max_gen_len, + temperature=temperature, + top_p=top_p, + ) + batch_labels = [] + for i in range(len(dialogs)): + completion_text = results[i]["generation"]["content"] + label = find_label(completion_text, labels) + batch_labels.append(label) + return {"label": batch_labels} + + +processed_chunk_datasets = [] + +first_save_idx = 8000 + +for i in range(num_chunks): + split = ReadInstruction( + "train", from_=i * buffer_size, to=(i + 1) * buffer_size, unit="abs" + ) + # if i < first_save_idx // buffer_size: + # print(f"skipping chunk {i}: {split}") + # continue + print(f"processing chunk {i}: {split}") + subset = load_dataset( + "parquet", split=split, data_files={"train": "data-00000-of-00144.parquet"} + ) + + # Label the subset + subset = subset.map(process, batched=True, batch_size=max_batch_size, num_proc=1) + + processed_chunk_datasets.append(subset) + + if i > first_save_idx // buffer_size: + all_datasets: Dataset = concatenate_datasets(processed_chunk_datasets) + try: + all_datasets.push_to_hub("roborovski/phi-1", private=True) + all_datasets.to_parquet(os.path.join(ckpt_dir, f"processed_{i}")) + except Exception as e: + print(e) + + # print number of each class + print( + f"Number of {labels[0]}: {len(all_datasets.filter(lambda x: x['label'] == 0))}" + ) + print( + f"Number of {labels[1]}: {len(all_datasets.filter(lambda x: x['label'] == 1))}" + ) + print( + f"Number of {labels[2]}: {len(all_datasets.filter(lambda x: x['label'] == 2))}" + ) diff --git a/nbs/00_core.ipynb b/nbs/00_core.ipynb deleted file mode 100644 index 7e7aea7..0000000 --- a/nbs/00_core.ipynb +++ /dev/null @@ -1,513 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# core\n", - "\n", - "> Fill in a module description here" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# | default_exp core" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": 
{}, - "outputs": [], - "source": [ - "# | export\n", - "import evaluate\n", - "import time\n", - "\n", - "import numpy as np\n", - "\n", - "from transformers import (\n", - " AutoModelForSequenceClassification,\n", - " AutoTokenizer,\n", - " DataCollatorWithPadding,\n", - " Trainer,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# | hide\n", - "from nbdev.showdoc import *" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# | export\n", - "def classify(x, labels, llm_labeler, max_failures=5, default_label=0):\n", - " failures = 0\n", - " while failures < max_failures:\n", - " try:\n", - " label = labels.index(llm_labeler(x)[0])\n", - " time.sleep(1)\n", - " return label\n", - " except Exception as e:\n", - " failures += 1\n", - " print(e)\n", - " time.sleep(1)\n", - " pass\n", - " if failures == max_failures:\n", - " return default_label" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# | export\n", - "def label_dataset(\n", - " dataset, text_column, labeler_model, labels, sample=0.1, num_workers=4, max_chars=4_096\n", - "):\n", - " \"\"\"\n", - " Filters a dataset using a labeler model.\n", - "\n", - " Args:\n", - " dataset (datasets.Dataset): Dataset to filter\n", - " text_column (str): Name of the column containing the text to classify\n", - " labeler_model (Any): Model to use for labeling\n", - " labels (List[str]): List of labels\n", - " sample (float): The fraction of the dataset to label and use for filtering\n", - " batch_size (int): Batch size for labeling\n", - " num_workers (int): Number of workers for labeling\n", - " max_chars (int): Maximum number of characters to truncate the text to before labeling (reduces rate limiting errors)\n", - " \"\"\"\n", - "\n", - " # Get a subset of the dataset\n", - " subset = dataset.shuffle(seed=115).select(range(int(len(dataset) * sample)))\n", - "\n", - " # Label the subset\n", - " subset = subset.map(\n", - " lambda x: {\"label\": classify(x[text_column][:max_chars], labels, labeler_model)},\n", - " batched=False,\n", - " num_proc=num_workers,\n", - " )\n", - "\n", - " return subset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using custom data configuration bigcode--the-stack-smol-8f8055c3a4e4b4e3\n", - "Found cached dataset json (/home/nathan/.cache/huggingface/datasets/bigcode___json/bigcode--the-stack-smol-8f8055c3a4e4b4e3/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cfb95116fc20477bb047848972658d69", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1 [00:00 Find the treasure in your trove of data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| eval: false\n", - "from datasets import load_dataset\n", - "from squeakily.helpers import LLMLabeler\n", - "from transformers import pipeline, TrainingArguments\n", - "from treasure_trove.core import filter_dataset, label_dataset, train_labeler\n", - "\n", - "instruction = \"\"\"Please label the following code as either educational or non-educational.\n", - "Educational code is code that is well written, follows best practices, has documentation such that it 
might be found in a textbook.\n", - "Non-educational code is code that is poorly written, lacks documentation, contain bugs, or is not idiomatic.\n", - "Labels:\n", - "\"\"\"\n", - "labels = [\"educational\", \"non-educational\"]\n", - "api_key = \"\"\n", - "labeler = LLMLabeler(instruction, labels, model_name=\"gpt-4\", api_key=api_key)\n", - "\n", - "ds = load_dataset(\"bigcode/the-stack-smol\", data_dir=\"data/python\")[\"train\"]\n", - "\n", - "# Get the training arguments\n", - "batch_size=4,\n", - "training_args = TrainingArguments(\n", - " output_dir=\"./code_edu\",\n", - " num_train_epochs=1,\n", - " per_device_train_batch_size=batch_size,\n", - " per_device_eval_batch_size=batch_size,\n", - " warmup_steps=500,\n", - " weight_decay=0.01,\n", - " logging_dir=\"./logs\",\n", - " logging_steps=10,\n", - " evaluation_strategy=\"epoch\",\n", - " save_strategy=\"epoch\",\n", - " load_best_model_at_end=True,\n", - " metric_for_best_model=\"accuracy\",\n", - " greater_is_better=True,\n", - " seed=42,\n", - " push_to_hub=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| eval: false\n", - "subset = label_dataset(ds, \"content\", labeler, labels, sample=0.001)\n", - "base_model_name = \"bigcode/starencoder\"\n", - "model, tokenizer = train_labeler(\n", - " subset,\n", - " \"content\",\n", - " base_model_name,\n", - " n_labels=len(labels),\n", - " training_args=training_args,\n", - " num_workers=4,\n", - " max_length=512,\n", - " push_to_hub=True,\n", - ")\n", - "pipe = pipeline(\n", - " \"text-classification\", model=model, tokenizer=tokenizer, device=model.device\n", - ")\n", - "filtered_ds = filter_dataset(ds, \"content\", model, labels.index(\"educational\"))\n", - "filtered_ds.push_to_hub(\"ncoop57/code_edu\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "python3", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/nbs/_quarto.yml b/nbs/_quarto.yml deleted file mode 100644 index 0a6dfcb..0000000 --- a/nbs/_quarto.yml +++ /dev/null @@ -1,20 +0,0 @@ -project: - type: website - -format: - html: - theme: cosmo - css: styles.css - toc: true - -website: - twitter-card: true - open-graph: true - repo-actions: [issue] - navbar: - background: primary - search: true - sidebar: - style: floating - -metadata-files: [nbdev.yml, sidebar.yml] \ No newline at end of file diff --git a/nbs/index.ipynb b/nbs/index.ipynb deleted file mode 100644 index 5e9fc26..0000000 --- a/nbs/index.ipynb +++ /dev/null @@ -1,96 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# | hide\n", - "from treasure_trove.core import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# treasure_trove\n", - "\n", - "> Find the treasure in your trove of data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This file will become your README and also the index of your documentation." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Install" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```sh\n", - "pip install treasure_trove\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## How to use" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Fill me in please! 
Don't forget code examples:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "1 + 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "python3", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/nbs/nbdev.yml b/nbs/nbdev.yml deleted file mode 100644 index 8264f3b..0000000 --- a/nbs/nbdev.yml +++ /dev/null @@ -1,9 +0,0 @@ -project: - output-dir: _docs - -website: - title: "treasure_trove" - site-url: "https://CarperAI.github.io/treasure_trove" - description: "Find the treasure in your trove of data" - repo-branch: main - repo-url: "https://github.com/CarperAI/treasure_trove" diff --git a/nbs/styles.css b/nbs/styles.css deleted file mode 100644 index 66ccc49..0000000 --- a/nbs/styles.css +++ /dev/null @@ -1,37 +0,0 @@ -.cell { - margin-bottom: 1rem; -} - -.cell > .sourceCode { - margin-bottom: 0; -} - -.cell-output > pre { - margin-bottom: 0; -} - -.cell-output > pre, .cell-output > .sourceCode > pre, .cell-output-stdout > pre { - margin-left: 0.8rem; - margin-top: 0; - background: none; - border-left: 2px solid lightsalmon; - border-top-left-radius: 0; - border-top-right-radius: 0; -} - -.cell-output > .sourceCode { - border: none; -} - -.cell-output > .sourceCode { - background: none; - margin-top: 0; -} - -div.description { - padding-left: 2px; - padding-top: 5px; - font-style: italic; - font-size: 135%; - opacity: 70%; -} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..89399b9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +accelerate +datasets +evaluate +fastcore +openai +transformers +python-dotenv +pandas +wandb +huggingface_hub \ No newline at end of file diff --git a/settings.ini b/settings.ini deleted file mode 100644 index 3e8da59..0000000 --- a/settings.ini +++ /dev/null @@ -1,43 +0,0 @@ -[DEFAULT] -# All sections below are required unless otherwise specified. -# See https://github.com/fastai/nbdev/blob/master/settings.ini for examples. 
- -### Python library ### -repo = treasure_trove -lib_name = %(repo)s -version = 0.0.1 -min_python = 3.7 -license = apache2 -black_formatting = False - -### nbdev ### -doc_path = _docs -lib_path = treasure_trove -nbs_path = nbs -recursive = True -tst_flags = notest -put_version_in_init = True - -### Docs ### -branch = main -custom_sidebar = False -doc_host = https://%(user)s.github.io -doc_baseurl = /%(repo)s -git_url = https://github.com/%(user)s/%(repo)s -title = %(lib_name)s - -### PyPI ### -audience = Developers -author = ncoop57 -author_email = nacooper01@email.wm.edu -copyright = 2023 onwards, %(author)s -description = Find the treasure in your trove of data -keywords = nbdev jupyter notebook python -language = English -status = 3 -user = CarperAI - -### Optional ### -requirements = accelerate datasets evaluate fastcore langchain openai squeakily transformers -dev_requirements = black[jupyter] ipykernel -# console_scripts = \ No newline at end of file diff --git a/train_labeler.py b/train_labeler.py new file mode 100644 index 0000000..860c003 --- /dev/null +++ b/train_labeler.py @@ -0,0 +1,156 @@ +from dataclasses import dataclass +from datasets import load_dataset +from transformers import pipeline, TrainingArguments +import evaluate +import numpy as np +import wandb +from dotenv import load_dotenv +from huggingface_hub import login +import os + +from transformers import ( + AutoModelForSequenceClassification, + AutoTokenizer, + DataCollatorWithPadding, + Trainer, +) + +load_dotenv(".env") + +login(token=os.environ["HF_KEY"], add_to_git_credential=True) + +@dataclass +class EncoderParams: + batch_size = 32 + num_workers = 16 + push_to_hub = True + n_labels = 3 + text_column = "content" + labels = ["high quality", "medium quality", "low quality"] + base_model_name = "bigcode/starencoder" + id2label = {0: "HIGH_QUALITY", 1: "MEDIUM_QUALITY", 2: "LOW_QUALITY"} + label2id = {"HIGH_QUALITY": 0, "MEDIUM_QUALITY": 1, "LOW_QUALITY": 2} + MASK_TOKEN = "" + SEPARATOR_TOKEN = "" + PAD_TOKEN = "" + CLS_TOKEN = "" + max_input_length = 1024 + max_token_length = 1024 + + +def train(): + + dataset = load_dataset("roborovski/phi-2-labeled")["train"] + + tokenizer = AutoTokenizer.from_pretrained( + EncoderParams.base_model_name, max_length=EncoderParams.max_token_length + ) + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + + model = AutoModelForSequenceClassification.from_pretrained( + EncoderParams.base_model_name, + num_labels=EncoderParams.n_labels, + max_length=EncoderParams.max_token_length, + id2label=EncoderParams.id2label, + label2id=EncoderParams.label2id, + ) + + sample_table_data = [] + + def compute_metrics(eval_pred): + logits, labels = eval_pred + if isinstance(logits, tuple): + logits = logits[0] + predictions = np.argmax(logits, axis=-1) + acc = acc_metric.compute(predictions=predictions, references=labels) + precision = precision_metric.compute( + predictions=predictions, + references=labels, + average="macro" if len(labels) > 2 else "binary", + ) + recall = recall_metric.compute( + predictions=predictions, + references=labels, + average="macro" if len(labels) > 2 else "binary", + ) + f1 = f1_metric.compute( + predictions=predictions, + references=labels, + average="macro" if len(labels) > 2 else "binary", + ) + + decoded_sample = tokenizer.decode(predictions) + sample_table_data.append([decoded_sample, labels[0]]) + sample_table = wandb.Table( + columns=["sample", "label"], + data=sample_table_data, + ) + wandb.log({"sample": sample_table}) + + return 
{**acc, **precision, **recall, **f1} + + dataset = dataset.map( + lambda x: tokenizer( + x[EncoderParams.text_column], + padding="max_length", + truncation=True, + max_length=EncoderParams.max_input_length, + ), + batched=True, + num_proc=EncoderParams.num_workers, + ) + + dataset = dataset.train_test_split(test_size=0.05, seed=42) + + train_dataset = dataset["train"].shuffle(seed=42) + eval_dataset = dataset["test"].shuffle(seed=42).select(range(200)) + + data_collator = DataCollatorWithPadding(tokenizer=tokenizer) + + acc_metric = evaluate.load("accuracy") + precision_metric = evaluate.load("precision") + recall_metric = evaluate.load("recall") + f1_metric = evaluate.load("f1") + + wandb.login() + + wandb.init(project="phi-2-classifier") + + training_args = TrainingArguments( + output_dir="checkpoints", + num_train_epochs=100, + per_device_train_batch_size=EncoderParams.batch_size, + per_device_eval_batch_size=2, + warmup_steps=500, + weight_decay=0.01, + logging_dir="logs", + logging_steps=50, + eval_steps=5000, + evaluation_strategy="steps", + save_strategy="epoch", + save_steps=5, + seed=42, + push_to_hub=True, + hub_model_id="roborovski/phi-2-classifier", + hub_private_repo=True, + eval_accumulation_steps=1, + ) + + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + data_collator=data_collator, + compute_metrics=compute_metrics, + ) + + trainer.train() + + if EncoderParams.push_to_hub: + trainer.push_to_hub() + + +if __name__ == "__main__": + train() diff --git a/treasure_trove/__init__.py b/treasure_trove/__init__.py index f102a9c..e69de29 100644 --- a/treasure_trove/__init__.py +++ b/treasure_trove/__init__.py @@ -1 +0,0 @@ -__version__ = "0.0.1" diff --git a/treasure_trove/_modidx.py b/treasure_trove/_modidx.py deleted file mode 100644 index 79d02e9..0000000 --- a/treasure_trove/_modidx.py +++ /dev/null @@ -1,11 +0,0 @@ -# Autogenerated by nbdev - -d = { 'settings': { 'branch': 'main', - 'doc_baseurl': '/treasure_trove', - 'doc_host': 'https://CarperAI.github.io', - 'git_url': 'https://github.com/CarperAI/treasure_trove', - 'lib_path': 'treasure_trove'}, - 'syms': { 'treasure_trove.core': { 'treasure_trove.core.classify': ('core.html#classify', 'treasure_trove/core.py'), - 'treasure_trove.core.filter_dataset': ('core.html#filter_dataset', 'treasure_trove/core.py'), - 'treasure_trove.core.label_dataset': ('core.html#label_dataset', 'treasure_trove/core.py'), - 'treasure_trove.core.train_labeler': ('core.html#train_labeler', 'treasure_trove/core.py')}}} diff --git a/treasure_trove/core.py b/treasure_trove/core.py index 0fc06ac..372a8d9 100644 --- a/treasure_trove/core.py +++ b/treasure_trove/core.py @@ -1,13 +1,5 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_core.ipynb. - -# %% auto 0 -__all__ = ['classify', 'label_dataset', 'train_labeler', 'filter_dataset'] - -# %% ../nbs/00_core.ipynb 2 -import evaluate -import time - -import numpy as np +import re +import os from transformers import ( AutoModelForSequenceClassification, @@ -15,133 +7,113 @@ DataCollatorWithPadding, Trainer, ) +import time +import openai + +openai.api_key = os.getenv("OPENAI_KEY") + + +from typing import List + +instruction = f"""Determine the following code's quality value for a software engineer whose goal is to improve their programming ability. 
+High quality code has the following: +* Readability: The code is written in a way that is easy to understand and follow, with consistent detailed comments, formatting, meaningful variable names, and appropriate code structure. +* Modularity: The code is organized into reusable and independent modules or functions, making it easier to comprehend and reuse in other projects. +* Detailed explanations: The code is accompanied by thorough explanations of the concepts and techniques used, providing learners with a deeper understanding of the underlying principles. +* Good design principles: The code follows best practices for software design, such as encapsulation, separation of concerns, and adhering to design patterns, making it easier to understand and maintain. +Medium quality code has the following: +* Readability: The code is reasonably well-structured and readable, but there may be occasional inconsistencies, some comments, or less descriptive variable names. +* Partial modularity: The code contains some reusable components, but not all parts of the code are organized into separate modules or functions. +* Some explanations: The code may have limited explanations or comments that provide a general understanding of the code's logic and purpose. +* Adequate design principles: The code follows basic design principles, such as separation of concerns, but may not fully adhere to advanced design patterns or best practices. +Low quality code has the following: +* Poor readability: The code is poorly structured and difficult to follow, with little to no comments, inconsistent formatting and unclear variable names. +* No modularity: The code is written in a monolithic style, lacking any organization into reusable or independent modules or functions. +* Limited explanations: The code provides minimal or no explanations, leaving learners with little guidance on its logic or purpose. +* Neglects design principles: The code shows a lack of consideration for design principles, making it harder to comprehend, maintain, and extend. + +Output nothing other than one of the following labels: +{0} +""" + +instruction_simple = f"""Determine the following code's quality value for a software engineer whose goal is to improve their programming ability. +High quality code has the following: +* Readability: The code is written in a way that is easy to understand and follow. +* Modularity: The code is organized into reusable and independent modules or functions. +* Detailed explanations: The code is accompanied by explanations of the concepts used. +* Good design principles: The code follows best practices for software design. +Medium quality code has the following: +* Readability: The code is reasonably well-structured and readable. +* Partial modularity: The code contains some reusable components. +* Some explanations: The code may have limited explanations or comments. +* Adequate design principles: The code follows basic design principles. +Low quality code has the following: +* Poor readability: The code is poorly structured and difficult to follow. +* No modularity: The code is written in a monolithic style. +* Limited explanations: The code provides minimal or no explanations. +* Neglects design principles: The code shows a lack of consideration for design principles. 
+ +Output nothing other than one of the following labels: +High quality +Medium quality +Low quality +""" + + + + +class ChatGPTLabeler: + def __init__( + self, + instruction: str, + labels: List[str], + secondary_labels: List[str], + ): + self.instruction = instruction + self.labels = labels + self.secondary_labels = secondary_labels + + def find_label(self, text: str, labels: List[str]): + for i, label in enumerate(labels): + pattern = re.compile(re.escape(label), re.IGNORECASE | re.MULTILINE) + match = re.search(pattern, text) + if bool(match): + return i + return None + + def cost_info(self, oai_response): + prompt_tokens = oai_response["usage"]["prompt_tokens"] + completion_tokens = oai_response["usage"]["completion_tokens"] + total_cost = 0.0015 * prompt_tokens + 0.0002 * completion_tokens + + return dict( + total_cost=total_cost, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) + + def __call__(self, text: str): + formatted_instruction = instruction.format(self.labels) + completion = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + temperature=0, + max_tokens=4, + messages=[ + {"role": "system", "content": formatted_instruction}, + {"role": "user", "content": text}, + ], + ) + if "error" in completion: + return 0, None + output_text = completion["choices"][0]["message"]["content"] + label_idx = self.find_label(output_text, self.labels) + if not label_idx: + label_idx = self.find_label(output_text, self.secondary_labels) + cost_info = self.cost_info(completion) + if not label_idx: + raise Exception(f"Label not found in text: {output_text}") + return label_idx, cost_info -# %% ../nbs/00_core.ipynb 4 -def classify(x, labels, llm_labeler, max_failures=5, default_label=0): - failures = 0 - while failures < max_failures: - try: - label = labels.index(llm_labeler(x)[0]) - time.sleep(1) - return label - except Exception as e: - failures += 1 - print(e) - time.sleep(1) - pass - if failures == max_failures: - return default_label - -# %% ../nbs/00_core.ipynb 5 -def label_dataset( - dataset, text_column, labeler_model, labels, sample=0.1, num_workers=4, max_chars=4_096 -): - """ - Filters a dataset using a labeler model. - - Args: - dataset (datasets.Dataset): Dataset to filter - text_column (str): Name of the column containing the text to classify - labeler_model (Any): Model to use for labeling - labels (List[str]): List of labels - sample (float): The fraction of the dataset to label and use for filtering - batch_size (int): Batch size for labeling - num_workers (int): Number of workers for labeling - max_chars (int): Maximum number of characters to truncate the text to before labeling (reduces rate limiting errors) - """ - - # Get a subset of the dataset - subset = dataset.shuffle(seed=115).select(range(int(len(dataset) * sample))) - - # Label the subset - subset = subset.map( - lambda x: {"label": classify(x[text_column][:max_chars], labels, labeler_model)}, - batched=False, - num_proc=num_workers, - ) - - return subset - -# %% ../nbs/00_core.ipynb 7 -def train_labeler( - dataset, - text_column, - base_model_name, - n_labels, - training_args, - num_workers=4, - max_length=512, - push_to_hub=True, -): - """ - Trains a labeler model on a labeled dataset. 
- - Args: - dataset (datasets.Dataset): Dataset to train on - text_column (str): Name of the text column - base_model_name (str): Name of the base model to use - n_labels (int): Number of labels - epochs (int): Number of epochs to train - batch_size (int): Batch size for training - num_workers (int): Number of workers for training - max_length (int): Maximum length of the input - """ - # Load the tokenizer - tokenizer = AutoTokenizer.from_pretrained(base_model_name, max_length=max_length) - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - - # Load the model - model = AutoModelForSequenceClassification.from_pretrained( - base_model_name, num_labels=n_labels, max_length=max_length - ) - model.config.id2label = {i: i for i in range(n_labels)} - - # Preprocess the dataset - dataset = dataset.map( - lambda x: tokenizer( - x[text_column], padding="max_length", truncation=True, max_length=max_length - ), - batched=True, - num_proc=num_workers, - ) - - # Split the dataset - dataset = dataset.train_test_split(test_size=0.1, seed=42) - - # Get the data collator - data_collator = DataCollatorWithPadding(tokenizer=tokenizer) - - def compute_metrics(eval_preds): - metric = evaluate.load("glue", "mrpc") - logits, labels = eval_preds - if isinstance(logits, tuple): # Some models return tuples - logits = logits[0] - print(logits.shape, labels) - predictions = np.argmax(logits, axis=-1) - return metric.compute(predictions=predictions, references=labels) - - # Get the trainer - trainer = Trainer( - model=model, - args=training_args, - train_dataset=dataset["train"], - eval_dataset=dataset["test"], - data_collator=data_collator, - compute_metrics=compute_metrics, - ) - - # Train the model - trainer.train() - - # Push the model to the hub - if push_to_hub: - trainer.push_to_hub() - - # Return the model - return model, tokenizer - -# %% ../nbs/00_core.ipynb 9 def filter_dataset( dataset, text_column, labeler_model, labels_to_keep, batch_size=32, num_workers=4 ): diff --git a/view_dataset.py b/view_dataset.py new file mode 100644 index 0000000..864889f --- /dev/null +++ b/view_dataset.py @@ -0,0 +1,10 @@ +import os +from pathlib import Path +from collections import Counter + +from datasets import load_dataset + +ds = load_dataset("roborovski/phi-1")["train"] +print(ds) +print(Counter(ds['label'])) +print(Counter(ds['language']))