aws-neuron · arm-diaz · Jun 4, 2025 · Jun 4, 2025 · Jun 5, 2025 · Jun 5, 2025
diff --git a/.env.example b/.env.example
@@ -1,13 +1,24 @@
-# Model configuration
-## HuggingFace Model ID (https://huggingface.co/meta-llama/Meta-Llama-3-8B)
+# Example environment file for NKI-LLAMA
+# Copy this to .env and update with your values
+
+# Hugging Face Configuration
+HF_TOKEN=your_huggingface_token_here
 MODEL_ID=meta-llama/Meta-Llama-3-8B
-## Short name for model ID
-MODEL_NAME=meta-llama-3-8b
+MODEL_NAME=llama-3-8b
+
+# Inference Configuration
+INFERENCE_PORT=8080
+MAX_MODEL_LEN=8192 # used by vLLM; must be the same value as SEQ_LEN
+SEQ_LEN=8192 # used by main.py
+
+MAX_NUM_SEQS=4
+TENSOR_PARALLEL_SIZE=8
+
+# Dataset Configuration
+DATASET_NAME=databricks/databricks-dolly-15k
 
-# Server configurations
-PORT=8080
-MAX_MODEL_LEN=2048
-TENSOR_PARALLEL_SIZE=32
+# Neuron Configuration
+NEURON_RT_NUM_CORES=8
 
-# HuggingFace token for downloading models
-HF_TOKEN=your_token_here
+# Jupyter Configuration
+JUPYTER_PORT=8888
diff --git a/.gitignore b/.gitignore
@@ -267,6 +267,16 @@ test/inference/output
 **/neuronxcc-*
 global_metric_store.json
 benchmark_report.json
+benchmark_inference.json
 cached_requirements.txt
+benchmark_finetuning.json
+benchmark_results.json
+**/logs/
+compiled_merged_model/
+compiled_model/
+merged_model/
+src/self-attention/config
+requirements.txt.**
+model_env.sh
 
 # End of https://www.toptal.com/developers/gitignore/api/macos,windows,linux,jupyternotebooks,python
diff --git a/Makefile b/Makefile