minor updates
sammcj committed Aug 1, 2024
1 parent e17d11c commit fa765dc
Showing 4 changed files with 139 additions and 71 deletions.
59 changes: 39 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
@@ -81,29 +81,52 @@ ingest -o output.md /path/to/project

### VRAM Estimation and Model Compatibility

Ingest includes a feature to estimate VRAM requirements and check model compatibility using the [Gollama](https://github.com/sammcj/gollama)'s vramestimator package. This helps you determine if your generated content will fit within the specified model, VRAM, and quantisation constraints.
Ingest includes a feature to estimate VRAM requirements and check model compatibility using the [Gollama](https://github.com/sammcj/gollama)'s vramestimator package. This helps you determine if your generated content will fit within the specified model, VRAM, and quantization constraints.

To use this feature, add the following flags to your ingest command:

```shell
ingest --estimate-vram --model <model_id> --vram <vram_size> --quant <quantisation_level> [other flags] <paths>
ingest --vram --model <model_id> [--memory <memory_in_gb>] [--quant <quantization>] [--context <context_length>] [--kvcache <kv_cache_quant>] [--quanttype <quant_type>] [other flags] <paths>
```

For example:
Examples:

Estimate VRAM usage for a specific context:

```shell
ingest --estimate-vram --model NousResearch/Hermes-2-Theta-Llama-3-8B --vram 8 --quant q4_0 /path/to/project
ingest --vram --model NousResearch/Hermes-2-Theta-Llama-3-8B --quant q4_k_m --context 2048 --kvcache q4_0
# Estimated VRAM usage: 5.35 GB
```

This will generate the content as usual and then check if it's compatible with the specified model, VRAM size (8 GB), and quantisation level (q4_0):
Calculate maximum context for a given memory constraint:

```shell
ingest --estimate-vram --model NousResearch/Hermes-2-Theta-Llama-3-8B --vram 8 --quant q4_0 .
⠋ Traversing directory and building tree... [0s]
ingest --vram --model NousResearch/Hermes-2-Theta-Llama-3-8B --quant q4_k_m --memory 6 --kvcache q8_0
# Maximum context for 6.00 GB of memory: 5069
```

[✓] The generated content fits within the specified model/VRAM/quantisation constraints.
[i] 14,702 Tokens (Approximate)
Find the best BPW (Bits Per Weight):

```shell
ingest --vram --model NousResearch/Hermes-2-Theta-Llama-3-8B --memory 6 --quanttype gguf
# Best BPW for 6.00 GB of memory: IQ3_S
```

The tool also works for exl2 (ExllamaV2) models:

```shell
ingest --vram --model NousResearch/Hermes-2-Theta-Llama-3-8B --quant 5.0 --context 2048 --kvcache q4_0 # For exl2 models
ingest --vram --model NousResearch/Hermes-2-Theta-Llama-3-8B --quant 5.0 --memory 6 --kvcache q8_0 # For exl2 models
```

When using the VRAM estimation feature along with content generation, ingest will provide information about the generated content's compatibility with the specified constraints:

```shell
ingest --vram --model NousResearch/Hermes-2-Theta-Llama-3-8B --memory 8 --quant q4_0 .
⠋ Traversing directory and building tree... [0s]
[i] 14,702 Tokens (Approximate)
Maximum context for 8.00 GB of memory: 10240
Generated content (14,702 tokens) fits within maximum context.
Top 5 largest files (by estimated token count):
1. /Users/samm/git/sammcj/ingest/main.go (4,682 tokens)
2. /Users/samm/git/sammcj/ingest/filesystem/filesystem.go (2,694 tokens)
@@ -115,12 +138,15 @@ Top 5 largest files (by estimated token count):

Available flags for VRAM estimation:

- `--estimate-vram`: Enable VRAM estimation and model compatibility check
- `--vram`: Enable VRAM estimation and model compatibility check
- `--model`: Specify the model ID to check against (required for estimation)
- `--vram`: Specify the VRAM size to check against, in GB (optional)
- `--quant`: Specify the quantisation level to check against (e.g., fp16, q8_0, q4_0)
- `--memory`: Specify the available memory in GB for context calculation (optional)
- `--quant`: Specify the quantization type (e.g., q4_k_m) or bits per weight (e.g., 5.0)
- `--context`: Specify the context length for VRAM estimation (optional)
- `--kvcache`: Specify the KV cache quantization (fp16, q8_0, or q4_0)
- `--quanttype`: Specify the quantization type (gguf or exl2)

If the generated content fits within the specified constraints, you'll see a success message. Otherwise, you'll receive a warning that the content may not fit.
Ingest will provide appropriate output based on the combination of flags used, such as estimating VRAM usage, calculating maximum context, or finding the best BPW. If the generated content fits within the specified constraints, you'll see a success message. Otherwise, you'll receive a warning that the content may not fit.
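The flag combinations above all drive one underlying capacity model: bytes for the quantised weights plus bytes for the KV cache at a given context length. As a rough back-of-envelope sketch — not Gollama's actual vramestimator, which reads real model metadata; all figures below are hypothetical stand-ins for an 8B Llama-3-class model at roughly q4_k_m precision:

```go
package main

import "fmt"

// estimateVRAMGB is an illustrative approximation only. It ignores
// activation memory and runtime overhead that a real estimator accounts for.
func estimateVRAMGB(nParams, bpw float64, context, nLayers, nKVHeads, headDim, kvBytes int) float64 {
	weights := nParams * bpw / 8 // bytes for the quantised weights
	// K and V caches: 2 tensors per layer, one entry per token per KV head.
	kv := float64(2 * nLayers * context * nKVHeads * headDim * kvBytes)
	return (weights + kv) / (1 << 30)
}

func main() {
	// Hypothetical: 8e9 params, ~4.5 bits per weight, 2048-token context,
	// 32 layers, 8 KV heads of dim 128, fp16 (2-byte) KV cache.
	fmt.Printf("%.2f GB\n", estimateVRAMGB(8.0e9, 4.5, 2048, 32, 8, 128, 2))
	// prints: 4.44 GB
}
```

The same relationship is what `--memory` inverts: holding the weight term fixed, the estimator solves for the largest context whose KV cache still fits in the remaining budget.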

## LLM Integration

@@ -152,13 +178,6 @@ Ingest uses a configuration file located at `~/.config/ingest/config.json`.

You can make Ollama processing run without prompting by setting `"llm_auto_run": true` in the config file.

The config file also contains
## Configuration

Ingest uses a configuration file located at `~/.config/ingest/config.json`.

You can make Ollama processing run without prompting setting `"llm_auto_run": true` in the config file.

The config file also contains:

- `llm_model`: The model to use for processing the prompt, e.g. "llama3.1:8b-q5_k_m".
18 changes: 9 additions & 9 deletions go.mod
@@ -12,32 +12,32 @@ require (
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
github.com/sammcj/gollama v1.21.1
github.com/sashabaranov/go-openai v1.27.1
github.com/schollz/progressbar/v3 v3.14.4
github.com/schollz/progressbar/v3 v3.14.5
github.com/spf13/cobra v1.8.1
)

require (
github.com/alecthomas/chroma/v2 v2.8.0 // indirect
github.com/alecthomas/chroma/v2 v2.14.0 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/gorilla/css v1.0.0 // indirect
github.com/dlclark/regexp2 v1.11.2 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/css v1.0.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/microcosm-cc/bluemonday v1.0.25 // indirect
github.com/microcosm-cc/bluemonday v1.0.27 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.15.2 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/yuin/goldmark v1.5.4 // indirect
github.com/yuin/goldmark-emoji v1.0.2 // indirect
golang.org/x/net v0.17.0 // indirect
github.com/yuin/goldmark v1.7.4 // indirect
github.com/yuin/goldmark-emoji v1.0.3 // indirect
golang.org/x/net v0.27.0 // indirect
golang.org/x/sys v0.22.0 // indirect
golang.org/x/term v0.22.0 // indirect
)
21 changes: 21 additions & 0 deletions go.sum
@@ -1,9 +1,13 @@
github.com/alecthomas/assert/v2 v2.2.1 h1:XivOgYcduV98QCahG8T5XTezV5bylXe+lBxLG2K2ink=
github.com/alecthomas/assert/v2 v2.2.1/go.mod h1:pXcQ2Asjp247dahGEmsZ6ru0UVwnkhktn7S0bBDLxvQ=
github.com/alecthomas/assert/v2 v2.7.0 h1:QtqSACNS3tF7oasA8CU6A6sXZSBDqnm7RfpLl9bZqbE=
github.com/alecthomas/chroma/v2 v2.8.0 h1:w9WJUjFFmHHB2e8mRpL9jjy3alYDlU0QLDezj1xE264=
github.com/alecthomas/chroma/v2 v2.8.0/go.mod h1:yrkMI9807G1ROx13fhe1v6PN2DDeaR73L3d+1nmYQtw=
github.com/alecthomas/chroma/v2 v2.14.0 h1:R3+wzpnUArGcQz7fCETQBzO5n9IMNi13iIs46aU4V9E=
github.com/alecthomas/chroma/v2 v2.14.0/go.mod h1:QolEbTfmUHIMVpBqxeDnNBj2uoeI4EbYP4i6n68SG4I=
github.com/alecthomas/repr v0.2.0 h1:HAzS41CIzNW5syS8Mf9UwXhNH1J9aix/BvDRf1Ml2Yk=
github.com/alecthomas/repr v0.2.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
@@ -20,12 +24,18 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/dlclark/regexp2 v1.11.2 h1:/u628IuisSTwri5/UKloiIsH8+qF2Pu7xEQX+yIKg68=
github.com/dlclark/regexp2 v1.11.2/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4=
github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -44,6 +54,8 @@ github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6T
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/microcosm-cc/bluemonday v1.0.25 h1:4NEwSfiJ+Wva0VxN5B8OwMicaJvD8r9tlJWm9rtloEg=
github.com/microcosm-cc/bluemonday v1.0.25/go.mod h1:ZIOjCQp1OrzBBPIJmfX4qDYFuhU02nx4bn030ixfHLE=
github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
@@ -71,6 +83,8 @@ github.com/sashabaranov/go-openai v1.27.1 h1:7Nx6db5NXbcoutNmAUQulEQZEpHG/Skzfex
github.com/sashabaranov/go-openai v1.27.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/schollz/progressbar/v3 v3.14.4 h1:W9ZrDSJk7eqmQhd3uxFNNcTr0QL+xuGNI9dEMrw0r74=
github.com/schollz/progressbar/v3 v3.14.4/go.mod h1:aT3UQ7yGm+2ZjeXPqsjTenwL3ddUiuZ0kfQ/2tHlyNI=
github.com/schollz/progressbar/v3 v3.14.5 h1:97RrSxbBASxQuZN9yemnyGrFZ/swnG6IrEe2R0BseX8=
github.com/schollz/progressbar/v3 v3.14.5/go.mod h1:Nrzpuw3Nl0srLY0VlTvC4V6RL50pcEymjy6qyJAaLa0=
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
@@ -83,10 +97,17 @@ github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
github.com/yuin/goldmark v1.3.7/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.5.4 h1:2uY/xC0roWy8IBEGLgB1ywIoEJFGmRrX21YQcvGZzjU=
github.com/yuin/goldmark v1.5.4/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark v1.7.1/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E=
github.com/yuin/goldmark v1.7.4 h1:BDXOHExt+A7gwPCJgPIIq7ENvceR7we7rOS9TNoLZeg=
github.com/yuin/goldmark v1.7.4/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E=
github.com/yuin/goldmark-emoji v1.0.2 h1:c/RgTShNgHTtc6xdz2KKI74jJr6rWi7FPgnP9GAsO5s=
github.com/yuin/goldmark-emoji v1.0.2/go.mod h1:RhP/RWpexdp+KHs7ghKnifRoIs/Bq4nDS7tRbCkOwKY=
github.com/yuin/goldmark-emoji v1.0.3 h1:aLRkLHOuBR2czCY4R8olwMjID+tENfhyFDMCRhbIQY4=
github.com/yuin/goldmark-emoji v1.0.3/go.mod h1:tTkZEbwu5wkPmgTcitqddVxY9osFZiavD+r4AzQrh1U=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys=
golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
112 changes: 70 additions & 42 deletions main.go
@@ -46,10 +46,13 @@ var (
promptPrefix string
promptSuffix string
report bool
useVRAMEstimator bool
modelID string
vramSize float64
quantLevel string
vramFlag bool
modelIDFlag string
quantFlag string
contextFlag int
kvCacheFlag string
memoryFlag float64
quantTypeFlag string
verbose bool
Version string // This will be set by the linker at build time
)
@@ -87,11 +90,14 @@ func main() {
rootCmd.Flags().StringArrayP("prompt", "p", nil, "Prompt suffix to append to the generated content")
rootCmd.Flags().StringVarP(&templatePath, "template", "t", "", "Optional Path to a custom Handlebars template")

// vRAM estimation via Gollama package
rootCmd.Flags().BoolVar(&useVRAMEstimator, "estimate-vram", false, "Use vramestimator to check token size compatibility")
rootCmd.Flags().StringVar(&modelID, "model", "", "Model ID to check against with vramestimator")
rootCmd.Flags().Float64Var(&vramSize, "vram", 0, "VRAM size to check against with vramestimator (in GB)")
rootCmd.Flags().StringVar(&quantLevel, "quant", "", "Quantisation level to check against with vramestimator")
// VRAM estimation flags
rootCmd.Flags().BoolVar(&vramFlag, "vram", false, "Estimate vRAM usage")
rootCmd.Flags().StringVar(&modelIDFlag, "model", "", "vRAM Estimation - Model ID")
rootCmd.Flags().StringVar(&quantFlag, "quant", "", "vRAM Estimation - Quantization type (e.g., q4_k_m) or bits per weight (e.g., 5.0)")
rootCmd.Flags().IntVar(&contextFlag, "context", 0, "vRAM Estimation - Context length for vRAM estimation")
rootCmd.Flags().StringVar(&kvCacheFlag, "kvcache", "fp16", "vRAM Estimation - KV cache quantization: fp16, q8_0, or q4_0")
rootCmd.Flags().Float64Var(&memoryFlag, "memory", 0, "vRAM Estimation - Available memory in GB for context calculation")
rootCmd.Flags().StringVar(&quantTypeFlag, "quanttype", "gguf", "vRAM Estimation - Quantization type: gguf or exl2")


rootCmd.ParseFlags(os.Args[1:])
@@ -246,15 +252,10 @@ func run(cmd *cobra.Command, args []string) error {
return fmt.Errorf("failed to render template: %w", err)
}

// Check with vramestimator if requested
if useVRAMEstimator {
compatible, err := checkWithVRAMEstimator(rendered)
if err != nil {
utils.PrintColouredMessage("!", fmt.Sprintf("Failed to check with vramestimator: %v", err), color.FgRed)
} else if !compatible {
utils.PrintColouredMessage("!", "The generated content may not fit within the specified model/VRAM/quantisation constraints.", color.FgYellow)
} else {
utils.PrintColouredMessage("✓", "The generated content fits within the specified model/VRAM/quantisation constraints.", color.FgGreen)
// VRAM estimation
if vramFlag {
if err := performVRAMEstimation(rendered); err != nil {
fmt.Printf("Error in VRAM estimation: %v\n", err)
}
}
useLLM, _ := cmd.Flags().GetBool("llm")
@@ -514,44 +515,71 @@ func handleLLMOutput(rendered string, llmConfig config.LLMConfig, countTokens bo
}


func checkWithVRAMEstimator(content string) (bool, error) {
if modelID == "" {
return false, fmt.Errorf("model ID is required for VRAM estimation")
}

tokenCount := token.CountTokens(content, encoding)
func performVRAMEstimation(content string) error {
if modelIDFlag == "" {
return fmt.Errorf("model ID is required for VRAM estimation")
}

var kvCacheQuant vramestimator.KVCacheQuantisation
switch quantLevel {
switch kvCacheFlag {
case "fp16":
kvCacheQuant = vramestimator.KVCacheFP16
case "q8_0":
kvCacheQuant = vramestimator.KVCacheQ8_0
case "q4_0":
kvCacheQuant = vramestimator.KVCacheQ4_0
default:
return false, fmt.Errorf("invalid quantisation level: %s", quantLevel)
}

bpw, err := vramestimator.ParseBPWOrQuant(quantLevel)
if err != nil {
return false, fmt.Errorf("failed to parse quantisation level: %w", err)
fmt.Printf("Invalid KV cache quantization: %s. Using default fp16.\n", kvCacheFlag)
kvCacheQuant = vramestimator.KVCacheFP16
}

vramRequired, err := vramestimator.CalculateVRAM(modelID, bpw, tokenCount, kvCacheQuant, "")
if err != nil {
return false, fmt.Errorf("failed to calculate VRAM: %w", err)
}
tokenCount := token.CountTokens(content, encoding)

if vramSize > 0 {
return vramRequired <= vramSize, nil
}
if memoryFlag > 0 && contextFlag == 0 && quantFlag == "" {
// Calculate best BPW
bestBPW, err := vramestimator.CalculateBPW(modelIDFlag, memoryFlag, 0, kvCacheQuant, quantTypeFlag, "")
if err != nil {
return fmt.Errorf("error calculating BPW: %w", err)
}
fmt.Printf("Best BPW for %.2f GB of memory: %v\n", memoryFlag, bestBPW)
} else {
// Parse the quant flag for other operations
bpw, err := vramestimator.ParseBPWOrQuant(quantFlag)
if err != nil {
return fmt.Errorf("error parsing quantization: %w", err)
}

// If VRAM size is not specified, we'll calculate the maximum context
maxContext, err := vramestimator.CalculateContext(modelID, vramSize, bpw, kvCacheQuant, "")
if err != nil {
return false, fmt.Errorf("failed to calculate maximum context: %w", err)
if memoryFlag > 0 && contextFlag == 0 {
// Calculate maximum context
maxContext, err := vramestimator.CalculateContext(modelIDFlag, memoryFlag, bpw, kvCacheQuant, "")
if err != nil {
return fmt.Errorf("error calculating context: %w", err)
}
fmt.Printf("Maximum context for %.2f GB of memory: %d\n", memoryFlag, maxContext)
if tokenCount > maxContext {
fmt.Printf("Warning: Generated content (%d tokens) exceeds maximum context.\n", tokenCount)
} else {
fmt.Printf("Generated content (%d tokens) fits within maximum context.\n", tokenCount)
}
} else if contextFlag > 0 {
// Calculate VRAM usage
vram, err := vramestimator.CalculateVRAM(modelIDFlag, bpw, contextFlag, kvCacheQuant, "")
if err != nil {
return fmt.Errorf("error calculating VRAM: %w", err)
}
fmt.Printf("Estimated VRAM usage: %.2f GB\n", vram)
if memoryFlag > 0 {
if vram > memoryFlag {
fmt.Printf("Warning: Estimated VRAM usage (%.2f GB) exceeds available memory (%.2f GB).\n", vram, memoryFlag)
} else {
fmt.Printf("Estimated VRAM usage (%.2f GB) fits within available memory (%.2f GB).\n", vram, memoryFlag)
}
}
} else {
return fmt.Errorf("invalid combination of flags. Please specify either --memory, --context, or both")
}
}

return tokenCount <= maxContext, nil
return nil
}
