diff --git a/crates/llm-local/src/token_output_stream.rs b/crates/llm-local/src/token_output_stream.rs
index 6f082cdd5..1313942a5 100644
--- a/crates/llm-local/src/token_output_stream.rs
+++ b/crates/llm-local/src/token_output_stream.rs
@@ -1,9 +1,8 @@
-/// This is a wrapper around a tokenizer to ensure that tokens can be returned to the user in a
-/// streaming way rather than having to wait for the full decoding.
-/// Implementation for TokenOutputStream Code is borrowed from
-///
-/// Borrowed from https://github.com/huggingface/candle/blob/main/candle-examples/src/token_output_stream.rs
-/// (Commit SHA 4fd00b890036ef67391a9cc03f896247d0a75711)
+//! This is a wrapper around a tokenizer to ensure that tokens can be returned to the user in a
+//! streaming way rather than having to wait for the full decoding.
+//! Implementation for TokenOutputStream Code is borrowed from
+//!
+//! Borrowed from https://github.com/huggingface/candle/blob/4fd00b890036ef67391a9cc03f896247d0a75711/candle-examples/src/token_output_stream.rs
 pub struct TokenOutputStream {
     tokenizer: tokenizers::Tokenizer,
     tokens: Vec<u32>,
@@ -24,7 +23,6 @@ impl TokenOutputStream {
     /// Processes the next token in the sequence, decodes the current token stream,
     /// and returns any newly decoded text.
     ///
-    /// Based on the following code:
     pub fn next_token(&mut self, token: u32) -> anyhow::Result<Option<String>> {
         let prev_text = if self.tokens.is_empty() {
             String::new()
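For context, a minimal sketch of how this streaming decoder might be driven, assuming the crate keeps the candle-style API (`new`, `next_token`, `decode_rest`); the public module path and the `decode_rest` call are assumptions on my part, not something this diff shows.

```rust
use anyhow::Result;
use tokenizers::Tokenizer;

// Assumed public path; adjust to however the crate actually re-exports the type.
use llm_local::token_output_stream::TokenOutputStream;

/// Decodes a sequence of token ids incrementally, printing each fragment as it
/// becomes available instead of waiting for the full decode.
fn stream_decode(tokenizer: Tokenizer, token_ids: &[u32]) -> Result<String> {
    let mut stream = TokenOutputStream::new(tokenizer);
    let mut output = String::new();
    for &id in token_ids {
        // `next_token` may return None until enough tokens have accumulated to
        // decode a new text fragment; only emit when there is something new.
        if let Some(text) = stream.next_token(id)? {
            print!("{text}");
            output.push_str(&text);
        }
    }
    // Flush whatever is still buffered after the last token (assumed helper).
    if let Some(rest) = stream.decode_rest()? {
        output.push_str(&rest);
    }
    Ok(output)
}
```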