From 1fc2161d648bdb0de89180502609c282c20b3c41 Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Mon, 27 Nov 2023 17:36:58 -0800
Subject: [PATCH 01/10] Add more meat to `tts_coqui_vc()`

---
 R/tts.R | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/R/tts.R b/R/tts.R
index 54e9e0c..6a49e52 100644
--- a/R/tts.R
+++ b/R/tts.R
@@ -443,3 +443,74 @@ tts_coqui <- function(
   }
   res
 }
+
+
+#' @export
+#' @rdname tts
+tts_coqui_vc <- function(
+    text,
+    speaker_wav,
+    language = "en",
+    gpu = FALSE,
+    bind_audio = TRUE,
+    save_local = FALSE,
+    save_local_dest = NULL,
+    ...) {
+  # TODO: Read Managing an R Package’s Python Dependencies
+  # https://rstudio.github.io/reticulate/articles/python_dependencies.html
+
+  # Specify version of Python to be used by reticulate
+  reticulate::use_python("/opt/homebrew/Caskroom/miniforge/base/bin/python")
+  # Import TTS
+  TTS_api <- reticulate::import("TTS.api")
+  # Model name
+  model_name <- "tts_models/multilingual/multi-dataset/xtts_v2"
+  # TTS
+  tts <- TTS_api$TTS(model_name, gpu = gpu)
+
+
+  # Execute model
+  res = lapply(text, function(string) {
+    string_processed = tts_split_text(string, limit = 600)
+
+    res = vapply(string_processed, function(tt) {
+      output_path = tts_temp_audio("wav")
+      tts$tts_to_file(text = tt,
+                      max_new_tokens = 600,
+                      file_path = output_path,
+                      speaker_wav = speaker_wav,
+                      language = language)
+      # Output file path
+      output_path
+    }, FUN.VALUE = character(1L), USE.NAMES = FALSE)
+    out = lapply(res, tts_audio_read,
+                 output_format = "wav")
+
+    # Output
+    dplyr::tibble(original_text = string,
+                  text = string_processed,
+                  wav = out, file = normalizePath(res))
+  })
+
+  # Post-processing
+  names(res) = seq_along(text)
+  res = dplyr::bind_rows(res, .id = "index")
+  res$index = as.numeric(res$index)
+  res$audio_type = "wav"
+
+  if (bind_audio) {
+    res = tts_bind_wav(res)
+  }
+  if ("wav" %in% colnames(res)) {
+    res$duration = vapply(res$wav, wav_duration, FUN.VALUE = numeric(1))
+  }
+  # Copy and paste audio file into local folder
+  if (save_local) {
+    if (!is.null(save_local_dest)) {
+      file.copy(normalizePath(res$file), save_local_dest)
+    } else {
+      cli::cli_alert_danger("Provide local destination where audio file will be saved")
+    }
+  }
+  res
+}

From 7bfafeab2fee38175865250782e26dc72ed18a3e Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Mon, 27 Nov 2023 17:52:00 -0800
Subject: [PATCH 02/10] Finish writing `tts_coqui_vc()`

---
 R/tts.R | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/R/tts.R b/R/tts.R
index 6a49e52..8f1de36 100644
--- a/R/tts.R
+++ b/R/tts.R
@@ -415,9 +415,11 @@ tts_coqui <- function(
       }, FUN.VALUE = character(1L), USE.NAMES = FALSE)
       out = lapply(res, tts_audio_read,
                    output_format = audio_type)
-      df = dplyr::tibble(original_text = string,
-                         text = string_processed,
-                         wav = out, file = normalizePath(res))
+
+      # Output
+      dplyr::tibble(original_text = string,
+                    text = string_processed,
+                    wav = out, file = normalizePath(res))
     })
   }
 
@@ -493,10 +495,10 @@ tts_coqui_vc <- function(
   })
 
   # Post-processing
-  names(res) = seq_along(text)
-  res = dplyr::bind_rows(res, .id = "index")
-  res$index = as.numeric(res$index)
-  res$audio_type = "wav"
+  names(res) <- seq_along(text)
+  res <- dplyr::bind_rows(res, .id = "index")
+  res$index <- as.numeric(res$index)
+  res$audio_type <- "wav"
 
   if (bind_audio) {
     res = tts_bind_wav(res)

From ba710b85a236bfcd045f0dea0f17205938315c2c Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Thu, 30 Nov 2023 17:56:22 -0800
Subject: [PATCH 03/10] gitignore wav audio files

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 174f712..ad78f46 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ inst/doc
 .Rhistory
 .DS_Store
 docs
+*.wav

From aaf3f7415eb41936c99abdd19d140058a6d60e68 Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Thu, 30 Nov 2023 18:01:32 -0800
Subject: [PATCH 04/10] Start incorporating `tts_coqui_vc()` into `tts()`

---
 R/tts.R | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/tts.R b/R/tts.R
index 8f1de36..4ceb6fe 100644
--- a/R/tts.R
+++ b/R/tts.R
@@ -1,7 +1,7 @@
 #' Text-to-Speech (Speech Synthesis)
 #'
 #' @description Convert text-to-speech using various engines, including Amazon
-#' Polly, Coqui TTS, Google Cloud Text-to-Speech API, and Microsoft Cognitive
+#' Polly, Coqui TTS and XTTS (Voice-Cloning), Google Cloud Text-to-Speech API, and Microsoft Cognitive
 #' Services Text to Speech REST API.
 #'
 #' With the exception of Coqui TTS, all these engines are accessible as R
@@ -49,6 +49,10 @@
 # Coqui TTS
 #' tts("Hello world! This is Coqui TTS", service = "coqui")
 #'
+# Coqui Voice Cloning (XTTS)
+# TODO: add an example call using service = "coqui-vc"
+#'
+#'
 # Google Cloud Text-to-Speech API
 #' tts("Hello world! This is Google Cloud", service = "google")
 #'
@@ -58,7 +62,7 @@
 tts = function(
     text,
     output_format = c("mp3", "wav"),
-    service = c("amazon", "google", "microsoft", "coqui"),
+    service = c("coqui", "coqui-vc", "amazon", "google", "microsoft"),
     bind_audio = TRUE,
     ...) {
 

From 2ea92389a4564b6fbab665071f16ce82b72dc532 Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Wed, 3 Jan 2024 10:58:57 -0800
Subject: [PATCH 05/10] Add `reticulate` to Imports

---
 DESCRIPTION | 1 +
 R/tts.R     | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a3c3521..8485444 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -39,6 +39,7 @@ Imports:
     knitr,
     magrittr,
     conrad,
+    reticulate,
     tidyr,
     tuneR,
     utils,
diff --git a/R/tts.R b/R/tts.R
index 4ceb6fe..f71fb7d 100644
--- a/R/tts.R
+++ b/R/tts.R
@@ -462,9 +462,6 @@ tts_coqui_vc <- function(
     save_local = FALSE,
     save_local_dest = NULL,
     ...) {
-  # TODO: Read Managing an R Package’s Python Dependencies
-  # https://rstudio.github.io/reticulate/articles/python_dependencies.html
-
   # Specify version of Python to be used by reticulate
   reticulate::use_python("/opt/homebrew/Caskroom/miniforge/base/bin/python")
   # Import TTS
@@ -474,7 +471,6 @@ tts_coqui_vc <- function(
   # TTS
   tts <- TTS_api$TTS(model_name, gpu = gpu)
 
-
   # Execute model
   res = lapply(text, function(string) {
     string_processed = tts_split_text(string, limit = 600)

From a6986ed2773d19044f553adca495284c42944231 Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Wed, 3 Jan 2024 11:01:18 -0800
Subject: [PATCH 06/10] Add function to open audio files

---
 R/aaa_utils.R | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/R/aaa_utils.R b/R/aaa_utils.R
index bf47e64..2c1dfc2 100644
--- a/R/aaa_utils.R
+++ b/R/aaa_utils.R
@@ -167,3 +167,10 @@ coqui_path_missing <- paste(
   "If you've already downloaded the software, use function",
   "'set_coqui_path(path = \"path/to/coqui/tts\")' to point R to your local coqui tts Executable File"
 )
+
+# Open private audio files
+system_open <- function(path) {
+  system(paste0("open ", path))
+}
+
+

From a66844e9897a499e5c68a09b2731699cebab2131 Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Wed, 3 Jan 2024 11:28:45 -0800
Subject: [PATCH 07/10] WIP: `tts()` and `tts_auth()`

---
 R/tts.R      | 45 +++++++++++++++++++++++++++++----------------
 R/tts_auth.R |  3 ++-
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/R/tts.R b/R/tts.R
index f71fb7d..f88deca 100644
--- a/R/tts.R
+++ b/R/tts.R
@@ -11,6 +11,7 @@
 #' * [conrad](https://github.com/fhdsl/conrad) is a client to the Microsoft Cognitive Services Text to Speech REST API
 #'
 #' @param text A character vector of text to be spoken
+#' @param speaker_wav (Coqui Voice Cloning only) Path to a WAV file of the speaker voice to clone
 #' @param exec_path System path to Coqui TTS executable
 #' @param output_format Format of output files: "mp3" or "wav"
 #' @param voice Full voice name
@@ -61,6 +62,7 @@
 #' }
 tts = function(
     text,
+    speaker_wav = "speaker.wav",
     output_format = c("mp3", "wav"),
     service = c("coqui", "coqui-vc", "amazon", "google", "microsoft"),
     bind_audio = TRUE,
@@ -70,12 +72,27 @@ tts = function(
   if (!tts_auth(service = service)) {
     warning(paste0("Service ", service, " not authorized/unavailable"))
   }
-
   output_format = match.arg(output_format)
-  if (service == "google") {
-    res = tts_google(
+
+  if (service == "coqui") {
+    cli::cli_alert_info("This service does not support MP3 format; will produce a WAV audio output.")
+    use_coqui()
+    coqui_path <- getOption("path_to_coqui")
+
+    res <- tts_coqui(
       text = text,
-      output_format = output_format,
+      exec_path = coqui_path,
+      output_format = "wav",
+      bind_audio = bind_audio,
+      ...)
+  }
+  if (service == "coqui-vc") {
+    cli::cli_alert_info("This service does not support MP3 format; will produce a WAV audio output.")
+    # TODO: Specify Python version, just as we specify path to coqui above
+
+    res <- tts_coqui_vc(
+      text = text,
+      speaker_wav = speaker_wav,
       bind_audio = bind_audio,
       ...)
   }
@@ -86,22 +103,17 @@ tts = function(
       bind_audio = bind_audio,
       ...)
   }
-  if (service == "microsoft") {
-    res = tts_microsoft(
+  if (service == "google") {
+    res = tts_google(
       text = text,
       output_format = output_format,
       bind_audio = bind_audio,
       ...)
   }
-  if (service == "coqui") {
-    cli::cli_alert_info("Coqui TTS does not support MP3 format; will produce a WAV audio output.")
-    use_coqui()
-    coqui_path <- getOption("path_to_coqui")
-
-    res <- tts_coqui(
+  if (service == "microsoft") {
+    res = tts_microsoft(
       text = text,
-      exec_path = coqui_path,
-      output_format = "wav",
+      output_format = output_format,
       bind_audio = bind_audio,
       ...)
   }
@@ -457,17 +469,18 @@ tts_coqui_vc <- function(
     text,
     speaker_wav,
     language = "en",
+    python_version = "/opt/homebrew/Caskroom/miniforge/base/bin/python",
     gpu = FALSE,
     bind_audio = TRUE,
     save_local = FALSE,
     save_local_dest = NULL,
     ...) {
   # Specify version of Python to be used by reticulate
-  reticulate::use_python("/opt/homebrew/Caskroom/miniforge/base/bin/python")
+  reticulate::use_python(python_version)
   # Import TTS
   TTS_api <- reticulate::import("TTS.api")
   # Model name
-  model_name <- "tts_models/multilingual/multi-dataset/xtts_v2"
+  model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
   # TTS
   tts <- TTS_api$TTS(model_name, gpu = gpu)
 
diff --git a/R/tts_auth.R b/R/tts_auth.R
index 6b2afd5..f8b77df 100644
--- a/R/tts_auth.R
+++ b/R/tts_auth.R
@@ -31,7 +31,8 @@
 tts_auth = function(service = c("amazon",
                                 "google",
                                 "microsoft",
-                                "coqui"),
+                                "coqui",
+                                "coqui-vc"),
                     key_or_json_file = NULL,
                     ...) {
   service = match.arg(service)

From 619994fbc6e8aefca8533a294792712ee08db068 Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Thu, 4 Jan 2024 15:17:33 -0800
Subject: [PATCH 08/10] Add `tts_coqui_vc()` to NAMESPACE

---
 NAMESPACE | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NAMESPACE b/NAMESPACE
index 3b2cb2d..0bbf01b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,6 +12,7 @@ export(tts_auth)
 export(tts_bind_wav)
 export(tts_coqui)
 export(tts_coqui_auth)
+export(tts_coqui_vc)
 export(tts_coqui_voices)
 export(tts_default_voice)
 export(tts_google)

From 56ae97a4dfcb56783227bd64dbf6927f1f7a3ffd Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Thu, 4 Jan 2024 15:17:39 -0800
Subject: [PATCH 09/10] Document

---
 R/tts.R         |  1 -
 man/tts.Rd      | 24 ++++++++++++++++++++++--
 man/tts_auth.Rd |  2 +-
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/R/tts.R b/R/tts.R
index f88deca..956c81b 100644
--- a/R/tts.R
+++ b/R/tts.R
@@ -462,7 +462,6 @@ tts_coqui <- function(
   res
 }
 
-
 #' @export
 #' @rdname tts
 tts_coqui_vc <- function(
diff --git a/man/tts.Rd b/man/tts.Rd
index 6e95f75..f99dd5b 100644
--- a/man/tts.Rd
+++ b/man/tts.Rd
@@ -6,12 +6,14 @@
 \alias{tts_google}
 \alias{tts_microsoft}
 \alias{tts_coqui}
+\alias{tts_coqui_vc}
 \title{Text-to-Speech (Speech Synthesis)}
 \usage{
 tts(
   text,
+  speaker_wav = "speaker.wav",
   output_format = c("mp3", "wav"),
-  service = c("amazon", "google", "microsoft", "coqui"),
+  service = c("coqui", "coqui-vc", "amazon", "google", "microsoft"),
   bind_audio = TRUE,
   ...
 )
@@ -57,10 +59,24 @@ tts_coqui(
   save_local_dest = NULL,
   ...
 )
+
+tts_coqui_vc(
+  text,
+  speaker_wav,
+  language = "en",
+  python_version = "/opt/homebrew/Caskroom/miniforge/base/bin/python",
+  gpu = FALSE,
+  bind_audio = TRUE,
+  save_local = FALSE,
+  save_local_dest = NULL,
+  ...
+)
 }
 \arguments{
 \item{text}{A character vector of text to be spoken}
 
+\item{speaker_wav}{(Coqui Voice Cloning only) Speaker voice to clone}
+
 \item{output_format}{Format of output files: "mp3" or "wav"}
 
 \item{service}{Service to use (Amazon, Google, Microsoft, or Coqui)}
@@ -101,7 +117,7 @@ A standardized \code{tibble} featuring the following columns:
 }
 \description{
 Convert text-to-speech using various engines, including Amazon
-Polly, Coqui TTS, Google Cloud Text-to-Speech API, and Microsoft Cognitive
+Polly, Coqui TTS and XTTS (Voice-Cloning), Google Cloud Text-to-Speech API, and Microsoft Cognitive
 Services Text to Speech REST API.
 
 With the exception of Coqui TTS, all these engines are accessible as R
@@ -119,6 +135,10 @@ tts("Hello world! This is Amazon Polly", service = "amazon")
 
 tts("Hello world! This is Coqui TTS", service = "coqui")
 
+Coqui Voice Cloning (XTTS)
+TODO
+
+
 tts("Hello world! This is Google Cloud", service = "google")
 
 tts("Hello world! This is Microsoft", service = "microsoft")
diff --git a/man/tts_auth.Rd b/man/tts_auth.Rd
index 388f655..bcc1fcb 100644
--- a/man/tts_auth.Rd
+++ b/man/tts_auth.Rd
@@ -9,7 +9,7 @@
 \title{Authentication for Text-to-Speech (Speech Synthesis) Engines}
 \usage{
 tts_auth(
-  service = c("amazon", "google", "microsoft", "coqui"),
+  service = c("amazon", "google", "microsoft", "coqui", "coqui-vc"),
   key_or_json_file = NULL,
   ...
 )

From 8d540da95b23a14b60925c6ca4183bb40319d193 Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Wed, 10 Jan 2024 12:18:51 -0800
Subject: [PATCH 10/10] Improve comments on `system_open()`

---
 R/aaa_utils.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/aaa_utils.R b/R/aaa_utils.R
index 2c1dfc2..83677c0 100644
--- a/R/aaa_utils.R
+++ b/R/aaa_utils.R
@@ -168,7 +168,7 @@ coqui_path_missing <- paste(
   "'set_coqui_path(path = \"path/to/coqui/tts\")' to point R to your local coqui tts Executable File"
 )
 
-# Open private audio files
+# Internal helper: open an audio (.wav, .mp3) or video (.mp4) file via the shell `open` command (path is passed unquoted)
 system_open <- function(path) {
   system(paste0("open ", path))
 }