diff --git a/data/.env.template b/data/.env.template
index c2a51198..89f8e5e6 100644
--- a/data/.env.template
+++ b/data/.env.template
@@ -35,3 +35,7 @@ xyz
 
 # DynamoDB AWS Account
 TAP_DYNAMODB_AWS_ASSUME_ROLE_ARN="arn:aws:iam::******"
+
+# Pinecone (target-pinecone) and OpenAI API keys
+TARGET_PINECONE_API_KEY=****
+OPENAI_API_KEY=*****
diff --git a/data/extract/extractors.meltano.yml b/data/extract/extractors.meltano.yml
index 2faeb35e..b444dcea 100644
--- a/data/extract/extractors.meltano.yml
+++ b/data/extract/extractors.meltano.yml
@@ -74,7 +74,7 @@ plugins:
         start_date: '2020-01-01T00:00:00Z'
         key_properties: [id]
         name: azure_ips
-        pattern: ServiceTags_Public_20230710.json
+        pattern: ServiceTags_Public_20230724.json
         json_path: values
   - name: tap-slack
     variant: meltanolabs
@@ -271,3 +271,38 @@ plugins:
       streams:
       - stream_name: animals
         input_filename: https://raw.githubusercontent.com/meltano/tap-smoke-test/main/demo-data/animals-data.jsonl
+  - name: tap-beautifulsoup  # base scraper plugin; the tap-beautifulsoup-* entries inherit from it
+    variant: meltanolabs
+    pip_url: git+https://github.com/meltanolabs/tap-beautifulsoup.git  # NOTE(review): no git ref pinned — consider pinning a tag or commit
+  - name: tap-beautifulsoup-sdk  # scrapes the site at site_url below; used by the reload_pinecone job
+    inherit_from: tap-beautifulsoup
+    config:
+      source_name: sdk-docs  # per the plugin lock file, also used as the stream name
+      site_url: https://sdk.meltano.com/en/latest/
+      output_folder: output  # NOTE(review): same folder as the other tap-beautifulsoup-* plugins — confirm downloads cannot mix
+      parser: html.parser
+      download_recursively: true  # download all pages into output_folder before parsing
+      find_all_kwargs:  # kwargs passed to BeautifulSoup's find_all call
+        attrs:
+          role: main
+  - name: tap-beautifulsoup-edk  # scrapes the site at site_url below
+    inherit_from: tap-beautifulsoup
+    config:
+      source_name: edk-docs  # per the plugin lock file, also used as the stream name
+      site_url: https://edk.meltano.com/en/latest/
+      output_folder: output  # NOTE(review): same folder as the other tap-beautifulsoup-* plugins — confirm downloads cannot mix
+      parser: html.parser
+      download_recursively: true  # download all pages into output_folder before parsing
+      find_all_kwargs:  # kwargs passed to BeautifulSoup's find_all call
+        attrs:
+          role: main
+  - name: tap-beautifulsoup-meltano  # scrapes the site at site_url below
+    inherit_from: tap-beautifulsoup
+    config:
+      source_name: meltano-docs  # per the plugin lock file, also used as the stream name
+      site_url: https://docs.meltano.com/
+      output_folder: output  # NOTE(review): same folder as the other tap-beautifulsoup-* plugins — confirm downloads cannot mix
+      parser: html.parser
+      download_recursively: true  # download all pages into output_folder before parsing
+      find_all_kwargs:  # kwargs passed to BeautifulSoup's find_all call
+        text: true  # NOTE(review): differs from the sdk/edk plugins, which filter on attrs role=main — confirm intended
diff --git a/data/extract/mappers.meltano.yml b/data/extract/mappers.meltano.yml
deleted file mode 100644
index 2103fe68..00000000
--- a/data/extract/mappers.meltano.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-plugins:
-  mappers:
-  - name: meltano-map-transformer
-    variant: meltano
-    pip_url: git+https://github.com/MeltanoLabs/meltano-map-transform.git@v0.0.4
-    mappings:
-    - name: coalesce-gcp-ips
-      config:
-        stream_maps:
-          gcp_ips:
-            ipv4prefix:
-            ipv6prefix:
-            ipv4: record.get('ipv4prefix', '')
-            ipv6: record.get('ipv6prefix', '')
-            id: md5(record.get('ipv4prefix', record.get('ipv6prefix')))
diff --git a/data/load/loaders.meltano.yml b/data/load/loaders.meltano.yml
index 12f2e061..13a5c991 100644
--- a/data/load/loaders.meltano.yml
+++ b/data/load/loaders.meltano.yml
@@ -87,3 +87,12 @@ plugins:
       - columnObjectTypeId: 0-2
         columnName: org_last_active_date
         propertyName: telemetry__last_active_at
+  - name: target-pinecone  # api_key is not set here; .env.template provides TARGET_PINECONE_API_KEY
+    variant: meltanolabs
+    config:
+      index_name: target-pinecone-index
+      environment: asia-southeast1-gcp-free
+      document_text_property: page_content  # overrides the lock-file default of `text`
+      embeddings_property: embeddings  # same as the lock-file default
+      metadata_property: metadata  # same as the lock-file default
+      pinecone_metadata_text_key: text  # same as the lock-file default
diff --git a/data/mappers/clean_text.py b/data/mappers/clean_text.py
new file mode 100644
index 00000000..8917eee8
--- /dev/null
+++ b/data/mappers/clean_text.py
@@ -0,0 +1,15 @@
+import typing as t
+
+from singer_sdk._singerlib.messages import (
+    Message,
+)
+
+class Mapper():
+    """Custom mapper that collapses whitespace in each record's ``page_content``."""
+    def map_record_message(self, message_dict: dict) -> dict:
+        """Return ``message_dict`` (mutated in place) with ``record.page_content`` whitespace-normalized."""
+        record = message_dict["record"]
+        # str.split() with no separator splits on any whitespace run, newlines included, so one pass suffices.
+        record["page_content"] = " ".join(record["page_content"].split())
+        return message_dict
+
diff --git a/data/mappers/mappers.meltano.yml b/data/mappers/mappers.meltano.yml
new file mode 100644
index 00000000..3e238384
--- /dev/null
+++ b/data/mappers/mappers.meltano.yml
@@ -0,0 +1,32 @@
+plugins:
+  mappers:
+  - name: meltano-map-transformer
+    variant: meltano
+    pip_url: git+https://github.com/MeltanoLabs/meltano-map-transform.git@v0.0.4
+    mappings:
+    - name: coalesce-gcp-ips  # derives ipv4 / ipv6 / id fields for the gcp_ips stream
+      config:
+        stream_maps:
+          gcp_ips:
+            ipv4prefix:  # bare key is YAML null; NOTE(review): presumably drops the source property — confirm against stream-maps docs
+            ipv6prefix:  # bare key is YAML null; NOTE(review): presumably drops the source property — confirm against stream-maps docs
+            ipv4: record.get('ipv4prefix', '')  # '' when the record has no ipv4prefix key
+            ipv6: record.get('ipv6prefix', '')  # '' when the record has no ipv6prefix key
+            id: md5(record.get('ipv4prefix', record.get('ipv6prefix')))  # md5 of ipv4prefix, falling back to ipv6prefix
+  - name: map-gpt-embeddings  # NOTE(review): presumably reads OPENAI_API_KEY from the environment — confirm
+    namespace: map_gpt_embeddings
+    pip_url: git+https://github.com/MeltanoLabs/map-gpt-embeddings.git@tap_mapper  # NOTE(review): `tap_mapper` looks like a branch, not a release tag — confirm and consider pinning a commit
+    executable: map-gpt-embeddings
+    mappings:
+    - name: add-embeddings  # mapping name used in the reload_pinecone job task
+      config:
+        document_text_property: page_content  # same record field that mappers/clean_text.py rewrites
+        document_metadata_property: metadata
+  - name: mapper-generic
+    namespace: mapper_generic
+    pip_url: git+https://github.com/pnadolny13/mapper-generic.git  # NOTE(review): personal repo with no ref pinned — consider pinning a commit
+    executable: mapper-generic
+    mappings:
+    - name: clean-text  # mapping name used in the reload_pinecone job task
+      config:
+        code_path: mappers/clean_text.py  # NOTE(review): relative path — presumably resolved from the Meltano project root (data/); confirm
diff --git a/data/meltano.yml b/data/meltano.yml
index 6122bd80..31d60c65 100644
--- a/data/meltano.yml
+++ b/data/meltano.yml
@@ -9,3 +9,4 @@ include_paths:
 - ./orchestrate/*.meltano.yml
 - ./transform/*.meltano.yml
 - ./utilities/*.meltano.yml
+- ./mappers/*.meltano.yml
diff --git a/data/orchestrate/orchestrators.meltano.yml b/data/orchestrate/orchestrators.meltano.yml
index 2118f238..fdb9bbf8 100644
--- a/data/orchestrate/orchestrators.meltano.yml
+++ b/data/orchestrate/orchestrators.meltano.yml
@@ -71,6 +71,10 @@ schedules:
   interval: 0 0 * * *
   job: sample_job
 
+- name: reload_pinecone
+  interval: 0 0 * * 0  # cron: 00:00 every Sunday
+  job: reload_pinecone
+
 jobs:
 
 - name: dynanmodb_el
@@ -177,3 +181,7 @@ jobs:
 - name: sample_job
   tasks:
   - tap-smoke-test target-jsonl
+
+- name: reload_pinecone
+  tasks:
+  - tap-beautifulsoup-sdk clean-text add-embeddings target-pinecone  # NOTE(review): only the sdk tap runs; tap-beautifulsoup-edk / -meltano are defined but not loaded here — confirm intended
diff --git a/data/plugins/extractors/tap-beautifulsoup--meltanolabs.lock b/data/plugins/extractors/tap-beautifulsoup--meltanolabs.lock
new file mode 100644
index 00000000..75ab25fd
--- /dev/null
+++ b/data/plugins/extractors/tap-beautifulsoup--meltanolabs.lock
@@ -0,0 +1,100 @@
+{
+  "plugin_type": "extractors",
+  "name": "tap-beautifulsoup",
+  "namespace": "tap_beautifulsoup",
+  "variant": "meltanolabs",
+  "label": "BeautifulSoup",
+  "docs": "https://hub.meltano.com/extractors/tap-beautifulsoup--meltanolabs",
+  "repo": "https://github.com/MeltanoLabs/tap-beautifulsoup",
+  "pip_url": "git+https://github.com/MeltanoLabs/tap-beautifulsoup.git",
+  "executable": "tap-beautifulsoup",
+  "description": "Python library for pulling data out of HTML and XML files.",
+  "logo_url": "https://hub.meltano.com/assets/logos/extractors/beautifulsoup.png",
+  "capabilities": [
+    "about",
+    "catalog",
+    "discover",
+    "schema-flattening",
+    "state",
+    "stream-maps"
+  ],
+  "settings_group_validation": [
+    [
+      "output_folder",
+      "parser",
+      "site_url",
+      "source_name"
+    ]
+  ],
+  "settings": [
+    {
+      "name": "download_recursively",
+      "kind": "boolean",
+      "value": true,
+      "label": "Download Recursively",
+      "description": "Attempt to download all pages recursively into the output directory prior to parsing files. Set this to False if you've previously run `wget -r -A.html https://sdk.meltano.com/en/latest/`"
+    },
+    {
+      "name": "find_all_kwargs",
+      "kind": "object",
+      "label": "Find All Kwargs",
+      "description": "This dict contains all the kwargs that should be passed to the [`find_all`](https://www.crummy.com/software/BeautifulSoup/bs4/doc/#find-all) call in order to extract text from the pages."
+    },
+    {
+      "name": "flattening_enabled",
+      "kind": "boolean",
+      "label": "Flattening Enabled",
+      "description": "'True' to enable schema flattening and automatically expand nested properties."
+    },
+    {
+      "name": "flattening_max_depth",
+      "kind": "integer",
+      "label": "Flattening Max Depth",
+      "description": "The max depth to flatten schemas."
+    },
+    {
+      "name": "output_folder",
+      "kind": "string",
+      "value": "output",
+      "label": "Output Folder",
+      "description": "The file path of where to write the intermediate downloaded HTML files to."
+    },
+    {
+      "name": "parser",
+      "kind": "options",
+      "value": "html.parser",
+      "label": "Parser",
+      "description": "The BeautifulSoup parser to use.",
+      "options": [
+        {
+          "label": "Html Parser",
+          "value": "html.parser"
+        }
+      ]
+    },
+    {
+      "name": "site_url",
+      "kind": "string",
+      "label": "Site URL",
+      "description": "The site you'd like to scrape. The tap will download all pages recursively into the output directory prior to parsing files."
+    },
+    {
+      "name": "source_name",
+      "kind": "string",
+      "label": "Source Name",
+      "description": "The name of the source you're scraping. This will be used as the stream name."
+    },
+    {
+      "name": "stream_map_config",
+      "kind": "object",
+      "label": "Stream Map Config",
+      "description": "User-defined config values to be used within map expressions."
+    },
+    {
+      "name": "stream_maps",
+      "kind": "object",
+      "label": "Stream Maps",
+      "description": "Config object for stream maps capability. For more information check out [Stream Maps](https://sdk.meltano.com/en/latest/stream_maps.html)."
+    }
+  ]
+}
\ No newline at end of file
diff --git a/data/plugins/loaders/target-pinecone--meltanolabs.lock b/data/plugins/loaders/target-pinecone--meltanolabs.lock
new file mode 100644
index 00000000..4cba8d25
--- /dev/null
+++ b/data/plugins/loaders/target-pinecone--meltanolabs.lock
@@ -0,0 +1,105 @@
+{
+  "plugin_type": "loaders",
+  "name": "target-pinecone",
+  "namespace": "target_pinecone",
+  "variant": "meltanolabs",
+  "label": "Pinecone",
+  "docs": "https://hub.meltano.com/loaders/target-pinecone--meltanolabs",
+  "repo": "https://github.com/MeltanoLabs/target-pinecone",
+  "pip_url": "git+https://github.com/MeltanoLabs/target-pinecone.git",
+  "executable": "target-pinecone",
+  "description": "Vector Database for Vector Search",
+  "logo_url": "https://hub.meltano.com/assets/logos/loaders/pinecone.png",
+  "capabilities": [
+    "about",
+    "schema-flattening",
+    "stream-maps"
+  ],
+  "settings_group_validation": [
+    [
+      "api_key",
+      "document_text_property",
+      "index_name",
+      "pinecone_metadata_text_key"
+    ]
+  ],
+  "settings": [
+    {
+      "name": "api_key",
+      "kind": "password",
+      "label": "API Key",
+      "description": "Your Pinecone API key."
+    },
+    {
+      "name": "dimensions",
+      "kind": "integer",
+      "value": 1536,
+      "label": "Dimensions",
+      "description": "The amount of dimensions to use if creating a new index. An index is only created if it doesn't already exist. The default is `1536` which is the dimensions of the embeddings using OpenAI's text-embedding-ada-002 model."
+    },
+    {
+      "name": "document_text_property",
+      "kind": "string",
+      "value": "text",
+      "label": "Document Text Property",
+      "description": "The property containing the document text in the input records."
+    },
+    {
+      "name": "embeddings_property",
+      "kind": "string",
+      "value": "embeddings",
+      "label": "Embeddings Property",
+      "description": "The property containing the embeddings in the input records."
+    },
+    {
+      "name": "environment",
+      "kind": "string",
+      "label": "Environment",
+      "description": "Your Pinecone environment, for example `asia-southeast1-gcp-free`."
+    },
+    {
+      "name": "flattening_enabled",
+      "kind": "boolean",
+      "label": "Flattening Enabled",
+      "description": "'True' to enable schema flattening and automatically expand nested properties."
+    },
+    {
+      "name": "flattening_max_depth",
+      "kind": "integer",
+      "label": "Flattening Max Depth",
+      "description": "The max depth to flatten schemas."
+    },
+    {
+      "name": "index_name",
+      "kind": "string",
+      "label": "Index Name",
+      "description": "Your Pinecone index name to write data to."
+    },
+    {
+      "name": "metadata_property",
+      "kind": "string",
+      "value": "metadata",
+      "label": "Metadata Property",
+      "description": "The property containing the document metadata in the input records."
+    },
+    {
+      "name": "pinecone_metadata_text_key",
+      "kind": "password",
+      "value": "text",
+      "label": "Pinecone Metadata Text Key",
+      "description": "The key in the Pinecone metadata entry that will contain the text document."
+    },
+    {
+      "name": "stream_map_config",
+      "kind": "object",
+      "label": "Stream Map Config",
+      "description": "User-defined config values to be used within map expressions."
+    },
+    {
+      "name": "stream_maps",
+      "kind": "object",
+      "label": "Stream Maps",
+      "description": "Config object for stream maps capability. For more information check out [Stream Maps](https://sdk.meltano.com/en/latest/stream_maps.html)."
+    }
+  ]
+}
\ No newline at end of file