Add files via upload

e-mission · Aug 11, 2023 · 43813ec · 43813ec
1 parent 58013f9
commit 43813ec
Showing 1 changed file with 376 additions and 0 deletions.
diff --git a/bin/API_Migration_scripts_readme/WebScraper_Java_Code.ipynb b/bin/API_Migration_scripts_readme/WebScraper_Java_Code.ipynb
@@ -0,0 +1,376 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "361542ff",
+   "metadata": {},
+   "source": [
+    "# Python Scripts - Text extraction (webscraping) and matching contents of Java file(s)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a2f0458",
+   "metadata": {},
+   "source": [
+    "## Created for API Migration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab88f421",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#imports\n",
+    "import csv\n",
+    "import json\n",
+    "import os\n",
+    "\n",
+    "import requests\n",
+    "from bs4 import BeautifulSoup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aaa1ab25",
+   "metadata": {},
+   "source": [
+    "## WebScraper - saves a CSV file and a JSON file (the JSON file is not required)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a251d7fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Index page of the Android API 33 diff report (URL from android website - only changes)\n",
+    "url = \"https://developer.android.com/sdk/api_diff/33/changes/alldiffs_index_changes\"\n",
+    "\n",
+    "# Fetch the page. The timeout stops a stalled connection from hanging the kernel,\n",
+    "# and raise_for_status() fails loudly instead of parsing an HTTP error page.\n",
+    "response = requests.get(url, timeout=30)\n",
+    "response.raise_for_status()\n",
+    "soup = BeautifulSoup(response.content, \"html.parser\")\n",
+    "\n",
+    "# Keep only the hrefs that point into the API 33 change report\n",
+    "links = [\n",
+    "    a[\"href\"]\n",
+    "    for a in soup.find_all(\"a\")\n",
+    "    if a.has_attr(\"href\") and a[\"href\"].startswith(\"/sdk/api_diff/33/changes/\")\n",
+    "]\n",
+    "\n",
+    "# Write the last path segment of every link (the class, not the link itself)\n",
+    "# under a single header column. The with-block closes the file, so every row\n",
+    "# is flushed to disk before a later cell reads classes.csv.\n",
+    "with open(\"classes.csv\", \"w\", newline=\"\") as csv_file:\n",
+    "    csv_writer = csv.DictWriter(csv_file, fieldnames=[\"Package Class\"])\n",
+    "    csv_writer.writeheader()\n",
+    "    for link in links:\n",
+    "        csv_writer.writerow({\"Package Class\": link.split(\"/\")[-1]})\n",
+    "\n",
+    "# JSON not required: the raw links are dumped for reference only\n",
+    "json_data = {\"links\": links}\n",
+    "with open(\"classes.json\", \"w\") as json_file:\n",
+    "    json.dump(json_data, json_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "355d9124",
+   "metadata": {},
+   "source": [
+    "## Python script to compare the CSV against Java files in a given directory\n",
+    "##### Uses the CSV file obtained from the WebScraper cell. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e458972",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def match_csv_java(csv_file, directory, column=0):\n",
+    "    \"\"\"Print every CSV entry that occurs in a Java source file of `directory`.\n",
+    "\n",
+    "    Args:\n",
+    "        csv_file: Path to the CSV of package/class names.\n",
+    "        directory: Folder whose top-level `.java` files are searched (not recursive).\n",
+    "        column: Zero-based index of the CSV column holding the names. Defaults to 0,\n",
+    "            the only column of a CSV written with the single `Package Class` header.\n",
+    "\n",
+    "    Returns:\n",
+    "        None. One `Found <name> in <file>` line is printed per match.\n",
+    "    \"\"\"\n",
+    "    # Read each Java file once up front instead of once per CSV row.\n",
+    "    java_sources = {}\n",
+    "    for java_file in sorted(os.listdir(directory)):\n",
+    "        java_path = os.path.join(directory, java_file)\n",
+    "        # isdir needs the joined path; a bare name is resolved against the current directory.\n",
+    "        if not java_file.endswith(\".java\") or os.path.isdir(java_path):\n",
+    "            continue\n",
+    "        # errors=\"replace\" keeps one badly encoded file from aborting the whole scan.\n",
+    "        with open(java_path, \"r\", encoding=\"utf-8\", errors=\"replace\") as java_handle:\n",
+    "            java_sources[java_file] = java_handle.read()\n",
+    "\n",
+    "    with open(csv_file, \"r\", newline=\"\") as csv_handle:\n",
+    "        for row in csv.reader(csv_handle):\n",
+    "            # Blank lines and short rows have no cell at the requested column.\n",
+    "            if len(row) <= column:\n",
+    "                continue\n",
+    "            classes = row[column].strip()\n",
+    "            # Skip the header row and empty cells: an empty string is a substring of every file.\n",
+    "            if not classes or classes == \"Package Class\":\n",
+    "                continue\n",
+    "            for java_file, text in java_sources.items():\n",
+    "                if classes in text:\n",
+    "                    print(f\"Found {classes} in {java_file}\") # shows which package/class is in which file\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c7b00cef",
+   "metadata": {},
+   "source": [
+    "### 1. data collection"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fba6313a",
+   "metadata": {},
+   "source": [
+    "#### primary folder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "577a2047",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Absolute paths, in this order: CSV file, then directory of Java files.\n",
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1c671243",
+   "metadata": {},
+   "source": [
+    "#### location"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9a3bad65",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6fad8583",
+   "metadata": {},
+   "source": [
+    "#### location -> actions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8deb408e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "99036ee8",
+   "metadata": {},
+   "source": [
+    "#### sensors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4646eed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "de5fe5a3",
+   "metadata": {},
+   "source": [
+    "#### verification"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "caa49791",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e0b35ea6",
+   "metadata": {},
+   "source": [
+    "#### wrapper"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "412308ac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f8bc9c2b",
+   "metadata": {},
+   "source": [
+    "### 2. OPcodeauth"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "253a2005",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "36b339a4",
+   "metadata": {},
+   "source": [
+    "### 3. server communication"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b1054a92",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cf19bce4",
+   "metadata": {},
+   "source": [
+    "### 4. Serversync"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "401d1e5f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5058994c",
+   "metadata": {},
+   "source": [
+    "### 5. settings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a4849f6b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4440291c",
+   "metadata": {},
+   "source": [
+    "### 6. unified logger"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "86975b04",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c7f14032",
+   "metadata": {},
+   "source": [
+    "### 7. usercache"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ead0592d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): both arguments are /tmp here - presumably placeholders; confirm the real paths.\n",
+    "if __name__ == \"__main__\":\n",
+    "    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c0c0837d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}