Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
niccolopaganini authored Aug 11, 2023
1 parent 58013f9 commit 43813ec
Showing 1 changed file with 376 additions and 0 deletions.
376 changes: 376 additions & 0 deletions bin/API_Migration_scripts_readme/WebScraper_Java_Code.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,376 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "361542ff",
"metadata": {},
"source": [
"# Python Scripts - Text extraction (webscraping) and matching contents of Java file(s)"
]
},
{
"cell_type": "markdown",
"id": "8a2f0458",
"metadata": {},
"source": [
"## Created for API Migration"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab88f421",
"metadata": {},
"outputs": [],
"source": [
"#imports\n",
"import csv\n",
"import json\n",
"import os\n",
"\n",
"import requests\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "markdown",
"id": "aaa1ab25",
"metadata": {},
"source": [
"## WebScraper - saves a CSV file and a JSON file (the JSON file is not required)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a251d7fc",
"metadata": {},
"outputs": [],
"source": [
"url = \"https://developer.android.com/sdk/api_diff/33/changes/alldiffs_index_changes\"  # URL from android website - only changes\n",
"link_prefix = \"/sdk/api_diff/33/changes/\"  # an href must start with this to be kept\n",
"\n",
"# Request the page and build the BeautifulSoup tree from the response.\n",
"# The timeout stops a hung connection from blocking the kernel forever, and\n",
"# raise_for_status() aborts on an HTTP error instead of parsing an error page.\n",
"response = requests.get(url, timeout=30)\n",
"response.raise_for_status()\n",
"soup = BeautifulSoup(response.content, \"html.parser\")\n",
"\n",
"# Getting links from the webpage: keep every anchor whose href starts with link_prefix.\n",
"links = [\n",
"    a[\"href\"]\n",
"    for a in soup.find_all(\"a\")\n",
"    if a.has_attr(\"href\") and a[\"href\"].startswith(link_prefix)\n",
"]\n",
"\n",
"# Creation and writing of the CSV file with DictWriter.\n",
"# Technically not writing any links but the last path segment of each one.\n",
"# The with-block closes (and therefore flushes) the file even if a write fails.\n",
"with open(\"classes.csv\", \"w\", newline=\"\") as csv_file:\n",
"    csv_writer = csv.DictWriter(csv_file, fieldnames=[\"Package Class\"])\n",
"    csv_writer.writeheader()\n",
"    for link in links:\n",
"        class_name = link.split(\"/\")[-1]\n",
"        csv_writer.writerow({\"Package Class\": class_name})\n",
"\n",
"# JSON not required: optional dump of the unmodified links.\n",
"with open(\"classes.json\", \"w\") as json_file:\n",
"    json.dump({\"links\": links}, json_file)"
]
},
{
"cell_type": "markdown",
"id": "355d9124",
"metadata": {},
"source": [
"## Python script to compare the CSV against the Java files in a given directory\n",
"##### Uses the CSV file obtained from the web scraper above."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e458972",
"metadata": {},
"outputs": [],
"source": [
"def match_csv_java(csv_file, directory, column=\"Package Class\"):  # method accepting CSV and a directory\n",
"    \"\"\"Report which CSV entries occur in the ``.java`` files of a directory.\n",
"\n",
"    Each value of ``column`` in ``csv_file`` is searched for, as a plain\n",
"    substring, in every ``.java`` file located directly inside ``directory``\n",
"    (sub-directories are not descended into). Every hit is printed as\n",
"    ``Found <entry> in <file name>``.\n",
"\n",
"    Args:\n",
"        csv_file: Path of a CSV file whose header row contains ``column``.\n",
"        directory: Directory holding the Java sources to scan.\n",
"        column: Header name of the CSV column to read. The default is the\n",
"            header written by the web-scraper cell of this notebook.\n",
"\n",
"    Returns:\n",
"        A list of ``(entry, java_file_name)`` tuples, one per printed match.\n",
"\n",
"    Raises:\n",
"        ValueError: If ``csv_file`` has no column named ``column``.\n",
"    \"\"\"\n",
"    # Read every Java source once up front instead of once per CSV row.\n",
"    java_sources = {}\n",
"    for java_file in sorted(os.listdir(directory)):\n",
"        path = os.path.join(directory, java_file)\n",
"        # Test the joined path: a bare name would be resolved against the\n",
"        # current working directory rather than `directory`.\n",
"        if java_file.endswith(\".java\") and not os.path.isdir(path):\n",
"            # errors=\"replace\" keeps one badly encoded file from aborting the scan.\n",
"            with open(path, \"r\", encoding=\"utf-8\", errors=\"replace\") as source:\n",
"                java_sources[java_file] = source.read()\n",
"\n",
"    matches = []\n",
"    with open(csv_file, \"r\", newline=\"\") as f:  # open CSV in read mode\n",
"        # DictReader consumes the header row, so the header is never matched as data.\n",
"        reader = csv.DictReader(f)\n",
"        if column not in (reader.fieldnames or []):\n",
"            raise ValueError(f\"{csv_file} has no {column!r} column\")\n",
"        for row in reader:\n",
"            classes = (row[column] or \"\").strip()\n",
"            if not classes:\n",
"                continue  # an empty string is a substring of every file\n",
"            for java_file, text in java_sources.items():\n",
"                if classes in text:\n",
"                    print(f\"Found {classes} in {java_file}\")  # printing this in such a way we know which package/ class is in which file\n",
"                    matches.append((classes, java_file))\n",
"    return matches"
]
},
{
"cell_type": "markdown",
"id": "c7b00cef",
"metadata": {},
"source": [
"### 1. data collection"
]
},
{
"cell_type": "markdown",
"id": "fba6313a",
"metadata": {},
"source": [
"#### primary folder"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "577a2047",
"metadata": {},
"outputs": [],
"source": [
"# Absolute paths expected, in this order: CSV file, then directory of Java sources.\n",
"# NOTE(review): both arguments are \"/tmp\" here - presumably placeholder paths; confirm before running.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "1c671243",
"metadata": {},
"source": [
"#### location"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a3bad65",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "6fad8583",
"metadata": {},
"source": [
"#### location -> actions"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8deb408e",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "99036ee8",
"metadata": {},
"source": [
"#### sensors"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4646eed",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "de5fe5a3",
"metadata": {},
"source": [
"#### verification"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "caa49791",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "e0b35ea6",
"metadata": {},
"source": [
"#### wrapper"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "412308ac",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "f8bc9c2b",
"metadata": {},
"source": [
"### 2. OPcodeauth"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "253a2005",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "36b339a4",
"metadata": {},
"source": [
"### 3. server communication"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1054a92",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "cf19bce4",
"metadata": {},
"source": [
"### 4. Serversync"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "401d1e5f",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "5058994c",
"metadata": {},
"source": [
"### 5. settings"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4849f6b",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "4440291c",
"metadata": {},
"source": [
"### 6. unified logger"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86975b04",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "markdown",
"id": "c7f14032",
"metadata": {},
"source": [
"### 7. usercache"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ead0592d",
"metadata": {},
"outputs": [],
"source": [
"# Arguments: CSV file path, then the directory of Java sources to scan.\n",
"if __name__ == \"__main__\":\n",
"    match_csv_java(csv_file=\"/tmp\", directory=\"/tmp\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c0c0837d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 43813ec

Please sign in to comment.