Skip to content

Commit

Permalink
Merge pull request #7 from shivendrra/dev
Browse files Browse the repository at this point in the history
pulling new changes from dev
  • Loading branch information
shivendrra authored Sep 3, 2024
2 parents 823751d + bf12c06 commit c05bd20
Show file tree
Hide file tree
Showing 11 changed files with 78 additions and 40 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@ __pycache__/
*.py[cod]
*.exe

*.pypirc
build
.vscode

*.egg-info/

# extras
*.env
Datasets
Expand Down
28 changes: 14 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,23 @@ This library contains some topics, keywords, search queries & channel ids which
#### Channel Ids

```python
from graze.queries import Queries
from webgraze.queries import Queries

queries = Queries(category="channel")
```

#### Search Queries

```python
from graze.queries import Queries
from webgraze.queries import Queries

queries = Queries(category="search")
```

#### Image Topics

```python
from graze.queries import Queries
from webgraze.queries import Queries

queries = Queries(category="images")
```
Expand Down Expand Up @@ -96,8 +96,8 @@ os.chdir(current_directory)

api_key = os.getenv('yt_key')

from graze import Youtube
from graze.queries import Queries
from webgraze import Youtube
from webgraze.queries import Queries

queries = Queries(category="channel")

Expand All @@ -112,8 +112,8 @@ The Wikipedia scraper generates target URLs from provided queries, fetches the c
#### Running the Scraper

```python
from graze import Wikipedia
from graze.queries import Queries
from webgraze import Wikipedia
from webgraze.queries import Queries

queries = Queries(category="search")
wiki = Wikipedia(filepath='../data.txt', metrics=True)
Expand All @@ -134,8 +134,8 @@ The Unsplash Image scraper fetches images based on given topics & saves them in
#### Running the Scraper

```python
from graze import Unsplash
from graze.queries import Queries
from webgraze import Unsplash
from webgraze.queries import Queries

topics = Queries("images")

Expand All @@ -159,8 +159,8 @@ The Britannica scraper generates target URLs from provided queries, fetches the
#### Running the scraper

```python
from graze import Britannica
from graze.queries import Queries
from webgraze import Britannica
from webgraze.queries import Queries

queries = Queries(category="search")
scraper = Britannica(filepath='../data.txt', metrics=True)
Expand All @@ -183,7 +183,7 @@ load_dotenv()

API_KEY = os.getenv("freesound_key")

from graze import Freesound
from webgraze import Freesound

sound = Freesound(api_key=API_KEY, download_dir="audios", metrics=True)
sound(topics=["clicks", "background", "nature"])
Expand Down Expand Up @@ -220,8 +220,8 @@ Scrapes & downloads pictures from [pexels.com](https://www.pexels.com/) & saves
#### Running the scraper

```python
from graze import Pexels
from graze.queries import Queries
from webgraze import Pexels
from webgraze.queries import Queries

queries = Queries("images")
scraper = Pexels(directory="./images", metrics=True)
Expand Down
10 changes: 2 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
bs4
requests
tqdm
timeit
json
re
googleapiclient
google-api-python-client
youtube_transcript_api
logging
typing
selenium
webdriver-manager
base64
webdriver-manager
4 changes: 2 additions & 2 deletions run.py/run_britannica.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
current_directory = os.path.dirname(os.path.abspath(__file__))
os.chdir(current_directory)

from graze import Britannica
from graze.queries import Queries
from webgraze import Britannica
from webgraze.queries import Queries

queries = Queries(category="search")
wiki = Britannica(filepath='../data.txt', metrics=True)
Expand Down
2 changes: 1 addition & 1 deletion run.py/run_freesound.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

API_KEY = os.getenv("freesound_key")

from graze import Freesound
from webgraze import Freesound

sound = Freesound(api_key=API_KEY, download_dir="audios", metrics=True)
sound(topics=["clicks", "background", "nature"])
4 changes: 2 additions & 2 deletions run.py/run_pexels.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from graze import Pexels
from graze.queries import Queries
from webgraze import Pexels
from webgraze.queries import Queries

queries = Queries("images")
scraper = Pexels(directory="./images", metrics=True)
Expand Down
4 changes: 2 additions & 2 deletions run.py/run_transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

api_key = os.getenv('yt_key')

from graze import Youtube
from graze.queries import Queries
from webgraze import Youtube
from webgraze.queries import Queries

queries = Queries(category="channel")

Expand Down
4 changes: 2 additions & 2 deletions run.py/run_unsplash.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
current_directory = os.path.dirname(os.path.abspath(__file__))
os.chdir(current_directory)

from graze import Unsplash
from graze.queries import Queries
from webgraze import Unsplash
from webgraze.queries import Queries

topics = Queries("images")

Expand Down
4 changes: 2 additions & 2 deletions run.py/run_wiki.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from graze import Wikipedia
from graze.queries import Queries
from webgraze import Wikipedia
from webgraze.queries import Queries

queries = Queries(category="search")
wiki = Wikipedia(filepath='../data.txt', metrics=True)
Expand Down
43 changes: 43 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from setuptools import setup, find_packages
import codecs
import os

# Packaging script body for the `webgraze` distribution.

# Resolve paths relative to this file so the build works from any working directory.
current_dir = os.path.abspath(os.path.dirname(__file__))

# README.md is reused verbatim as the long description (declared as markdown below).
# Built-in open() is used instead of codecs.open(), which is deprecated since Python 3.14.
with open(os.path.join(current_dir, "README.md"), encoding="utf-8") as file:
    long_description = "\n" + file.read()

VERSION = '1.1.2'
DESCRIPTION = 'WebScraping library that scrapes & gathers data from multiple sources on the internet'

setup(
    name="webgraze",
    version=VERSION,
    author="shivendra",
    author_email="shivharsh44@gmail.com",
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type="text/markdown",
    license="MIT",
    packages=find_packages(),
    keywords=[
        "webscraping",
        "scraping",
        "webscraping library",
        "web scraping",
        "python webscraping",
        "beautifulsoup",
        "selenium",
    ],
    classifiers=[
        "Development Status :: 1 - Planning",
        "Intended Audience :: Developers",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "License :: OSI Approved :: MIT License",
    ],
    # Runtime dependencies installed alongside the package.
    install_requires=[
        "bs4",
        "tqdm",
        "google-api-python-client",
        "requests",
        "youtube-transcript-api",
        "selenium",
        "webdriver-manager",
    ],
)
12 changes: 5 additions & 7 deletions test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import os
current_directory = os.path.dirname(os.path.abspath(__file__))
os.chdir(current_directory)
from webgraze import Pexels
from webgraze.queries import Queries

from graze import Freesound

sound = Freesound(api_key="lMKgKjaRmNMZKKxNqkjx", download_dir="audios", metrics=True)
sound(topics=["clicks", "background", "nature"])
queries = Queries("images")
scraper = Pexels(directory="./images", metrics=True)
scraper(topics=queries())

0 comments on commit c05bd20

Please sign in to comment.