-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
177 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,32 @@ | ||
# LotteryScraping-RD | ||
LotteryScraping-RD es un proyecto de web scraping diseñado para extraer datos en tiempo real de los resultados de loterías en la República Dominicana. Los datos extraídos incluyen un id, números ganadores, fecha de los sorteos y nombres de las loterías en formato JSON. | ||
|
||
|
||
Para su funcionamiento, cargamos un archivo **JSON** de la siguiente manera: | ||
|
||
``` python | ||
with open('lottery.json') as file: | ||
json_data = file.read() | ||
data = json.loads(json_data) | ||
``` | ||
Aunque también lo podemos hacer de manera directa: | ||
|
||
``` python | ||
|
||
json_data = ''' | ||
[ | ||
{ | ||
"id": 1, | ||
"name": "La Primera Día" | ||
}, | ||
{ | ||
"id": 2, | ||
"name": "Anguila Mañana" | ||
}, | ||
# ... otros elementos ... | ||
] | ||
''' | ||
data = json.loads(json_data) | ||
``` | ||
|
||
Dicho **JSON** se utiliza para filtrar y seleccionar las distintas loterías de la página web, permitiendo devolver únicamente las loterías específicas que se deseen. Cada entrada en el JSON posee un identificador (ID) que es empleado para establecer un orden dentro de la estructura, y el nombre en el JSON debe coincidir con el nombre de la lotería según se presenta en la página [loteriasdominicanas](https://loteriasdominicanas.com/). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
from flask import Flask,jsonify | ||
from flask_cors import CORS, cross_origin | ||
import re | ||
import urllib.request | ||
from bs4 import BeautifulSoup | ||
import os | ||
import json | ||
|
||
|
||
def load_html(timeout=10):
    """Download the lottery pages and collect their ``game-block`` elements.

    Two pages are fetched: the site's main page and the Anguila page,
    because the Anguila lotteries are not listed on the main page.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list with every ``<div class="game-block">`` element found, main
        page first and Anguila page second. If any download or parse step
        fails, an empty list is returned (all-or-nothing, best effort).
    """
    import logging  # function-scope import keeps this block self-contained

    urls = (
        "https://loteriasdominicanas.com/",
        # The Anguila lotteries do not appear on the main page.
        "https://loteriasdominicanas.com/anguila",
    )

    games_blocks = []
    try:
        for url in urls:
            # The context manager closes the HTTP response even when
            # reading fails part-way through.
            with urllib.request.urlopen(url, timeout=timeout) as response:
                html = response.read()
            soup = BeautifulSoup(html, "html.parser")
            games_blocks.extend(soup.find_all("div", class_="game-block"))
    except Exception:
        # Best effort: callers get "no data" instead of a crash, but unlike
        # a bare ``except`` this lets KeyboardInterrupt/SystemExit through
        # and leaves a trace of what went wrong.
        logging.getLogger(__name__).exception("Could not load lottery pages")
        return []

    return games_blocks
|
||
|
||
def scraping():
    """Return the scraped results for the lotteries listed in ``lottery.json``.

    ``lottery.json`` (read from the current working directory) holds a list
    of ``{"id": ..., "name": ...}`` entries. Each scraped game block whose
    title matches an entry's name (case-insensitively) produces one result.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``date`` and
        ``number`` (the scores joined with ``-``), sorted by ``id``.
    """
    # The lottery names contain non-ASCII characters, so the encoding must
    # not depend on the platform default.
    with open('lottery.json', encoding="utf-8") as file:
        lotteries = json.load(file)

    # Build the lower-cased name lookup once. ``setdefault`` keeps the first
    # entry when names repeat, matching a first-match linear search.
    by_title = {}
    for item in lotteries:
        by_title.setdefault(item["name"].lower(), item)

    loteries_parser = []
    for game_block in load_html():
        title_tag = game_block.find("a", "game-title")
        date_tag = game_block.find("div", "session-date")
        # ``find`` returns None when the element is missing; skip malformed
        # blocks instead of failing the whole request.
        if title_tag is None or date_tag is None:
            continue

        lottery = by_title.get(title_tag.getText().strip().lower())
        if lottery is None:
            continue

        scores = game_block.find_all("span", "score")
        loteries_parser.append({
            'id': lottery["id"],
            'name': lottery["name"],
            'date': date_tag.getText().strip(),
            'number': "-".join(span.text.strip() for span in scores),
        })

    return sorted(loteries_parser, key=lambda k: k["id"])
|
||
|
||
# Flask application with CORS enabled for every route.
app = Flask(__name__)
CORS(app)
# Port comes from the PORT environment variable, defaulting to 5000.
port = int(os.environ.get("PORT", 5000))


@app.route("/")
def search_lotery():
    """Serve the scraped lottery results as a JSON response."""
    return jsonify(scraping())


# Start the development server only when executed as a script, so that
# importing this module (WSGI server, tests) does not block on app.run().
if __name__ == "__main__":
    app.run(port=port)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
[ | ||
{ | ||
"id": 1, | ||
"name": "La Primera Día" | ||
}, | ||
{ | ||
"id": 2, | ||
"name": "Anguila Mañana" | ||
}, | ||
{ | ||
"id": 3, | ||
"name": "La Suerte 12:30" | ||
}, | ||
{ | ||
"id": 4, | ||
"name": "Anguila Medio Día" | ||
}, | ||
{ | ||
"id": 5, | ||
"name": "Quiniela Real" | ||
}, | ||
{ | ||
"id": 6, | ||
"name": "Florida Día" | ||
}, | ||
{ | ||
"id": 7, | ||
"name": "Quiniela LoteDom" | ||
}, | ||
{ | ||
"id": 8, | ||
"name": "New York Tarde" | ||
}, | ||
{ | ||
"id": 9, | ||
"name": "Gana Más" | ||
}, | ||
{ | ||
"id": 10, | ||
"name": "La Suerte 18:00" | ||
}, | ||
{ | ||
"id": 11, | ||
"name": "Anguila Tarde" | ||
}, | ||
{ | ||
"id": 12, | ||
"name": "Quiniela Loteka" | ||
}, | ||
{ | ||
"id": 13, | ||
"name": "Lotería Nacional" | ||
}, | ||
{ | ||
"id": 14, | ||
"name": "Anguila Noche" | ||
}, | ||
{ | ||
"id": 15, | ||
"name": "Quiniela Leidsa" | ||
}, | ||
{ | ||
"id": 16, | ||
"name": "Florida Noche" | ||
}, | ||
{ | ||
"id": 17, | ||
"name": "New York Noche" | ||
} | ||
] |