Skip to content

Commit

Permalink
Merge pull request #9 from levshuster/Database-Based
Browse files Browse the repository at this point in the history
Database based
  • Loading branch information
levshuster authored Oct 18, 2024
2 parents 49457a2 + 188bd7c commit 1464cc7
Show file tree
Hide file tree
Showing 14 changed files with 634 additions and 114 deletions.
58 changes: 58 additions & 0 deletions Back End/Database/Query Development/Votes to Gender.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
-- Sanity check: count speaker-point rows whose team joins to a debater whose
-- full name contains the scraped partial name.
-- NOTE(review): the WHERE clause filters on a column from the LEFT-joined
-- table, so unmatched rows (NULL name) are dropped and this behaves like an
-- INNER JOIN — presumably intentional here, since the point is to compare
-- this count against the raw vote_count below. Confirm.
-- Fix: dropped the ORDER BY inside the subquery — sorting rows that are only
-- ever counted is pure wasted work and has no effect on count(*).
SELECT count(*) AS count_of_joined_votes FROM (
    SELECT
        value,
        partial_name,
        lower(name) AS lower_name,
        first_name,
        pairing.debater.team
    FROM pairing.speaker_points
    LEFT JOIN pairing.debater
        ON pairing.debater.team = speaker_points.team
    WHERE lower(name) LIKE '%' || partial_name || '%'
) AS a;

-- Baseline: total number of speaker-point rows, joined or not, so the
-- joined count can be compared against it.
SELECT
    count(*) AS vote_count
FROM pairing.speaker_points;

-- Counts are off: there is some double counting going on, but it's only off about 1% of the time.

-- Session-scoped view: one row per speaker-point record (< 35, i.e. excluding
-- outlier/garbage scores — TODO confirm that cutoff is the intended filter),
-- annotated with an inferred gender label derived from first-name frequency
-- counts in gender_binding.
-- NOTE(review): the first CASE branch labels female_count IS NULL as
-- 'Unknown - Name Not in DB', but female_count is also NULL when the
-- pairing.debater LEFT JOIN itself missed (no matching team) — the label
-- conflates the two causes; verify which one dominates.
-- NOTE(review): ORDER BY inside a view is not guaranteed to survive into
-- queries over the view; consumers needing order should sort themselves.
CREATE TEMP VIEW speaker_points_and_gender AS
SELECT
name,
pairing.debater.first_name,
value AS speaker_points,
female_count,
male_count,
pairing.speaker_points.division,
CASE
-- No gender_binding match (or no debater match — see note above).
WHEN female_count IS NULL THEN 'Unknown - Name Not in DB'
-- Sample too small to trust the frequency counts.
WHEN female_count + male_count < 20 THEN 'Unknown - Too Few Names'
-- Counts within ~10% of each other: too close to call.
WHEN LEAST(female_count, male_count) + ((female_count+male_count)/10) > GREATEST(female_count, male_count) THEN 'Unknown - Small Difference Between Counts'
WHEN female_count > male_count THEN 'Female'
WHEN male_count > female_count THEN 'Male'
-- Unreachable given the branches above; kept as a tripwire.
ELSE 'Error gender case never matched'
END AS gender
FROM pairing.speaker_points
LEFT JOIN pairing.debater
ON pairing.debater.team = speaker_points.team
LEFT JOIN gender_binding
ON lower(gender_binding.first_name) = lower(pairing.debater.first_name)
-- Filtering on LEFT-joined columns: rows with no debater match are dropped here.
WHERE lower(name) LIKE '%' || partial_name || '%'
AND value < 35
ORDER BY name;

-- How many rows got a gender-table hit vs. a miss?
SELECT debater_gender_found, count(*)
FROM (
    SELECT female_count IS NOT NULL AS debater_gender_found
    FROM speaker_points_and_gender
) AS hit_or_miss
GROUP BY debater_gender_found;

-- SELECT * from speaker_points_and_gender WHERE female_count IS NULL;


-- Debug output: eyeball the full annotated result set.
SELECT * FROM speaker_points_and_gender;

-- Per-gender summary statistics of speaker points.
-- Fixes: aggregate columns now carry explicit aliases instead of the
-- engine-generated names, and ORDER BY makes the output deterministic
-- (GROUP BY alone guarantees no row order).
SELECT
    gender,
    count(*) AS row_count,
    AVG(speaker_points) AS mean_speaker_points,
    STDDEV(speaker_points) AS stddev_speaker_points
FROM speaker_points_and_gender
GROUP BY gender
ORDER BY gender;

-- Raw (gender, speaker_points) pairs, restricted to rows where the
-- name-frequency heuristic produced a confident label.
SELECT gender, speaker_points
FROM speaker_points_and_gender
WHERE gender = 'Male'
   OR gender = 'Female';

-- SELECT * FROM gender_binding;
10 changes: 4 additions & 6 deletions Back End/Database/database_structure.dbml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Table division {
round text [note: "Semi-Final, Round 1, 3"]
is_elimination bool
url text
date datetime
date timestamp
details json
to_scrape bool
leaf bool [note: 'to_scrape is false and leaf is true IFF division has never been scraped but is referred to by a round that came up when scraping a judge']
Expand All @@ -65,7 +65,7 @@ Table pairing.judge {
}

Table pairing.votes {
judge integer [ref: > pairing.judge.url]
judge text [ref: > pairing.judge.url]
team text [ref: <> pairing.team.url]
division integer [ref: > division.id]
tournament integer [ref: > tournament.id]
Expand Down Expand Up @@ -95,8 +95,6 @@ Table judge {

Table gender_binding {
first_name text [primary key]
gender text
confidance decimal
updated timestamp
source text
male_count integer
female_count integer
}
10 changes: 4 additions & 6 deletions Back End/Database/debate_bias_calc.sql
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ CREATE TABLE "division" (
"round" text,
"is_elimination" bool,
"url" text,
"date" datetime,
"date" timestamp,
"details" json,
"to_scrape" bool,
"leaf" bool
Expand All @@ -54,10 +54,8 @@ CREATE TABLE "judge" (

CREATE TABLE "gender_binding" (
"first_name" text PRIMARY KEY,
"gender" text,
"confidance" decimal,
"updated" timestamp,
"source" text
"male_count" integer,
"female_count" integer
);

CREATE TABLE "pairing"."team" (
Expand All @@ -79,7 +77,7 @@ CREATE TABLE "pairing"."judge" (
);

CREATE TABLE "pairing"."votes" (
"judge" integer,
"judge" text,
"team" text,
"division" integer,
"tournament" integer,
Expand Down
4 changes: 2 additions & 2 deletions Back End/Database/debate_bias_calc.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 4 additions & 10 deletions Back End/Database/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ Linux - `sudo service postgresql start`
createdb debate_db

psql -d debate_db -f debate_bias_calc.sql # some unique constraint errors are fine here
psql -d debate_db -c "\copy gender_binding FROM '../../Helper Functions/Python/Scratch Work/Gender Analysis/gender_compendium.csv' WITH (FORMAT CSV, HEADER)"

sudo -u postgres psql -d debate_db

CREATE USER debate_bias_user WITH PASSWORD 'debate_bias_user';

GRANT ALL ON ALL TABLES IN SCHEMA public TO debate_bias_user;
Expand All @@ -23,19 +25,11 @@ ALTER TABLE pairing.debater DROP CONSTRAINT debater_first_name_fkey;
ALTER TABLE pairing.judge DROP CONSTRAINT judge_id_fkey;
ALTER TABLE pairing.votes DROP CONSTRAINT votes_judge_fkey;
ALTER TABLE judge DROP CONSTRAINT judge_first_name_fkey;
ALTER TABLE pairing.votes DROP CONSTRAINT votes_tournament_fkey;
ALTER TABLE pairing.speaker_points DROP CONSTRAINT speaker_points_tournament_fkey;

\q


```

# load in bogus data

```psql
INSERT INTO tournament (id, name, url, updated, details, to_scrape)
VALUES
(1, 'Bogus Invitational', 'https://www.example.com/tournament/1', CURRENT_TIMESTAMP, '{"location": "Example City", "date": "2024-10-01"}', FALSE),
(2, 'Fictional Championship', 'https://www.example.com/tournament/2', CURRENT_TIMESTAMP, '{"location": "Sample Town", "date": "2024-11-15"}', FALSE);
```

# Other Useful Commands
Expand Down
65 changes: 58 additions & 7 deletions Front End/GUI/pages/0_Scrape Judge.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import pandas as pd
from datetime import datetime
from sqlalchemy import Table, MetaData, select
from sqlalchemy.dialects.postgresql import insert


st.set_page_config(
page_title="Debate Bias Calc",
Expand Down Expand Up @@ -155,7 +157,7 @@
debater_table.insert().values(
name=debater_name,
school=team_name,
first_name=debater_name.split()[0],
first_name=lower(debater_name.split()[0]),
team=debater_url,
)
)
Expand All @@ -170,14 +172,63 @@
st.write(conn.query("SELECT * FROM pairing.debater;", ttl=0))

st.write("# Scraping Votes")
# get all tournament_id from debater_urls_to_process
# get all judge URLS for this judge+tournament_id
# create pairing.judge for each url
# create vote and speaker points for each to_scrape pairing.judge
judge_urls_to_process = [
f'https://www.tabroom.com/index/tourn/postings/judge.mhtml?judge_id={id}&tourn_id={tournament_id}'
for tournament_id in scrape_judge.get_tournament_ids_from_judge(url)
]


judges_progress = st.progress(0, "No tournaments Have Been Found that Require Further Processing")
warnings = st.expander("See Exceptions", icon='⚠️')
for count, judges_url in enumerate(judge_urls_to_process):
judges_progress.progress((count+1)/len(judge_urls_to_process), f"Processing {judges_url}")
votes, speaker_points = scrape_debaters_and_judges.get_votes_and_speaker_points_for_a_tournament_from_judge_url(warnings, judges_url)
with conn.session as session:
result = session.execute(
select(judge_table.c.url).where(judge_table.c.url == judges_url)
).fetchone()
if result is None:
session.execute(
judge_table.insert().values(
url=judges_url,
to_scrape=False,
id=id
)
)
for vote in votes:
session.execute(
votes_table.insert().values(
judge=vote.judge_id,
team=vote.team_link,
division=vote.division_id,
tournament=vote.tourn_id,
won=vote.won,
side=vote.side
)
)
for point in speaker_points:
session.execute(
points_table.insert().values(
judge=point.judge_id,
team=point.team_link,
partial_name=point.name,
division=point.division_id,
tournament=point.tourn_id,
value=point.points,
)
)
session.commit()

st.write(conn.query(f"SELECT * FROM pairing.votes WHERE judge LIKE '%{id}%';", ttl=0))
st.write(conn.query(f"SELECT * FROM pairing.speaker_points WHERE judge LIKE '%{id}%';", ttl=0))

st.write("# Scraping Relivant Tournament Details")
# Scrape tournament for each tournament URL while setting leaf to TRUE
# judge_urls_to_process = [
# f'https://www.tabroom.com/index/tourn/postings/judge.mhtml?judge_id={id}&tourn_id={tournament_id}'
# for tournament_id in scrape_judge.get_tournament_ids_from_judge(url)
# ]
st.write("# Scraping Division details")
# scrape divisions for each tournament URL while setting leaf to TRUE
# else:
# "Please provide a valid link"
else:
"Please provide a valid link"
7 changes: 3 additions & 4 deletions Front End/GUI/pages/1_Scrape Tournaments.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,15 +248,15 @@ def upload_tournament():
debaters_progress = st.progress(0, "No Debaters Have Been Found that Require Further Processing")
for count, debater_url in debater_urls_to_process.itertuples():
debater_names, team_name = scrape_debaters_and_judges.get_debater_and_team_from_url(debater_url)
if team_name != None and debater_name != []:
if team_name != None and debater_names != []:
debaters_progress.progress((count+1)/len(debater_urls_to_process), f"Processing {debater_names[0]} from {team_name}")
with conn.session as session:
for debater_name in debater_names:
session.execute(
debater_table.insert().values(
name=debater_name,
school=team_name,
first_name=debater_name.split()[0],
first_name=lower(debater_name.split()[0]),
team=debater_url,
)
)
Expand All @@ -278,10 +278,9 @@ def upload_tournament():
judges_progress = st.progress(0, "No Judges Have Been Found that Require Further Processing")
warnings = st.expander("See Exceptions", icon='⚠️')
for count, judges_url in judge_urls_to_process.itertuples():
division_progress.progress((count+1)/len(judge_urls_to_process), f"Processing {judges_url}")
judges_progress.progress((count+1)/len(judge_urls_to_process), f"Processing {judges_url}")
votes, speaker_points = scrape_debaters_and_judges.get_votes_and_speaker_points_for_a_tournament_from_judge_url(warnings, judges_url)
with conn.session as session:
# stopped here, need to insert all votes and speaker points then set the judge to scraped
for vote in votes:
session.execute(
votes_table.insert().values(
Expand Down
1 change: 1 addition & 0 deletions Front End/GUI/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ sqlalchemy==2.0.34
beautifulsoup4==4.12.3
lxml==5.3.0
plotly==5.24.1
scipy==1.10.1
Loading

0 comments on commit 1464cc7

Please sign in to comment.