-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
121 additions
and
11 deletions.
There are no files selected for viewing
83 changes: 83 additions & 0 deletions
83
examples/party_stance_detection/party_stance_detection.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Detecting Stance with Respect to Political Parties\n", | ||
"\n", | ||
"This tries to reproduce the results of [fair and balanced?](https://www-jstor-org.cmu.idm.oclc.org/stable/44014619) by Budak et al. (2016)." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from llments.lm import empirical, hugging_face\n", | ||
"from llments.distance.norm import L1Distance" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Load the base LM\n", | ||
"base_lm = hugging_face.load_from_spec('base_lm_spec.json')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Use few-shot learning from this dataset to turn it into a few-shot classifier\n", | ||
"# https://huggingface.co/datasets/SetFit/tweet_eval_stance/viewer/stance_hillary\n", | ||
"stance_dataset = empirical.EmpiricalDistribution(\n", | ||
" \"Passage: If a man demanded staff to get him an ice tea he'd be called a sexists elitist pig.. Oink oink #Hillary #SemST\\nTarget: Hillary Clinton\\nStance: against\",\n", | ||
" \"Passage: We're out here in G-town, and where are you #sctweets #SemST\\nTarget: Hillary Clinton\\nStance: none\",\n", | ||
" \"Passage: If you're not watching @user speech right now you're missing her drop tons of wisdom. #SemST\\nTarget: Hillary Clinton\\nStance: favor\",\n", | ||
")\n", | ||
"stance_lm = base_lm.fit(stance_dataset, task_description=\"Predict the stance of a passage with respect to the target.\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Classify stance in various different passages\n", | ||
"answer_distances = {}\n", | ||
"distance_function = L1Distance()\n", | ||
"\n", | ||
"news_source_names = [\"cnn\", \"wsj\", \"fox\", \"npr\"]\n", | ||
"party_names = [\"Democratic\", \"Republican\"]\n", | ||
"for news_source in news_source_names:\n", | ||
" # Load the dataset (empirical distribution) for this source\n", | ||
" news_dataset = empirical.load_from_text_file(f'news_data_{news_source}.txt')\n", | ||
" # Enumerate the entire news dataset\n", | ||
" for news_datapoint in news_dataset:\n", | ||
" for party_name in party_names:\n", | ||
" probs = stance_lm.log_probability([\"favor\", \"against\", \"none\"], f\"Passage: {news_datapoint}\\nTarget: {party_name}\\nStance: \")\n", | ||
" raise NotImplementedError(\"Not finished yet.\")" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters