Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/propaganda arabic #249

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import re

from llmebench.datasets import ArProBinaryDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import ArProTask


def metadata():
    """Describe the authorship, backing model, and reported score of this asset."""
    info = {}
    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
    info["model"] = "gpt-4-32k (version 0314)"
    info["description"] = (
        "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. "
        "API version '2023-03-15-preview'. 3 samples where chosen per test "
        "sample based on MaxMarginalRelevance for few shot learning."
    )
    info["scores"] = {"Micro-F1": "0.592"}
    return info


def config():
    """Wire together the dataset, task, and model used to run this asset."""
    model_settings = {"max_tries": 3}
    return {
        "dataset": ArProBinaryDataset,
        "task": ArProTask,
        "model": OpenAIModel,
        "model_args": model_settings,
    }


def prompt(input_sample, examples):
    """Build the few-shot chat messages for binary propaganda detection."""
    instructions = (
        "Your task is to analyze the text and determine if it contains elements of propaganda.\n\n"
        "Below you will find a few examples that can help you to understand:\n\n"
    )
    user_content = few_shot_prompt(input_sample, instructions, examples)
    system_message = {"role": "system", "content": "You are an expert annotator."}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]


def few_shot_prompt(input_sample, base_prompt, examples):
    """Append numbered labelled examples and the unlabelled query text to *base_prompt*."""
    pieces = [base_prompt]
    for idx, example in enumerate(examples):
        pieces.append(
            f"Example {idx}:\ntext: {example['input']}\nlabel: {example['label']}\n\n"
        )
    pieces.append(
        "Based on the instructions and examples above analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n"
    )
    # Query text goes last with an empty label for the model to fill in.
    pieces.append(f"text: {input_sample}\nlabel: \n")
    return "".join(pieces)


def post_process(response):
    """Extract the predicted binary label from an OpenAI ChatCompletion response.

    Returns "true" or "false" when the model answered with exactly one of
    those words (after dropping periods and a leading "label:" prefix,
    case-insensitively); otherwise None.
    """
    content = response["choices"][0]["message"]["content"]
    pred_label = content.replace(".", "").strip().lower()
    pred_label = pred_label.replace("label:", "").strip()

    # The original branches only re-assigned the same value; an exact
    # membership test expresses the same accept/reject logic directly.
    return pred_label if pred_label in ("true", "false") else None
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import re

from llmebench.datasets import ArProBinaryDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import ArProTask


def metadata():
    """Describe the authorship, backing model, and reported score of this asset."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="gpt-4-32k (version 0314)",
        description=(
            "GPT4 32k tokens model hosted on Azure, using the "
            "ChatCompletion API. API version '2023-03-15-preview'."
        ),
        scores={"Micro-F1": "0.526"},
    )


def config():
    """Wire together the dataset, task, and model used to run this asset."""
    model_settings = {"max_tries": 3}
    return {
        "dataset": ArProBinaryDataset,
        "task": ArProTask,
        "model": OpenAIModel,
        "model_args": model_settings,
    }


def prompt(input_sample):
    """Build the zero-shot chat messages for binary propaganda detection."""
    user_text = (
        "Your task is to analyze the text and determine if it contains elements of propaganda. Based on the instructions, analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n"
        f"text: {input_sample}\n"
        "label: \n"
    )
    messages = [
        {"role": "system", "content": "You are an expert fact checker."},
        {"role": "user", "content": user_text},
    ]
    return messages


def few_shot_prompt(input_sample, base_prompt, examples):
    """Append labelled example sentences and the unlabelled query to *base_prompt*."""
    parts = [base_prompt]
    for example in examples:
        parts.append(f"Sentence: {example['input']}\nlabel: {example['label']}\n\n")
    # The query sentence is appended with an empty label for the model to fill.
    parts.append(f"Sentence: {input_sample}\nlabel: \n")
    return "".join(parts)


def post_process(response):
    """Map the model's free-form reply to "true"/"false"; None when unrecognisable."""
    raw = response["choices"][0]["message"]["content"]
    raw = raw.replace(".", "").strip().lower()

    # Markers accepted for each class, checked in the original order
    # (positive markers first).
    truthy_markers = ("true", "label: 1", "label: yes")
    falsy_markers = ("false", "label: 0", "label: no")

    if any(marker in raw for marker in truthy_markers):
        return "true"
    if any(marker in raw for marker in falsy_markers):
        return "false"
    print("label problem!! " + raw)
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import re

from llmebench.datasets import ArProCoarseDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import MultilabelPropagandaTask


def metadata():
    """Describe the authorship, backing model, and reported score of this asset."""
    return dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="gpt-4-32k (version 0314)",
        description=(
            "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. "
            "API version '2023-03-15-preview'. 3 samples where chosen per test "
            "sample based on MaxMarginalRelevance for few shot learning."
        ),
        scores={"Micro-F1": "0.587"},
    )


def config():
    """Wire together the dataset, task, and model used to run this asset."""
    model_settings = {"max_tries": 3}
    return {
        "dataset": ArProCoarseDataset,
        "task": MultilabelPropagandaTask,
        "model": OpenAIModel,
        "model_args": model_settings,
    }


def prompt(input_sample, examples):
    """Build the few-shot chat messages for coarse-grained propaganda labelling.

    The instruction text lists each coarse-grained technique with the
    fine-grained techniques it covers, then delegates to few_shot_prompt()
    to append the examples and the query text.
    """
    prompt_text = (
        "Your task is to analyze the text and determine if it contains elements of propaganda.\n\n"
        # Grammar fixed ("is defined" -> "are defined", "their appearance" ->
        # "the appearance", "indicate" -> "indicates") for consistency with
        # the zero-shot variant of this asset.
        "The following coarse-grained propaganda techniques are defined based on the appearance of any of the fine-grained propaganda techniques. The left side of the equal sign indicates coarse-grained techniques and right side indicates fine-grained techniques.\n\n"
        "no_technique = ['no propaganda']\n"
        "Manipulative Wording = ['Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation, Vagueness, Confusion' , 'Repetition']\n"
        "Reputation = ['Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation']\n"
        "Justification = ['Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving']\n"
        "Simplification = ['Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice']\n"
        "Distraction = ['Red Herring' , 'Straw Man' , 'Whataboutism']\n"
        "Call = ['Appeal to Time' , 'Conversation Killer' , 'Slogans']\n"
        "Below you will find a few examples of text with coarse-grained propaganda techniques:\n\n"
    )

    fs_prompt = few_shot_prompt(input_sample, prompt_text, examples)
    return [
        {
            "role": "system",
            "content": "You are an expert annotator.",
        },
        {
            "role": "user",
            "content": fs_prompt,
        },
    ]


def few_shot_prompt(input_sample, base_prompt, examples):
    """Append numbered multi-label examples and the query text to *base_prompt*."""
    chunks = [base_prompt]
    for pos, example in enumerate(examples):
        # Labels render as a run of quoted names, each followed by ", "
        # (trailing separator included, matching the historical format).
        rendered = "".join(f"'{tag}', " for tag in example["label"])
        chunks.append(f"Example {pos}:\ntext: {example['input']}\nlabel: {rendered}\n\n")
    chunks.append(
        "Based on the instructions and examples above analyze the following text and provide only labels as a list of string.\n\n"
    )
    chunks.append(f"text: {input_sample}\nlabel: \n")
    return "".join(chunks)


def post_process(response):
    """Parse the model reply into a list of coarse-grained technique labels.

    The reply is split on commas, bracket/quote characters are stripped,
    and each fragment is matched case-insensitively by keyword against the
    known coarse labels. Falls back to ["no_technique"] when nothing
    matches, and drops "no_technique" when real techniques were also found.
    """
    content = response["choices"][0]["message"]["content"]

    # Fixes the original's shadowing of the `response` parameter and of the
    # raw `label` string by the loop variable.
    fragments = [
        piece.strip().replace("'", "").replace("[", "").replace("]", "")
        for piece in content.split(",")
        if len(piece) > 1
    ]

    # (keywords, canonical label): a fragment containing any keyword in a
    # group contributes that label once, preserving the original check order.
    keyword_map = [
        (("manipulative",), "Manipulative_Wording"),
        (("call",), "Call"),
        (("reputation",), "Reputation"),
        (("technique", "propaganda"), "no_technique"),
        (("justification",), "Justification"),
        (("simplification",), "Simplification"),
        (("distraction",), "Distraction"),
    ]

    labels = []
    for fragment in fragments:
        lowered = fragment.lower()
        for keywords, canonical in keyword_map:
            if any(keyword in lowered for keyword in keywords):
                labels.append(canonical)

    if not labels:
        labels.append("no_technique")
    # "no propaganda" alongside real techniques is contradictory; keep the
    # concrete techniques.
    if len(labels) > 1 and "no_technique" in labels:
        labels.remove("no_technique")

    return labels
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from llmebench.datasets import ArProCoarseDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import MultilabelPropagandaTask


def metadata():
    """Describe the authorship, backing model, and reported score of this asset."""
    info = {}
    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
    info["model"] = "gpt-4-32k (version 0314)"
    info["description"] = (
        "GPT4 32k tokens model hosted on Azure, using the "
        "ChatCompletion API. API version '2023-03-15-preview'."
    )
    info["scores"] = {"Micro-F1": "0.540"}
    return info


def config():
    """Wire together the dataset, task, and model used to run this asset."""
    return dict(
        dataset=ArProCoarseDataset,
        task=MultilabelPropagandaTask,
        model=OpenAIModel,
        model_args={"max_tries": 3},
    )


def prompt(input_sample):
    """Build the zero-shot chat messages for coarse-grained propaganda labelling."""
    technique_glossary = (
        "Your task is to analyze the text and determine if it contains elements of propaganda.\n\n"
        "The following coarse-grained propaganda techniques are defined based on the appearance of any of the fine-grained propaganda techniques. The left side of the equal sign indicates coarse-grained techniques and right side indicates fine-grained techniques.\n\n"
        "no_technique = ['no propaganda']\n"
        "Manipulative Wording = ['Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation, Vagueness, Confusion' , 'Repetition']\n"
        "Reputation = ['Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation']\n"
        "Justification = ['Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving']\n"
        "Simplification = ['Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice']\n"
        "Distraction = ['Red Herring' , 'Straw Man' , 'Whataboutism']\n"
        "Call = ['Appeal to Time' , 'Conversation Killer' , 'Slogans']\n"
    )
    user_content = (
        technique_glossary
        + "Based on the instructions above analyze the following text and provide only coarse-grained propaganda techniques as a list of strings.\n\n"
        + f"text: {input_sample}\nlabel: \n"
    )
    return [
        {"role": "system", "content": "You are an expert annotator."},
        {"role": "user", "content": user_content},
    ]


def post_process(response):
    """Parse the model reply into a list of coarse-grained technique labels.

    The reply is split on commas, bracket/quote characters are stripped,
    and each fragment is matched case-insensitively by keyword against the
    known coarse labels. Falls back to ["no_technique"] when nothing
    matches, and drops "no_technique" when real techniques were also found.
    """
    content = response["choices"][0]["message"]["content"]

    # Fixes the original's shadowing of the `response` parameter and of the
    # raw `label` string by the loop variable.
    fragments = [
        piece.strip().replace("'", "").replace("[", "").replace("]", "")
        for piece in content.split(",")
        if len(piece) > 1
    ]

    # (keywords, canonical label): a fragment containing any keyword in a
    # group contributes that label once, preserving the original check order.
    keyword_map = [
        (("manipulative",), "Manipulative_Wording"),
        (("call",), "Call"),
        (("reputation",), "Reputation"),
        (("technique", "propaganda"), "no_technique"),
        (("justification",), "Justification"),
        (("simplification",), "Simplification"),
        (("distraction",), "Distraction"),
    ]

    labels = []
    for fragment in fragments:
        lowered = fragment.lower()
        for keywords, canonical in keyword_map:
            if any(keyword in lowered for keyword in keywords):
                labels.append(canonical)

    if not labels:
        labels.append("no_technique")
    # "no propaganda" alongside real techniques is contradictory; keep the
    # concrete techniques.
    if len(labels) > 1 and "no_technique" in labels:
        labels.remove("no_technique")

    return labels
Loading
Loading