Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature 36 plot status of tokens #38

Merged
merged 3 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/release_script.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: Publish Python 🐍 distribution 📦 to PyPI

on:
workflow_dispatch:
workflow_dispatch

jobs:
build:
Expand Down
151 changes: 117 additions & 34 deletions inception_reports/generate_reports_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,9 @@ def select_method_to_import_data():
inception_status = st.session_state.get("inception_status", False)
inception_client = st.session_state.get("inception_client", None)
if not inception_status:
inception_status, inception_client = login_to_inception(api_url, username, password)
inception_status, inception_client = login_to_inception(
api_url, username, password
)
st.session_state["inception_status"] = inception_status
st.session_state["inception_client"] = inception_client

Expand All @@ -258,13 +260,15 @@ def select_method_to_import_data():
if inception_status and "available_projects" in st.session_state:
st.sidebar.write("Select the projects to import:")
selected_projects = st.session_state.get("selected_projects", {})

for inception_project in st.session_state["available_projects"]:
project_name = inception_project.project_name
project_id = inception_project.project_id
selected_projects[project_id] = st.sidebar.checkbox(project_name, value=False)
selected_projects[project_id] = st.sidebar.checkbox(
project_name, value=False
)
st.session_state["selected_projects"] = selected_projects

selected_projects_names = []
button = st.sidebar.button("Generate Reports")
if button:
Expand All @@ -274,12 +278,16 @@ def select_method_to_import_data():
selected_projects_names.append(project.project_name)
file_path = f"{projects_folder}/{project.project_name}.zip"
st.sidebar.write(f"Importing project: {project.project_name}")
project_export = inception_client.api.export_project(project, "jsoncas")
project_export = inception_client.api.export_project(
project, "jsoncas"
)
with open(file_path, "wb") as f:
f.write(project_export)

st.session_state["method"] = "API"
st.session_state["projects"] = read_dir(projects_folder, selected_projects_names)
st.session_state["projects"] = read_dir(
projects_folder, selected_projects_names
)
set_sidebar_state("collapsed")


Expand Down Expand Up @@ -308,30 +316,43 @@ def get_type_counts(annotations):
annotations (dict): A dictionary containing the annotations.

Returns:
dict: A dictionary containing the count of each type.
dict: A dictionary containing the count of each type, both total and per document.
The structure is {type_name: {'total': count, 'documents': {doc_id: count}}}.
"""
count_dict = {}
layerDefinition = next(iter(annotations.values())).select(
type_count_dict = {}

# Assuming that all documents have the same layer definition
first_doc = next(iter(annotations.values()))
layer_definitions = first_doc.select(
"de.tudarmstadt.ukp.clarin.webanno.api.type.LayerDefinition"
)
for doc in annotations:

for doc_id, cas in annotations.items():
# Get the list of types for the current CAS object
type_names = [
(
find_element_by_name(layerDefinition, t.name),
len(annotations[doc].select(t.name)),
)
for t in annotations[doc].typesystem.get_types()
(find_element_by_name(layer_definitions, t.name), len(cas.select(t.name)))
for t in cas.typesystem.get_types()
if t.name != "de.tudarmstadt.ukp.clarin.webanno.api.type.LayerDefinition"
]

for type_name, count in type_names:
if type_name not in count_dict:
count_dict[type_name] = 0
count_dict[type_name] += count
count_dict = {k: v for k, v in count_dict.items() if v > 0}
count_dict = dict(sorted(count_dict.items(), key=lambda item: item[1]))
if type_name is None:
continue

return count_dict
if type_name not in type_count_dict:
type_count_dict[type_name] = {"total": 0, "documents": {}}
type_count_dict[type_name]["total"] += count

if doc_id not in type_count_dict[type_name]["documents"]:
type_count_dict[type_name]["documents"][doc_id] = 0
type_count_dict[type_name]["documents"][doc_id] += count

type_count_dict = {k: v for k, v in type_count_dict.items() if v["total"] > 0}
type_count_dict = dict(
sorted(type_count_dict.items(), key=lambda item: item[1]["total"])
)

return type_count_dict


def export_data(project_data, output_directory=None):
Expand Down Expand Up @@ -383,6 +404,7 @@ def plot_project_progress(project) -> None:
project_tags = project["tags"]
project_annotations = project["annotations"]
project_documents = project["documents"]
type_counts = get_type_counts(project_annotations)

if project_tags:
st.write(
Expand All @@ -408,22 +430,44 @@ def plot_project_progress(project) -> None:
if state in doc_categories:
doc_categories[state] += 1

doc_token_categories = {
"ANNOTATION_IN_PROGRESS": 0,
"ANNOTATION_FINISHED": 0,
"CURATION_IN_PROGRESS": 0,
"CURATION_FINISHED": 0,
"NEW": 0,
}

for doc in project_documents:
state = doc["state"]
if state in doc_token_categories:
doc_token_categories[state] += type_counts["Token"]["documents"][
doc["name"]
]

project_data = {
"project_name": project_name,
"project_tags": project_tags,
"doc_categories": doc_categories,
"doc_token_categories": doc_token_categories,
}

type_counts = get_type_counts(project_annotations)

data_sizes = [
data_sizes_docs = [
project_data["doc_categories"]["NEW"],
project_data["doc_categories"]["ANNOTATION_IN_PROGRESS"],
project_data["doc_categories"]["ANNOTATION_FINISHED"],
project_data["doc_categories"]["CURATION_IN_PROGRESS"],
project_data["doc_categories"]["CURATION_FINISHED"],
]

data_sizes_tokens = [
project_data["doc_token_categories"]["NEW"],
project_data["doc_token_categories"]["ANNOTATION_IN_PROGRESS"],
project_data["doc_token_categories"]["ANNOTATION_FINISHED"],
project_data["doc_token_categories"]["CURATION_IN_PROGRESS"],
project_data["doc_token_categories"]["CURATION_FINISHED"],
]

pie_labels = [
"New",
"Annotation In Progress",
Expand All @@ -432,26 +476,41 @@ def plot_project_progress(project) -> None:
"Curation Finished",
]

df_pie = pd.DataFrame({"Labels": pie_labels, "Sizes": data_sizes}).sort_values(
by="Labels", ascending=True
)
df_pie_docs = pd.DataFrame(
{"Labels": pie_labels, "Sizes": data_sizes_docs}
).sort_values(by="Labels", ascending=True)
df_pie_tokens = pd.DataFrame(
{"Labels": pie_labels, "Sizes": data_sizes_tokens}
).sort_values(by="Labels", ascending=True)

df_bar = pd.DataFrame(
{
"Types": [type for type, _ in type_counts.items()],
"Counts": list(type_counts.values()),
"Types": type_counts.keys(),
"Counts": [type_counts[type_name]["total"] for type_name in type_counts],
}
)

pie_chart = go.Figure(
pie_chart = go.Figure()

pie_chart.add_trace(
go.Pie(
labels=df_pie["Labels"],
values=df_pie["Sizes"],
labels=df_pie_docs["Labels"],
values=df_pie_docs["Sizes"],
sort=False,
hole=0.4,
hoverinfo="label+value",
)
)
pie_chart.add_trace(
go.Pie(
labels=df_pie_tokens["Labels"],
values=df_pie_tokens["Sizes"],
sort=False,
hole=0.4,
hoverinfo="label+value",
visible=False,
)
)

pie_chart.update_layout(
title=dict(
Expand All @@ -466,6 +525,30 @@ def plot_project_progress(project) -> None:
paper_bgcolor="rgba(0,0,0,0)",
plot_bgcolor="rgba(0,0,0,0)",
margin=dict(l=40, r=40),
updatemenus=[
{
"buttons": [
{
"label": "Documents",
"method": "update",
"args": [
{"visible": [True, False]},
{"title": "Documents Status"},
],
},
{
"label": "Tokens",
"method": "update",
"args": [
{"visible": [False, True]},
{"title": "Tokens Status"},
],
},
],
"direction": "down",
"showactive": True,
}
],
)

bar_chart = go.Figure()
Expand Down Expand Up @@ -493,7 +576,7 @@ def plot_project_progress(project) -> None:
),
xaxis_title="Number of Annotations",
barmode="overlay",
height= 60 * len(df_bar),
height=60 * len(df_bar),
font=dict(size=18),
legend=dict(font=dict(size=12)),
paper_bgcolor="rgba(0,0,0,0)",
Expand Down