diff --git a/.github/workflows/release_script.yml b/.github/workflows/release_script.yml index 0c7006f..531652d 100644 --- a/.github/workflows/release_script.yml +++ b/.github/workflows/release_script.yml @@ -1,7 +1,7 @@ name: Publish Python 🐍 distribution 📦 to PyPI on: - workflow_dispatch: + workflow_dispatch jobs: build: diff --git a/inception_reports/generate_reports_manager.py b/inception_reports/generate_reports_manager.py index e92ef0a..a012009 100644 --- a/inception_reports/generate_reports_manager.py +++ b/inception_reports/generate_reports_manager.py @@ -247,7 +247,9 @@ def select_method_to_import_data(): inception_status = st.session_state.get("inception_status", False) inception_client = st.session_state.get("inception_client", None) if not inception_status: - inception_status, inception_client = login_to_inception(api_url, username, password) + inception_status, inception_client = login_to_inception( + api_url, username, password + ) st.session_state["inception_status"] = inception_status st.session_state["inception_client"] = inception_client @@ -258,13 +260,15 @@ def select_method_to_import_data(): if inception_status and "available_projects" in st.session_state: st.sidebar.write("Select the projects to import:") selected_projects = st.session_state.get("selected_projects", {}) - + for inception_project in st.session_state["available_projects"]: project_name = inception_project.project_name project_id = inception_project.project_id - selected_projects[project_id] = st.sidebar.checkbox(project_name, value=False) + selected_projects[project_id] = st.sidebar.checkbox( + project_name, value=False + ) st.session_state["selected_projects"] = selected_projects - + selected_projects_names = [] button = st.sidebar.button("Generate Reports") if button: @@ -274,12 +278,16 @@ def select_method_to_import_data(): selected_projects_names.append(project.project_name) file_path = f"{projects_folder}/{project.project_name}.zip" st.sidebar.write(f"Importing project: {project.project_name}") - project_export = inception_client.api.export_project(project, "jsoncas") + project_export = inception_client.api.export_project( + project, "jsoncas" + ) with open(file_path, "wb") as f: f.write(project_export) - + st.session_state["method"] = "API" - st.session_state["projects"] = read_dir(projects_folder, selected_projects_names) + st.session_state["projects"] = read_dir( + projects_folder, selected_projects_names + ) set_sidebar_state("collapsed") @@ -308,30 +316,43 @@ def get_type_counts(annotations): annotations (dict): A dictionary containing the annotations. Returns: - dict: A dictionary containing the count of each type. + dict: A dictionary containing the count of each type, both total and per document. + The structure is {type_name: {'total': count, 'documents': {doc_id: count}}}. """ - count_dict = {} - layerDefinition = next(iter(annotations.values())).select( + type_count_dict = {} + + # Assuming that all documents have the same layer definition + first_doc = next(iter(annotations.values())) + layer_definitions = first_doc.select( "de.tudarmstadt.ukp.clarin.webanno.api.type.LayerDefinition" ) - for doc in annotations: + + for doc_id, cas in annotations.items(): + # Get the list of types for the current CAS object type_names = [ - ( - find_element_by_name(layerDefinition, t.name), - len(annotations[doc].select(t.name)), - ) - for t in annotations[doc].typesystem.get_types() + (find_element_by_name(layer_definitions, t.name), len(cas.select(t.name))) + for t in cas.typesystem.get_types() if t.name != "de.tudarmstadt.ukp.clarin.webanno.api.type.LayerDefinition" ] for type_name, count in type_names: - if type_name not in count_dict: - count_dict[type_name] = 0 - count_dict[type_name] += count - count_dict = {k: v for k, v in count_dict.items() if v > 0} - count_dict = dict(sorted(count_dict.items(), key=lambda item: item[1])) + if type_name is None: + continue - return count_dict + if type_name not in type_count_dict: + type_count_dict[type_name] = {"total": 0, "documents": {}} + type_count_dict[type_name]["total"] += count + + if doc_id not in type_count_dict[type_name]["documents"]: + type_count_dict[type_name]["documents"][doc_id] = 0 + type_count_dict[type_name]["documents"][doc_id] += count + + type_count_dict = {k: v for k, v in type_count_dict.items() if v["total"] > 0} + type_count_dict = dict( + sorted(type_count_dict.items(), key=lambda item: item[1]["total"]) + ) + + return type_count_dict def export_data(project_data, output_directory=None): @@ -383,6 +404,7 @@ def plot_project_progress(project) -> None: project_tags = project["tags"] project_annotations = project["annotations"] project_documents = project["documents"] + type_counts = get_type_counts(project_annotations) if project_tags: st.write( @@ -408,15 +430,29 @@ def plot_project_progress(project) -> None: if state in doc_categories: doc_categories[state] += 1 + doc_token_categories = { + "ANNOTATION_IN_PROGRESS": 0, + "ANNOTATION_FINISHED": 0, + "CURATION_IN_PROGRESS": 0, + "CURATION_FINISHED": 0, + "NEW": 0, + } + + for doc in project_documents: + state = doc["state"] + if state in doc_token_categories: + doc_token_categories[state] += type_counts["Token"]["documents"][ + doc["name"] + ] + project_data = { "project_name": project_name, "project_tags": project_tags, "doc_categories": doc_categories, + "doc_token_categories": doc_token_categories, } - type_counts = get_type_counts(project_annotations) - - data_sizes = [ + data_sizes_docs = [ project_data["doc_categories"]["NEW"], project_data["doc_categories"]["ANNOTATION_IN_PROGRESS"], project_data["doc_categories"]["ANNOTATION_FINISHED"], @@ -424,6 +460,14 @@ def plot_project_progress(project) -> None: project_data["doc_categories"]["CURATION_FINISHED"], ] + data_sizes_tokens = [ + project_data["doc_token_categories"]["NEW"], + project_data["doc_token_categories"]["ANNOTATION_IN_PROGRESS"], + project_data["doc_token_categories"]["ANNOTATION_FINISHED"], + project_data["doc_token_categories"]["CURATION_IN_PROGRESS"], + project_data["doc_token_categories"]["CURATION_FINISHED"], + ] + pie_labels = [ "New", "Annotation In Progress", @@ -432,26 +476,41 @@ def plot_project_progress(project) -> None: "Curation Finished", ] - df_pie = pd.DataFrame({"Labels": pie_labels, "Sizes": data_sizes}).sort_values( - by="Labels", ascending=True - ) + df_pie_docs = pd.DataFrame( + {"Labels": pie_labels, "Sizes": data_sizes_docs} + ).sort_values(by="Labels", ascending=True) + df_pie_tokens = pd.DataFrame( + {"Labels": pie_labels, "Sizes": data_sizes_tokens} + ).sort_values(by="Labels", ascending=True) df_bar = pd.DataFrame( { - "Types": [type for type, _ in type_counts.items()], - "Counts": list(type_counts.values()), + "Types": type_counts.keys(), + "Counts": [type_counts[type_name]["total"] for type_name in type_counts], } ) - pie_chart = go.Figure( + pie_chart = go.Figure() + + pie_chart.add_trace( go.Pie( - labels=df_pie["Labels"], - values=df_pie["Sizes"], + labels=df_pie_docs["Labels"], + values=df_pie_docs["Sizes"], sort=False, hole=0.4, hoverinfo="label+value", ) ) + pie_chart.add_trace( + go.Pie( + labels=df_pie_tokens["Labels"], + values=df_pie_tokens["Sizes"], + sort=False, + hole=0.4, + hoverinfo="label+value", + visible=False, + ) + ) pie_chart.update_layout( title=dict( @@ -466,6 +525,30 @@ def plot_project_progress(project) -> None: paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)", margin=dict(l=40, r=40), + updatemenus=[ + { + "buttons": [ + { + "label": "Documents", + "method": "update", + "args": [ + {"visible": [True, False]}, + {"title": "Documents Status"}, + ], + }, + { + "label": "Tokens", + "method": "update", + "args": [ + {"visible": [False, True]}, + {"title": "Tokens Status"}, + ], + }, + ], + "direction": "down", + "showactive": True, + } + ], ) bar_chart = go.Figure() @@ -493,7 +576,7 @@ def plot_project_progress(project) -> None: ), xaxis_title="Number of Annotations", barmode="overlay", - height= 60 * len(df_bar), + height=60 * len(df_bar), font=dict(size=18), legend=dict(font=dict(size=12)), paper_bgcolor="rgba(0,0,0,0)",