Skip to content

Commit

Permalink
email info with unigram and bigram percentage
Browse files Browse the repository at this point in the history
  • Loading branch information
minutogit committed Dec 7, 2024
1 parent 967c5c2 commit bddd690
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/_locales/de/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@
"message": "Wahrscheinlichkeit"
},
"emailinfo_label_known_tokens": {
"message": "Bekannte Tokens"
"message": "Bekannte Token (Uni-/Bigrams)"
},
"emailinfo_top_tokens": {
"message": "Top Tokens für Schlagwort \"$1\""
Expand Down
2 changes: 1 addition & 1 deletion src/_locales/en/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@
"message": "Probability"
},
"emailinfo_label_known_tokens": {
"message": "Known Tokens"
"message": "Known Tokens (Uni-/Bigrams)"
},
"emailinfo_top_tokens": {
"message": "Top Tokens for Tag \"$1\""
Expand Down
22 changes: 15 additions & 7 deletions src/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,10 @@ function onNewMailReceived(folder, messages) {
}


/**
* Displays Bayes information for a specific email message.
* @param {string} messageId - The ID of the email message.
*/
function showEMailInfo(messageId) {
console.log(`Displaying Bayes info for message ID: ${messageId}`);

Expand All @@ -854,26 +858,29 @@ function showEMailInfo(messageId) {
const tokenContributions = probabilityData.tokenContributions;

// Berechnung des bekannten Token-Prozentsatzes
let knownPercentage = 0;
let knownUnigramsPercentage = 0;
let knownBigramsPercentage = 0;
if (bayesData[tagName].tokenList) {
const knownTokenData = getKnownTokenPercentage(tokens, bayesData[tagName].tokenList);
knownPercentage = knownTokenData.knownPercentage;
// Verwende die neue Funktion calculateKnownTokenTypesPercentage
const knownTokenData = calculateKnownTokenTypesPercentage(tokens, bayesData[tagName].tokenList);
knownUnigramsPercentage = knownTokenData.knownUnigramsPercentage;
knownBigramsPercentage = knownTokenData.knownBigramsPercentage;
}

probabilities.push({
tag: tagName,
tagKey: tagKey,
probability: (probability * 100).toFixed(2),
tokenContributions: tokenContributions,
knownTokenPercentage: knownPercentage.toFixed(2)
knownUnigramsPercentage: knownUnigramsPercentage.toFixed(2),
knownBigramsPercentage: knownBigramsPercentage.toFixed(2)
});

console.log(
`Probability for ${tagName}: ${(probability * 100).toFixed(2)}%`
);
console.log(
`Known Token Percentage for ${tagName}: ${knownPercentage.toFixed(2)}%`
);
console.log(`Known Unigrams/Bigrams for ${tagName}: ${knownUnigramsPercentage.toFixed(2)}% / ${knownBigramsPercentage.toFixed(2)}%`);

}
});

Expand Down Expand Up @@ -901,6 +908,7 @@ function showEMailInfo(messageId) {
});
}


messenger.runtime.onMessage.addListener((message, sender, sendResponse) => {
if (message.action === "refreshBayesData") {
messenger.storage.local
Expand Down
56 changes: 32 additions & 24 deletions src/email_info.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,16 @@ document.addEventListener("DOMContentLoaded", async () => {
}
});


function displayEmailInfo() {
const tableBody = document.getElementById("probabilities-table");
const tokenTablesContainer = document.getElementById("token-tables-container");
const toggleButton = document.getElementById("toggle-tokens-button");
tokenTablesContainer.style.display = 'none'; // Token-Tabellen anfangs ausblenden
tokenTablesContainer.style.display = 'none'; // Initially hide token tables

let tagKeyToNameMap = {};
let tagNameToKeyMap = {};

// Setze HTML-Übersetzungen mit der trans-Funktion
// Set translated texts using the trans function
document.getElementById('email-info-title').textContent = trans("emailinfo_title");
document.getElementById('emailinfo_label_tag').textContent = trans("emailinfo_label_tag");
document.getElementById('emailinfo_label_probability').textContent = trans("emailinfo_label_probability");
Expand All @@ -100,7 +101,7 @@ function displayEmailInfo() {
const probabilities = result.bayesInfoData || [];
const bayesData = result.bayesData || {};

// Mapping von Tag-Key zu Tag-Name und umgekehrt erstellen
// Create mappings from Tag-Key to Tag-Name and vice versa
tags.forEach((tag) => {
tagKeyToNameMap[tag.key] = tag.tag;
tagNameToKeyMap[tag.tag] = tag.key;
Expand All @@ -109,56 +110,57 @@ function displayEmailInfo() {
probabilities.forEach((item) => {
const row = document.createElement("tr");

// Tag Cell
const tagCell = document.createElement("td");
tagCell.textContent = item.tag;
row.appendChild(tagCell);

// Probability Cell
const probCell = document.createElement("td");

if (!bayesData[item.tag]) {
probCell.textContent = "50%";
} else if (!bayesData[item.tag].trainingCount) {
probCell.textContent = "50%";
} else {
probCell.textContent = item.probability + "%";
}

row.appendChild(probCell);

// Hinzufügen der Zelle für bekannte Tokens
// Known Tokens Percentage Cell (Unigrams / Bigrams)
const knownTokensCell = document.createElement("td");
if (item.knownTokenPercentage !== undefined) {
knownTokensCell.textContent = item.knownTokenPercentage + "%";
if (item.knownUnigramsPercentage !== undefined && item.knownBigramsPercentage !== undefined) {
knownTokensCell.textContent = `${item.knownUnigramsPercentage}% / ${item.knownBigramsPercentage}%`;
} else {
knownTokensCell.textContent = "0.00%"; // Fallback, falls nicht definiert
knownTokensCell.textContent = "0.00% / 0.00%"; // Fallback if percentages are not defined
}
row.appendChild(knownTokensCell);

tableBody.appendChild(row);

// Verarbeitung der TokenContributions, um Top 5 positive und negative Tokens zu finden
// Process Token Contributions to find Top 5 Positive and Negative Tokens
const tokenContributions = item.tokenContributions || [];

// Filtere die Tokens, die in der E-Mail vorhanden sind
// Filter tokens present in the email
const tokensInEmail = tokenContributions.filter(tc => tc.isPresent);

// Top 5 positive Tokens (höchste positive Beiträge)
// Top 10 Positive Tokens (highest positive contributions)
const topPositiveTokens = tokensInEmail
.filter(tc => tc.contribution > 0)
.sort((a, b) => b.contribution - a.contribution)
.slice(0, 5);
.slice(0, 10);

// Top 5 negative Tokens (niedrigste negative Beiträge)
// Top 10 Negative Tokens (lowest negative contributions)
const topNegativeTokens = tokensInEmail
.filter(tc => tc.contribution < 0)
.sort((a, b) => a.contribution - b.contribution)
.slice(0, 5);
.slice(0, 10);

// Erstelle eine Tabelle für das aktuelle Schlagwort
// Create a table for the current tag
const tokenTable = document.createElement("table");
tokenTable.style.marginTop = "15px";
tokenTable.style.width = "100%";

// Table Header
const tokenTableHeader = document.createElement("thead");
const tokenTableHeaderRow = document.createElement("tr");
const tokenTableHeaderCell = document.createElement("th");
Expand All @@ -169,6 +171,7 @@ function displayEmailInfo() {
tokenTableHeader.appendChild(tokenTableHeaderRow);
tokenTable.appendChild(tokenTableHeader);

// Table Subheader
const tokenTableSubHeader = document.createElement("tr");
const tokenSubHeaderToken = document.createElement("th");
tokenSubHeaderToken.textContent = trans("emailinfo_token");
Expand All @@ -185,9 +188,10 @@ function displayEmailInfo() {

const tokenTableBody = document.createElement("tbody");

// Füge die positiven Tokens hinzu
// Add Positive Tokens
topPositiveTokens.forEach(tc => {
const tr = document.createElement("tr");

const tdToken = document.createElement("td");
tdToken.textContent = tc.token;
tdToken.classList.add("positive-token");
Expand All @@ -197,15 +201,17 @@ function displayEmailInfo() {

const tdType = document.createElement("td");
tdType.textContent = trans("emailinfo_positive");

tr.appendChild(tdToken);
tr.appendChild(tdContribution);
tr.appendChild(tdType);
tokenTableBody.appendChild(tr);
});

// Füge die negativen Tokens hinzu
// Add Negative Tokens
topNegativeTokens.forEach(tc => {
const tr = document.createElement("tr");

const tdToken = document.createElement("td");
tdToken.textContent = tc.token;
tdToken.classList.add("negative-token");
Expand All @@ -215,6 +221,7 @@ function displayEmailInfo() {

const tdType = document.createElement("td");
tdType.textContent = trans("emailinfo_negative");

tr.appendChild(tdToken);
tr.appendChild(tdContribution);
tr.appendChild(tdType);
Expand All @@ -225,31 +232,32 @@ function displayEmailInfo() {
tokenTablesContainer.appendChild(tokenTable);
});

// Entferne bayesInfoData nach der Anzeige
// Remove bayesInfoData after display
messenger.storage.local.remove(["bayesInfoData"]);

// Fenstergröße nach dem Laden des Inhalts anpassen
// Adjust window size after loading content
adjustWindowSize();
}).catch((error) => {
console.error("Error loading Bayes info data:", error);
});

// Event Listener für den Toggle-Button hinzufügen
// Add Event Listener to the Toggle Button
if (toggleButton) {
toggleButton.addEventListener('click', () => {
if (tokenTablesContainer.style.display === 'none' || tokenTablesContainer.style.display === '') {
tokenTablesContainer.style.display = 'block';
toggleButton.innerHTML = '&#9650;'; // Pfeil nach oben
toggleButton.innerHTML = '&#9650;'; // Up arrow
} else {
tokenTablesContainer.style.display = 'none';
toggleButton.innerHTML = '&#9660;'; // Pfeil nach unten
toggleButton.innerHTML = '&#9660;'; // Down arrow
}

adjustWindowSize();
});
}
}


function adjustWindowSize() {
// Warten, bis der Inhalt gerendert wurde
setTimeout(() => {
Expand Down
53 changes: 53 additions & 0 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,56 @@ function getKnownTokenPercentage(tokens, tokenList) {

return { knownPercentage };
}


/**
 * Calculates which share of an email's distinct unigrams and bigrams is
 * already present in the token database of a tag.
 *
 * Duplicate tokens are counted once. A token is classified as a bigram when it
 * contains an underscore "_", otherwise as a unigram.
 *
 * A token counts as "known" only when `tokenList` has it as an OWN property
 * with a truthy value. Inherited members of `Object.prototype` (for example
 * "constructor", "toString" or "__proto__") are never treated as known, so
 * ordinary words that collide with built-in property names cannot inflate the
 * percentages.
 *
 * @param {Array<string>} tokens - List of tokens in the email.
 * @param {Object} tokenList - The existing token database for a specific tag
 *   (keyword), keyed by token. A null/undefined value is treated as empty.
 * @returns {{knownUnigramsPercentage: number, knownBigramsPercentage: number}}
 *   Percentages in the range 0-100; 0 when the email has no token of that type.
 */
function calculateKnownTokenTypesPercentage(tokens, tokenList) {
  // Own-property lookup: a bare `tokenList[token]` would also resolve names
  // inherited from Object.prototype and report them as known tokens.
  const isKnown = (token) =>
    tokenList != null &&
    Object.prototype.hasOwnProperty.call(tokenList, token) &&
    Boolean(tokenList[token]);

  // Remove duplicates so that every distinct token is counted exactly once
  const uniqueTokens = new Set(tokens);

  let knownUnigrams = 0;
  let totalUnigrams = 0;
  let knownBigrams = 0;
  let totalBigrams = 0;

  for (const token of uniqueTokens) {
    const known = isKnown(token);
    if (token.includes('_')) {
      // Bigrams are the tokens joined with an underscore
      totalBigrams++;
      if (known) {
        knownBigrams++;
      }
    } else {
      totalUnigrams++;
      if (known) {
        knownUnigrams++;
      }
    }
  }

  // Guard against division by zero when a token type does not occur at all
  const knownUnigramsPercentage = totalUnigrams > 0 ? (knownUnigrams / totalUnigrams) * 100 : 0;
  const knownBigramsPercentage = totalBigrams > 0 ? (knownBigrams / totalBigrams) * 100 : 0;

  // Log the results for debugging
  console.log(`Known unigrams: ${knownUnigramsPercentage.toFixed(2)}% (Known: ${knownUnigrams}, Total: ${totalUnigrams})`);
  console.log(`Known bigrams: ${knownBigramsPercentage.toFixed(2)}% (Known: ${knownBigrams}, Total: ${totalBigrams})`);

  return {
    knownUnigramsPercentage,
    knownBigramsPercentage
  };
}

0 comments on commit bddd690

Please sign in to comment.