Skip to content
This repository has been archived by the owner on Nov 4, 2024. It is now read-only.

Commit

Permalink
Also fix MIME header parsing in JavaScript, read issue #1403
Browse files Browse the repository at this point in the history
  • Loading branch information
the-djmaze committed Feb 4, 2024
1 parent 0914ede commit ec9197c
Show file tree
Hide file tree
Showing 8 changed files with 314 additions and 223 deletions.
3 changes: 2 additions & 1 deletion dev/Component/EmailAddresses.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { doc, createElement, addEventsListeners } from 'Common/Globals';
import { EmailModel, addressparser } from 'Model/Email';
import { EmailModel } from 'Model/Email';
import { addressparser } from 'Mime/Address';

const contentType = 'snappymail/emailaddress',
getAddressKey = li => li?.emailaddress?.key,
Expand Down
197 changes: 197 additions & 0 deletions dev/Mime/Address.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import { decodeEncodedWords } from 'Mime/Encoding';

/**
 * Parses structured e-mail addresses from an address/mailbox(-list) field
 * https://datatracker.ietf.org/doc/html/rfc2822#section-3.4
 *
 * Example:
 *
 *    "Name <address@domain>"
 *
 * will be converted to
 *
 *     [{name: "Name", email: "address@domain"}]
 *
 * The field is scanned one code point at a time and split into typed tokens
 * ('text', 'email', 'comment', 'group'). Every ',' or ';' seen outside of a
 * quoted string, comment or angle-addr closes the current address, whose
 * tokens are then converted by _handleAddress().
 *
 * @param {String} str Address field; falsy values are treated as an empty string
 * @return {Array} An array of address objects
 */
export function addressparser(str) {
	str = (str || '').toString();

	let
		// Character that closes the currently open sequence, '' when none is open
		endOperator = '',
		node = {
			type: 'text',
			value: ''
		},
		// True when the previous character was an unescaped backslash
		escaped = false,
		// Tokens of the address currently being collected
		address = [],
		addresses = [];

	const
		/*
		 * Operator tokens and which tokens are expected to end the sequence
		 */
		OPERATORS = {
			'"': '"',
			'(': ')',
			'<': '>',
			',': '',
			// Groups are ended by semicolons
			':': ';',
			// Semicolons are not a legal delimiter per the RFC2822 grammar other
			// than for terminating a group, but they are also not valid for any
			// other use in this context. Given that some mail clients have
			// historically allowed the semicolon as a delimiter equivalent to the
			// comma in their UI, it makes sense to treat them the same as a comma
			// when used outside of a group.
			';': ''
		},
		// Stores the token when it is not empty and starts a fresh text token
		pushToken = token => {
			token.value = (token.value || '').toString().trim();
			if (token.value.length) {
				address.push(token);
			}
			node = {
				type: 'text',
				value: ''
			};
			escaped = false;
		},
		// Converts the collected tokens into address objects and starts a new address
		pushAddress = () => {
			if (address.length) {
				const parsed = _handleAddress(address);
				if (parsed.length) {
					addresses = addresses.concat(parsed);
				}
			}
			address = [];
		};

	for (const chr of str) {
		if (!escaped && (chr === endOperator || (!endOperator && chr in OPERATORS))) {
			pushToken(node);
			if (',' === chr || ';' === chr) {
				// A ';' that gets here can be the terminator of a group. The pending
				// end operator must be cleared in that case, else every following
				// ',', '<', '"' and '(' would be swallowed as plain text.
				endOperator = '';
				pushAddress();
			} else {
				// Either the open sequence was closed or a new one starts
				endOperator = endOperator ? '' : OPERATORS[chr];
				if ('<' === chr) {
					node.type = 'email';
				} else if ('(' === chr) {
					node.type = 'comment';
				} else if (':' === chr) {
					node.type = 'group';
				}
			}
		} else {
			// Note: an escaping backslash itself is kept in the token value
			node.value += chr;
			escaped = !escaped && '\\' === chr;
		}
	}
	pushToken(node);

	pushAddress();

	return addresses;
}

/**
 * Converts the tokens of a single address into address objects.
 *
 * A group is expanded into its member addresses by running addressparser()
 * on the group content; the display name of the group is not returned.
 * Any other token list yields exactly one `{email, name}` object.
 *
 * @param {Array} tokens Token objects with a `type` and a `value`
 * @return {Array} Address objects
 */
function _handleAddress(tokens) {
	const
		buckets = {
			email: [],
			comment: [],
			group: [],
			text: []
		},
		// A value that is nothing but an address
		exactAddress = /^[^@\s]+@[^@\s]+$/,
		// An address somewhere inside other text, including surrounding whitespace
		embeddedAddress = /\s*\b[^@\s]+@[^@\s]+\b\s*/,
		// When there is no text but a comment, the comment becomes the text
		promoteComment = () => {
			if (!buckets.text.length && buckets.comment.length) {
				buckets.text = buckets.comment;
				buckets.comment = [];
			}
		};

	let containsGroup = false;
	for (const token of tokens) {
		if ('group' === token.type) {
			containsGroup = true;
		}
		buckets[token.type].push(token.value);
	}

	promoteComment();

	if (containsGroup) {
		// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
		return [].concat(addressparser(buckets.group.join(',')));
	}

	// If no address was found, try to detect one from regular text
	if (!buckets.email.length && buckets.text.length) {
		// First look, last entry first, for a text entry that is an address as a whole
		for (let idx = buckets.text.length - 1; idx >= 0; --idx) {
			if (exactAddress.test(buckets.text[idx])) {
				buckets.email = buckets.text.splice(idx, 1);
				break;
			}
		}

		// Still no address: cut the first address-like part out of a text entry
		if (!buckets.email.length) {
			for (let idx = buckets.text.length - 1; idx >= 0; --idx) {
				const
					entry = buckets.text[idx],
					hit = embeddedAddress.exec(entry);
				if (hit) {
					buckets.email = [hit[0].trim()];
					buckets.text[idx] = entry.slice(0, hit.index) + entry.slice(hit.index + hit[0].length);
					break;
				}
			}
		}
	}

	// The text may have been emptied above, so check the comment again
	promoteComment();

	// Keep only the first address occurrence, push others to regular text
	if (buckets.email.length > 1) {
		buckets.text = buckets.text.concat(buckets.email.splice(1));
	}

	// Values are joined with spaces
	const result = {
		email: decodeEncodedWords(buckets.email.join(' ').trim()),
		name: decodeEncodedWords(buckets.text.join(' ').trim())
	};

	// The same value for both means there is only one piece of information
	if (result.email === result.name) {
		if (result.email.includes('@')) {
			result.name = '';
		} else {
			result.email = '';
		}
	}

	return [result];
}
35 changes: 35 additions & 0 deletions dev/Mime/Encoding.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
const
	// Arguments for String.replace() that turn every "=XX" escape into the
	// character with that code. RFC 2045 only allows uppercase hex digits, but
	// names lowercase ones as something a robust decoder may accept.
	QPDecodeParams = [/=([0-9A-F]{2})/gi, (match, hex) => String.fromCharCode(parseInt(hex, 16))];

export const
	/**
	 * Decodes base64 into a binary string (one character per byte).
	 * https://datatracker.ietf.org/doc/html/rfc2045#section-6.8
	 */
	BDecode = atob,

	/**
	 * Encodes a string as the base64 of its UTF-8 bytes.
	 * btoa() only accepts characters up to U+00FF, therefore the UTF-16
	 * DOMString is first converted to a string of UTF-8 bytes.
	 * @param {String} data
	 * @return {String}
	 */
	BEncode = data => btoa(
		Array.from(new TextEncoder().encode(String(data)), byte => String.fromCharCode(byte)).join('')
	),

	/**
	 * Decodes quoted-printable into a binary string: soft line breaks are
	 * removed and "=XX" escapes are replaced.
	 * https://datatracker.ietf.org/doc/html/rfc2045#section-6.7
	 * @param {String} data
	 * @return {String}
	 */
	QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),

	/**
	 * Replaces all RFC 2047 encoded-words in a header value with their text.
	 * https://datatracker.ietf.org/doc/html/rfc2047#section-4.1
	 * https://datatracker.ietf.org/doc/html/rfc2047#section-4.2
	 * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
	 *
	 * An encoded-word that can not be decoded (invalid base64, unknown charset)
	 * is left in the result as it was.
	 * @param {String} data
	 * @return {String}
	 */
	decodeEncodedWords = data =>
		data
			// Whitespace between two adjacent encoded-words is not part of the text
			// https://datatracker.ietf.org/doc/html/rfc2047#section-6.2
			.replace(/(=\?[^?\s]+\?[BQ]\?[^?\s]*\?=)\s+(?==\?[^?\s]+\?[BQ]\?[^?\s]*\?=)/gi, '$1')
			// The encoding is case-independent, hence the "i" flag
			.replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/gi, (m, charset, encoding, text) => {
				try {
					const bytes = 'B' === encoding.toUpperCase()
						? BDecode(text)
						// In the "Q" encoding an underscore represents a space
						: QPDecode(text.replace(/_/g, ' '));
					// decodeText() yields undefined on failure, which replace()
					// would insert as the text "undefined"
					return decodeText(charset, bytes) ?? m;
				} catch (e) {
					// BDecode throws on invalid base64
					console.error({charset:charset,error:e});
					return m;
				}
			})
	,

	/**
	 * Converts a binary string (one character per byte) in the given charset
	 * to a regular string.
	 * @param {String} charset Label of the encoding
	 * @param {String} data Binary string
	 * @return {String|undefined} undefined when decoding failed, the error is logged
	 */
	decodeText = (charset, data) => {
		try {
			// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
			return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
		} catch (e) {
			console.error({charset:charset,error:e});
		}
	};
86 changes: 63 additions & 23 deletions dev/Mime/Parser.js
Original file line number Diff line number Diff line change
@@ -1,17 +1,5 @@
//import { b64Encode } from 'Common/Utils';

const
// RFC2045
QPDecodeParams = [/=([0-9A-F]{2})/g, (...args) => String.fromCharCode(parseInt(args[1], 16))],
QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),
decodeText = (charset, data) => {
try {
// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
} catch (e) {
console.error({charset:charset,error:e});
}
};
import { decodeEncodedWords, BDecode, BEncode, QPDecode, decodeText } from 'Mime/Encoding';
import { addressparser } from 'Mime/Address';

export function ParseMime(text)
{
Expand All @@ -27,7 +15,49 @@ export function ParseMime(text)
this.bodyEnd = 0;
this.boundary = '';
this.bodyText = '';
this.headers = {};
// https://datatracker.ietf.org/doc/html/rfc2822#section-3.6
this.headers = {
// Required
date = null,
from = [], // mailbox-list
// Optional
sender = [], // MUST occur with multi-address
'reply-to' = [], // address-list
to = [], // address-list
cc = [], // address-list
bcc = [], // address-list
'message-id' = '', // msg-id SHOULD be present
'in-reply-to' = '', // 1*msg-id SHOULD occur in some replies
references = '', // 1*msg-id SHOULD occur in some replies
subject = '', // unstructured
// Optional unlimited
comments = [], // unstructured
keywords = [], // phrase *("," phrase)
// https://datatracker.ietf.org/doc/html/rfc2822#section-3.6.6
trace = [],
'resent-date' = [],
'resent-from' = [],
'resent-sender' = [],
'resent-to' = [],
'resent-cc' = [],
'resent-bcc' = [],
'resent-msg-id' = [],
// optional others outside RFC2822
'mime-version' = '',
'content-transfer-encoding' = '',
'content-type' = '',
'delivered-to' = '', // angle-addr
'return-path' = '', // angle-addr
'received' = [],
'authentication-results' = '', // dkim, spf, dmarc
'dkim-signature' = '',
'x-rspamd-queue-id' = '',
'x-rspamd-action' = '',
'x-spamd-bar' = '',
'x-rspamd-server' = '',
'x-spamd-result' = '',
'x-remote-address' = '',
};
}
*/

Expand All @@ -54,7 +84,7 @@ export function ParseMime(text)
if ('quoted-printable' == encoding) {
body = QPDecode(body);
} else if ('base64' == encoding) {
body = atob(body.replace(/\r?\n/g, ''));
body = BDecode(body.replace(/\r?\n/g, ''));
}
return decodeText(charset, body);
}
Expand All @@ -68,8 +98,7 @@ export function ParseMime(text)
if ('quoted-printable' == encoding) {
body = QPDecode(body);
}
body = btoa(body);
// body = b64Encode(body);
body = BEncode(body);
}
return 'data:' + this.headerValue('content-type') + ';base64,' + body;
}
Expand All @@ -92,6 +121,9 @@ export function ParseMime(text)
}
}

// mailbox-list or address-list
const lists = ['from','reply-to','to','cc','bcc'];

const ParsePart = (mimePart, start_pos = 0, id = '') =>
{
let part = new MimePart,
Expand All @@ -113,11 +145,19 @@ export function ParseMime(text)
[...header.matchAll(/;\s*([^;=]+)=\s*"?([^;"]+)"?/g)].forEach(param =>
params[param[1].trim().toLowerCase()] = param[2].trim()
);
// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
match[2] = match[2].trim().replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/g, (m, charset, encoding, text) =>
decodeText(charset, 'B' == encoding ? atob(text) : QPDecode(text))
);
headers[match[1].trim().toLowerCase()] = {
// Header names are case-insensitive, they are stored in lowercase
let field = match[1].trim().toLowerCase();
if (lists.includes(field)) {
	// mailbox-list or address-list: array of address objects
	match[2] = addressparser(match[2]);
} else if ('keywords' === field) {
	// The field may occur several times, so append to what was already collected.
	// map() is required here: forEach() returns undefined, which made the value [undefined]
	match[2] = (headers[field]?.value || []).concat(
		match[2].split(',').map(entry => decodeEncodedWords(entry.trim()))
	);
} else {
	match[2] = decodeEncodedWords(match[2].trim());
	if ('comments' === field) {
		// The field may occur several times, so append to what was already collected.
		// concat() returns the array, whereas push() returns the new length
		match[2] = (headers[field]?.value || []).concat(match[2]);
	}
}
headers[field] = {
	value: match[2],
	params: params
};
Expand Down
Loading

0 comments on commit ec9197c

Please sign in to comment.