Also fix MIME header parsing in JavaScript, read issue #1403

RainLoop · Feb 4, 2024 · ec9197c · ec9197c
1 parent 0914ede
commit ec9197c
Show file tree

Hide file tree

Showing 8 changed files with 314 additions and 223 deletions.
diff --git a/dev/Component/EmailAddresses.js b/dev/Component/EmailAddresses.js
@@ -1,5 +1,6 @@
 import { doc, createElement, addEventsListeners } from 'Common/Globals';
-import { EmailModel, addressparser } from 'Model/Email';
+import { EmailModel } from 'Model/Email';
+import { addressparser } from 'Mime/Address';
 
 const contentType = 'snappymail/emailaddress',
 	getAddressKey = li => li?.emailaddress?.key,

diff --git a/dev/Mime/Address.js b/dev/Mime/Address.js
@@ -0,0 +1,197 @@
+import { decodeEncodedWords } from 'Mime/Encoding';
+
+/**
+ * Parses structured e-mail addresses from an address/mailbox(-list) field
+ * https://datatracker.ietf.org/doc/html/rfc2822#section-3.4
+ *
+ * Example:
+ *
+ *    "Name <address@domain>"
+ *
+ * will be converted to
+ *
+ *     [{name: "Name", email: "address@domain"}]
+ *
+ * The field is split into typed tokens (text, email, comment, group). Each
+ * time a ',' or ';' is reached outside of quotes, comments and angle
+ * brackets, the tokens collected so far are handed to _handleAddress().
+ * A backslash prevents the next character from being treated as an operator;
+ * the backslash itself stays in the token value.
+ *
+ * @param {String} str Address field
+ * @return {Array} An array of address objects
+ */
+export function addressparser(str) {
+	str = (str || '').toString();
+
+	let
+		// Character that closes the currently open sequence, '' when none is open
+		endOperator = '',
+		node = {
+			type: 'text',
+			value: ''
+		},
+		escaped = false,
+		// Tokens of the address that is being read
+		address = [],
+		addresses = [];
+
+	const
+		/*
+		 * Operator tokens and which tokens are expected to end the sequence
+		 */
+		OPERATORS = {
+			'"': '"',
+			'(': ')',
+			'<': '>',
+			',': '',
+			// Groups are ended by semicolons
+			':': ';',
+			// Semicolons are not a legal delimiter per the RFC2822 grammar other
+			// than for terminating a group, but they are also not valid for any
+			// other use in this context.  Given that some mail clients have
+			// historically allowed the semicolon as a delimiter equivalent to the
+			// comma in their UI, it makes sense to treat them the same as a comma
+			// when used outside of a group.
+			';': ''
+		},
+		// Stores the finished token (when not empty) and starts a new text token
+		pushToken = token => {
+			token.value = (token.value || '').toString().trim();
+			token.value.length && address.push(token);
+			node = {
+				type: 'text',
+				value: ''
+			};
+			escaped = false;
+		},
+		// Converts the collected tokens into address objects and starts a new address
+		pushAddress = () => {
+			if (address.length) {
+				address = _handleAddress(address);
+				if (address.length) {
+					addresses = addresses.concat(address);
+				}
+			}
+			address = [];
+		};
+
+	// Iterate by code point, not by UTF-16 unit
+	for (const chr of str) {
+		if (!escaped && (chr === endOperator || (!endOperator && chr in OPERATORS))) {
+			pushToken(node);
+			if (',' === chr || ';' === chr) {
+				pushAddress();
+			}
+			// Must also run for the ';' that terminates a group. Otherwise
+			// endOperator stays ';' and no operator (not even ',') is
+			// recognized in the remainder of the field.
+			endOperator = endOperator ? '' : OPERATORS[chr];
+			if ('<' === chr) {
+				node.type = 'email';
+			} else if ('(' === chr) {
+				node.type = 'comment';
+			} else if (':' === chr) {
+				node.type = 'group';
+			}
+		} else {
+			node.value += chr;
+			escaped = !escaped && '\\' === chr;
+		}
+	}
+	pushToken(node);
+
+	pushAddress();
+
+	return addresses;
+}
+
+/**
+ * Turns the tokens that were collected for one list entry into address objects.
+ *
+ * The token values are sorted by type first. When a group token is present
+ * the group content is parsed with addressparser() and only its members are
+ * returned. Otherwise exactly one {email, name} object is built.
+ *
+ * @param {Array} tokens Token objects ({type, value}) of a single entry
+ * @return {Array} Address objects
+ */
+function _handleAddress(tokens) {
+	const
+		parts = {
+			email: [],
+			comment: [],
+			group: [],
+			text: []
+		},
+		// Without any text, the comment takes the place of the text
+		commentAsText = () => {
+			if (!parts.text.length && parts.comment.length) {
+				parts.text = parts.comment;
+				parts.comment = [];
+			}
+		};
+	let hasGroup = false;
+
+	for (const { type, value } of tokens) {
+		hasGroup = hasGroup || 'group' === type;
+		parts[type].push(value);
+	}
+
+	commentAsText();
+
+	if (hasGroup) {
+		// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
+		// The group name and comment are dropped, only the members are returned
+		return [].concat(addressparser(parts.group.join(',')));
+	}
+
+	// No <address> was given, try to find one in the regular text
+	if (!parts.email.length && parts.text.length) {
+		// First choice: the last text token that is nothing but an address
+		for (let idx = parts.text.length - 1; idx >= 0; --idx) {
+			if (/^[^@\s]+@[^@\s]+$/.test(parts.text[idx])) {
+				parts.email = parts.text.splice(idx, 1);
+				break;
+			}
+		}
+
+		// Second choice: cut the first address-like word out of a text token,
+		// again starting with the last token
+		for (let idx = parts.text.length - 1; idx >= 0 && !parts.email.length; --idx) {
+			const
+				text = parts.text[idx],
+				found = /\s*\b[^@\s]+@[^@\s]+\b\s*/.exec(text);
+			if (found) {
+				parts.email = [found[0].trim()];
+				parts.text[idx] = text.slice(0, found.index) + text.slice(found.index + found[0].length);
+			}
+		}
+	}
+
+	// The text may have been used up as address, so check the comment again
+	commentAsText();
+
+	// Only the first address is kept, the others are appended to the text
+	if (parts.email.length > 1) {
+		parts.text = parts.text.concat(parts.email.splice(1));
+	}
+
+	const entry = {
+		// Values are joined with spaces
+		email: decodeEncodedWords(parts.email.join(' ').trim()),
+		name: decodeEncodedWords(parts.text.join(' ').trim())
+	};
+
+	// The same value for both: decide by the '@' which of the two it is
+	if (entry.email === entry.name) {
+		if (entry.email.includes('@')) {
+			entry.name = '';
+		} else {
+			entry.email = '';
+		}
+	}
+
+	return [entry];
+}
diff --git a/dev/Mime/Encoding.js b/dev/Mime/Encoding.js
@@ -0,0 +1,35 @@
+const
+	// "=XX" hex escape -> the character with that byte value
+	QPDecodeParams = [/=([0-9A-F]{2})/g, (...args) => String.fromCharCode(parseInt(args[1], 16))],
+
+	// An encoded-word that is separated from the next encoded-word by white space only
+	adjacentEncodedWords = /(=\?[^?\s]+\?[BQ]\?[^?\s]*\?=)\s+(?==\?[^?\s]+\?[BQ]\?[^?\s]*\?=)/gi;
+
+export const
+	/**
+	 * Base64 to binary string (one character per byte)
+	 * https://datatracker.ietf.org/doc/html/rfc2045#section-6.8
+	 */
+	BDecode = atob,
+
+	/**
+	 * String to Base64 of its UTF-8 bytes.
+	 * unescape(encodeURIComponent()) makes the UTF-16 DOMString to an UTF-8 string
+	 */
+	BEncode = data => btoa(unescape(encodeURIComponent(data))),
+/* 	// Without deprecated 'unescape':
+	BEncode = data => btoa(encodeURIComponent(data).replace(
+		/%([0-9A-F]{2})/g, (match, p1) => String.fromCharCode('0x' + p1)
+	)),
+*/
+
+	/**
+	 * Quoted-Printable to binary string: soft line breaks are removed first,
+	 * then the "=XX" escapes are replaced
+	 * https://datatracker.ietf.org/doc/html/rfc2045#section-6.7
+	 */
+	QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),
+
+	/**
+	 * Replaces every RFC 2047 encoded-word in data with its decoded text.
+	 * https://datatracker.ietf.org/doc/html/rfc2047#section-4.1
+	 * https://datatracker.ietf.org/doc/html/rfc2047#section-4.2
+	 * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
+	 *
+	 * An encoded-word that can not be decoded (invalid Base64 or a charset
+	 * that TextDecoder does not know) is left in the text as it was.
+	 *
+	 * @param {String} data Header text
+	 * @return {String}
+	 */
+	decodeEncodedWords = data =>
+		data
+		// White space between two encoded-words is not part of the text
+		// https://datatracker.ietf.org/doc/html/rfc2047#section-6.2
+		.replace(adjacentEncodedWords, '$1')
+		// The encoding letter is case-independent, so 'b' and 'q' are valid too
+		.replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/gi, (m, charset, encoding, text) => {
+			try {
+				return decodeText(
+					charset,
+					'B' === encoding.toUpperCase()
+						? BDecode(text)
+						// In the "Q" encoding "_" stands for a space. Must be done
+						// before the hex escapes, because "=5F" is a literal "_".
+						: QPDecode(text.replace(/_/g, ' '))
+				// decodeText yields undefined on failure, which replace() would
+				// insert as the text "undefined"
+				) ?? m;
+			} catch (e) {
+				// BDecode throws on invalid Base64
+				console.error({charset:charset,error:e});
+				return m;
+			}
+		})
+	,
+
+	/**
+	 * Converts a binary string (one character per byte) in the given charset
+	 * to a normal string.
+	 *
+	 * @param {String} charset Encoding label as accepted by TextDecoder
+	 * @param {String} data Binary string
+	 * @return {String|undefined} undefined when decoding failed, the error is logged
+	 */
+	decodeText = (charset, data) => {
+		try {
+			// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
+			return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
+		} catch (e) {
+			console.error({charset:charset,error:e});
+		}
+	};
diff --git a/dev/Mime/Parser.js b/dev/Mime/Parser.js
@@ -1,17 +1,5 @@
-//import { b64Encode } from 'Common/Utils';
-
-const
-	// RFC2045
-	QPDecodeParams = [/=([0-9A-F]{2})/g, (...args) => String.fromCharCode(parseInt(args[1], 16))],
-	QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),
-	decodeText = (charset, data) => {
-		try {
-			// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
-			return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
-		} catch (e) {
-			console.error({charset:charset,error:e});
-		}
-	};
+import { decodeEncodedWords, BDecode, BEncode, QPDecode, decodeText } from 'Mime/Encoding';
+import { addressparser } from 'Mime/Address';
 
 export function ParseMime(text)
 {
@@ -27,7 +15,49 @@ export function ParseMime(text)
 			this.bodyEnd = 0;
 			this.boundary = '';
 			this.bodyText = '';
-			this.headers = {};
+			// https://datatracker.ietf.org/doc/html/rfc2822#section-3.6
+			this.headers = {
+				// Required
+				date: null,
+				from: [], // mailbox-list
+				// Optional
+				sender:          [], // MUST occur with multi-address
+				'reply-to':      [], // address-list
+				to:              [], // address-list
+				cc:              [], // address-list
+				bcc:             [], // address-list
+				'message-id':    '', // msg-id SHOULD be present
+				'in-reply-to':   '', // 1*msg-id SHOULD occur in some replies
+				references:      '', // 1*msg-id SHOULD occur in some replies
+				subject:         '', // unstructured
+				// Optional unlimited
+				comments:        [], // unstructured
+				keywords:        [], // phrase *("," phrase)
+				// https://datatracker.ietf.org/doc/html/rfc2822#section-3.6.6
+				trace:           [],
+				'resent-date':   [],
+				'resent-from':   [],
+				'resent-sender': [],
+				'resent-to':     [],
+				'resent-cc':     [],
+				'resent-bcc':    [],
+				'resent-msg-id': [],
+				// optional others outside RFC2822
+				'mime-version':              '',
+				'content-transfer-encoding': '',
+				'content-type':              '',
+				'delivered-to':              '', // angle-addr
+				'return-path':               '', // angle-addr
+				'received':                  [],
+				'authentication-results':    '', // dkim, spf, dmarc
+				'dkim-signature':            '',
+				'x-rspamd-queue-id':         '',
+				'x-rspamd-action':           '',
+				'x-spamd-bar':               '',
+				'x-rspamd-server':           '',
+				'x-spamd-result':            '',
+				'x-remote-address':          '',
+			};
 		}
 */
 
@@ -54,7 +84,7 @@ export function ParseMime(text)
 			if ('quoted-printable' == encoding) {
 				body = QPDecode(body);
 			} else if ('base64' == encoding) {
-				body = atob(body.replace(/\r?\n/g, ''));
+				body = BDecode(body.replace(/\r?\n/g, ''));
 			}
 			return decodeText(charset, body);
 		}
@@ -68,8 +98,7 @@ export function ParseMime(text)
 				if ('quoted-printable' == encoding) {
 					body = QPDecode(body);
 				}
-				body = btoa(body);
-//				body = b64Encode(body);
+				body = BEncode(body);
 			}
 			return 'data:' + this.headerValue('content-type') + ';base64,' + body;
 		}
@@ -92,6 +121,9 @@ export function ParseMime(text)
 		}
 	}
 
+	// mailbox-list or address-list
+	const lists = ['from','reply-to','to','cc','bcc'];
+
 	const ParsePart = (mimePart, start_pos = 0, id = '') =>
 	{
 		let part = new MimePart,
@@ -113,11 +145,19 @@ export function ParseMime(text)
 					[...header.matchAll(/;\s*([^;=]+)=\s*"?([^;"]+)"?/g)].forEach(param =>
 						params[param[1].trim().toLowerCase()] = param[2].trim()
 					);
-					// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
-					match[2] = match[2].trim().replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/g, (m, charset, encoding, text) =>
-						decodeText(charset, 'B' == encoding ? atob(text) : QPDecode(text))
-					);
-					headers[match[1].trim().toLowerCase()] = {
+					// Lowercased field name, used as key in headers
+					let field = match[1].trim().toLowerCase();
+					if (lists.includes(field)) {
+						// mailbox-list / address-list: array of address objects
+						match[2] = addressparser(match[2]);
+					} else if ('keywords' === field) {
+						// map() keeps the decoded phrases, forEach() returns undefined
+						match[2] = match[2].split(',').map(entry => decodeEncodedWords(entry.trim()));
+						// The field may occur more than once, append to the earlier values
+						match[2] = (headers[field]?.value || []).concat(match[2]);
+					} else {
+						match[2] = decodeEncodedWords(match[2].trim());
+						if ('comments' === field) {
+							// concat() returns the merged array, push() returns only
+							// the new length
+							match[2] = (headers[field]?.value || []).concat(match[2]);
+						}
+					}
+					headers[field] = {
 						value: match[2],
 						params: params
 					};