diff --git a/Cargo.lock b/Cargo.lock index 575c7ed..f2960cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -256,6 +256,7 @@ dependencies = [ "log", "regex", "similar", + "unicode-width", ] [[package]] @@ -264,6 +265,12 @@ version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "utf8parse" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 1c13eb6..56e9491 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ lazy_static = "1.5.0" log = "0.4.22" regex = "1.11.0" similar = "2.6.0" +unicode-width = "0.2.0" [profile.release] codegen-units = 1 diff --git a/src/comments.rs b/src/comments.rs index e95e6ce..f5658ad 100644 --- a/src/comments.rs +++ b/src/comments.rs @@ -16,8 +16,3 @@ pub fn find_comment_index(line: &str) -> Option { pub fn remove_comment(line: &str, comment: Option) -> &str { comment.map_or_else(|| line, |c| &line[0..c]) } - -/// Extract a comment from the end of a line -pub fn get_comment(line: &str, comment: Option) -> &str { - comment.map_or_else(|| "", |c| &line[c..]) -} diff --git a/src/format.rs b/src/format.rs index c52ed85..e7caeb9 100644 --- a/src/format.rs +++ b/src/format.rs @@ -14,69 +14,98 @@ use std::iter::zip; /// Central function to format a file pub fn format_file( - text: &str, + old_text: &str, file: &str, args: &Cli, logs: &mut Vec, ) -> String { record_file_log(logs, Info, file, "Formatting started."); - let mut old_text = remove_extra_newlines(text); - if !args.usetabs { - old_text = remove_tabs(&old_text, args); - } - old_text = remove_trailing_spaces(&old_text); + // Clean the source file and zip its lines with line numbers + let old_text = clean_text(old_text, args); + let mut old_lines = zip(1.., old_text.lines()); + + // Initialise let mut state = State::new(); - let old_lines = old_text.lines(); - let mut old_lines = zip(1.., old_lines); let mut queue: Vec<(usize, String)> = vec![]; - let mut new_text = String::with_capacity(2 * text.len()); + let mut new_text = String::with_capacity(2 * old_text.len()); + + // Select the character used for indentation. let indent_char = if args.usetabs { "\t" } else { " " }; loop { if let Some((linum_old, mut line)) = queue.pop() { + // Read the patterns present on this line. let pattern = Pattern::new(&line); - let temp_state: State; - (line, temp_state) = apply_indent( + + // Temporary state for working on this line. + let mut temp_state = state.clone(); + + // Update the state with the line number from the queue. + temp_state.linum_old = linum_old; + + // If the line should not be ignored ... + if !set_ignore_and_report( &line, - linum_old, - &state, + &mut temp_state, logs, file, - args, &pattern, - indent_char, - ); - if needs_env_new_line(&line, &temp_state, &pattern) { - let env_lines = - put_env_new_line(&line, &temp_state, file, args, logs); - if env_lines.is_some() { - queue.push((linum_old, env_lines.clone().unwrap().1)); - queue.push((linum_old, env_lines.clone().unwrap().0)); - } else { - state = temp_state; - new_text.push_str(&line); - new_text.push_str(LINE_END); - state.linum_new += 1; - }; - } else if needs_wrap(&line, &temp_state, args) { - let wrapped_lines = - apply_wrap(&line, &temp_state, file, args, logs); - if wrapped_lines.is_some() { - queue.push((linum_old, wrapped_lines.clone().unwrap().1)); - queue.push((linum_old, wrapped_lines.clone().unwrap().0)); - } else { - state = temp_state; - new_text.push_str(&line); - new_text.push_str(LINE_END); - state.linum_new += 1; - }; - } else { - state = temp_state; - new_text.push_str(&line); - new_text.push_str(LINE_END); - state.linum_new += 1; + ) { + // Check if the line should be split because of a pattern that should begin on a new line. + if needs_env_new_line(&line, &temp_state, &pattern) { + // Split the line into two ... + let (this_line, next_line) = + put_env_new_line(&line, &temp_state, file, args, logs); + // ... and queue the second part for formatting. + queue.push((linum_old, next_line.to_string())); + line = this_line.to_string(); + } + + // Calculate the indent based on the current state and the patterns in the line. + let indent = calculate_indent( + &line, + &mut temp_state, + logs, + file, + args, + &pattern, + ); + + let indent_length = usize::try_from(indent.visual * args.tab) + .expect("Visual indent is non-negative."); + + // Wrap the line before applying the indent, and loop back if the line needed wrapping. + if needs_wrap(line.trim_start(), indent_length, args) { + let wrapped_lines = apply_wrap( + line.trim_start(), + indent_length, + &temp_state, + file, + args, + logs, + ); + if let Some([this_line, next_line_start, next_line]) = + wrapped_lines + { + queue.push(( + linum_old, + [next_line_start, next_line].concat(), + )); + queue.push((linum_old, this_line.to_string())); + continue; + } + } + + // Lastly, apply the indent if the line didn't need wrapping. + line = apply_indent(&line, &indent, args, indent_char); } + + // Add line to new text + state = temp_state; + new_text.push_str(&line); + new_text.push_str(LINE_END); + state.linum_new += 1; } else if let Some((linum_old, line)) = old_lines.next() { queue.push((linum_old, line.to_string())); } else { @@ -84,7 +113,7 @@ pub fn format_file( } } - if !indents_return_to_zero(&new_text) { + if !indents_return_to_zero(&state) { record_file_log(logs, Warn, file, "Indent does not return to zero."); } @@ -93,6 +122,35 @@ pub fn format_file( new_text } +/// Sets the `ignore` and `verbatim` flags in the given [State] based on `line` and returns whether `line` should be +/// ignored by formatting. +fn set_ignore_and_report( + line: &str, + temp_state: &mut State, + logs: &mut Vec, + file: &str, + pattern: &Pattern, +) -> bool { + temp_state.ignore = get_ignore(line, temp_state, logs, file, true); + temp_state.verbatim = + get_verbatim(line, temp_state, logs, file, true, pattern); + + temp_state.verbatim.visual || temp_state.ignore.visual +} + +/// Cleans the given text by removing extra line breaks and trailing spaces, and tabs if they shouldn't be used. +fn clean_text(text: &str, args: &Cli) -> String { + let mut text = remove_extra_newlines(text); + + if !args.usetabs { + text = remove_tabs(&text, args); + } + + text = remove_trailing_spaces(&text); + + text +} + /// Information on the current state during formatting #[derive(Clone, Debug)] pub struct State { @@ -143,6 +201,7 @@ impl Pattern { } /// Ensure that the indentation returns to zero at the end of the file -fn indents_return_to_zero(text: &str) -> bool { - !text.lines().last().unwrap_or_default().starts_with(' ') +fn indents_return_to_zero(state: &State) -> bool { + #![allow(clippy::missing_const_for_fn)] + state.indent.actual == 0 } diff --git a/src/indent.rs b/src/indent.rs index 0bf63d2..0800af6 100644 --- a/src/indent.rs +++ b/src/indent.rs @@ -3,10 +3,8 @@ use crate::cli::*; use crate::comments::*; use crate::format::*; -use crate::ignore::*; use crate::logging::*; use crate::regexes::*; -use crate::verbatim::*; use core::cmp::max; use log::Level::{Trace, Warn}; @@ -109,78 +107,82 @@ fn get_indent(line: &str, prev_indent: &Indent, pattern: &Pattern) -> Indent { Indent { actual, visual } } -/// Apply the correct indentation to a line -pub fn apply_indent( +/// Calculates the indent for `line` based on its contents. This functions saves the calculated [Indent], which might be +/// negative, to the given [State], and then ensures that the returned [Indent] is non-negative. +pub fn calculate_indent( line: &str, - linum_old: usize, - state: &State, + state: &mut State, logs: &mut Vec, file: &str, args: &Cli, pattern: &Pattern, - indent_char: &str, -) -> (String, State) { - #![allow(clippy::too_many_arguments)] - let mut new_state = state.clone(); - new_state.linum_old = linum_old; - - new_state.ignore = get_ignore(line, &new_state, logs, file, true); - new_state.verbatim = - get_verbatim(line, &new_state, logs, file, true, pattern); +) -> Indent { + // Calculate the new indent by first removing the comment from the line (if there is one) to ignore diffs from + // characters in there. + let comment_index = find_comment_index(line); + let line_strip = remove_comment(line, comment_index); + let mut indent = get_indent(line_strip, &state.indent, pattern); + + // Record the indent to the logs. + if args.trace { + record_line_log( + logs, + Trace, + file, + state.linum_new, + state.linum_old, + line, + &format!( + "Indent: actual = {}, visual = {}:", + indent.actual, indent.visual + ), + ); + } - let new_line = if new_state.verbatim.visual || new_state.ignore.visual { - line.to_string() - } else { - // calculate indent - let comment_index = find_comment_index(line); - let line_strip = &remove_comment(line, comment_index); - let mut indent = get_indent(line_strip, &state.indent, pattern); - new_state.indent = indent.clone(); - if args.trace { - record_line_log( - logs, - Trace, - file, - state.linum_new, - new_state.linum_old, - line, - &format!( - "Indent: actual = {}, visual = {}:", - indent.actual, indent.visual - ), - ); - } + // Save the indent to the state. Note, this indent might be negative; it is saved without correction so that this is + // not forgotten for the next iterations. + state.indent = indent.clone(); + + // However, we can't negatively indent a line. So we log the negative indent and reset the values to 0. + if (indent.visual < 0) || (indent.actual < 0) { + record_line_log( + logs, + Warn, + file, + state.linum_new, + state.linum_old, + line, + "Indent is negative.", + ); + indent.actual = indent.actual.max(0); + indent.visual = indent.visual.max(0); + } - if (indent.visual < 0) || (indent.actual < 0) { - record_line_log( - logs, - Warn, - file, - new_state.linum_new, - new_state.linum_old, - line, - "Indent is negative.", - ); - indent.actual = indent.actual.max(0); - indent.visual = indent.visual.max(0); - } + indent +} - // apply indent - let trimmed_line = line.trim_start(); - if trimmed_line.is_empty() { - String::new() - } else { - let n_indent_chars = - usize::try_from(indent.visual * args.tab).unwrap(); - let mut new_line = - String::with_capacity(trimmed_line.len() + n_indent_chars); - for idx in 0..n_indent_chars { - new_line.insert_str(idx, indent_char); - } - new_line.insert_str(n_indent_chars, trimmed_line); - new_line +/// Apply the given indentation to a line +pub fn apply_indent( + line: &str, + indent: &Indent, + args: &Cli, + indent_char: &str, +) -> String { + // Remove white space from the start of the line + let trimmed_line = line.trim_start(); + + // If the line is now empty, return a new empty String + if trimmed_line.is_empty() { + String::new() + // Otherwise, allocate enough memory to fit line with the added indentation and insert the appropriate string slices + } else { + let n_indent_chars = usize::try_from(indent.visual * args.tab).unwrap(); + let mut new_line = + String::with_capacity(trimmed_line.len() + n_indent_chars); + for idx in 0..n_indent_chars { + new_line.insert_str(idx, indent_char); } - }; - - (new_line, new_state) + new_line.insert_str(n_indent_chars, trimmed_line); + new_line + } } diff --git a/src/regexes.rs b/src/regexes.rs index 346a1f1..53b1905 100644 --- a/src/regexes.rs +++ b/src/regexes.rs @@ -51,4 +51,17 @@ lazy_static! { Regex::new(r"(?P\S.*?)(?P\\end\{)").unwrap(); pub static ref RE_ITEM_SHARED_LINE: Regex = Regex::new(r"(?P\S.*?)(?P\\item)").unwrap(); + // Regex that matches any splitting command with non-whitespace characters before it and catches the previous text + // in a group called "prev" and captures the command itself and the remaining text in a group called "env". + pub static ref RE_ENV_ITEM_SHARED_LINE: Regex = Regex::new( + r"(?x) # Enable extended mode + (?P\S.*?) # : captures any number of characters starting with a non-whitespace + # character until the start of the next group; + (?P( # : captures any LaTeX command before which the line should be split + \\begin\{ # start of environments + |\\end\{ # end of environments + |\\item ) # list items (note the space before the closing bracket) + .*)" // and any characters that follow the command + ) + .unwrap(); } diff --git a/src/subs.rs b/src/subs.rs index 30b9894..01a337f 100644 --- a/src/subs.rs +++ b/src/subs.rs @@ -31,24 +31,58 @@ pub fn needs_env_new_line( state: &State, pattern: &Pattern, ) -> bool { - !state.verbatim.visual + // Check if we should format this line and if we've matched an environment. + let not_ignored_and_contains_env = !state.verbatim.visual && !state.ignore.visual && (pattern.contains_env_begin || pattern.contains_env_end || pattern.contains_item) && (RE_ENV_BEGIN_SHARED_LINE.is_match(line) || RE_ENV_END_SHARED_LINE.is_match(line) - || RE_ITEM_SHARED_LINE.is_match(line)) + || RE_ITEM_SHARED_LINE.is_match(line)); + + // If we're not ignoring and we've matched an environment ... + if not_ignored_and_contains_env { + // ... return `true` if the comment index is `None` (which implies the split point must be in text), otherwise + // compare the index of the comment with the split point. + find_comment_index(line).map_or(true, |comment_index| { + if RE_ENV_ITEM_SHARED_LINE + .captures(line) + .unwrap() // Doesn't panic because we've matched split point. + .get(2) + .unwrap() // Doesn't panic because the regex has 4 groups so index 2 is in bounds. + .start() + > comment_index + { + // If the split point is past the comment index, then we don't split the line, + false + } else { + // otherwise, the split point is before the comment and we do split the line. + true + } + }) + } else { + // If we're ignoring or we didn't match an environment, we don't need a new line. + false + } } -/// Ensure LaTeX environments begin on new lines -pub fn put_env_new_line( - line: &str, +/// Ensure LaTeX environments begin on new lines. +/// +/// Returns a tuple containing: +/// 1. a reference to the line that was given, shortened because of the split +/// 2. a reference to the part of the line that was split +pub fn put_env_new_line<'a>( + line: &'a str, state: &State, file: &str, args: &Cli, logs: &mut Vec, -) -> Option<(String, String)> { +) -> (&'a str, &'a str) { + let captures = RE_ENV_ITEM_SHARED_LINE.captures(line).unwrap(); + + let (line, [prev, rest, _]) = captures.extract(); + if args.trace { record_line_log( logs, @@ -60,31 +94,5 @@ pub fn put_env_new_line( "Placing environment on new line.", ); } - let comment_index = find_comment_index(line); - let comment = get_comment(line, comment_index); - let mut text = remove_comment(line, comment_index); - let mut temp = RE_ENV_BEGIN_SHARED_LINE - .replace(text, format!("$prev{LINE_END}$env")) - .to_string(); - text = &temp; - if !text.contains(LINE_END) { - temp = RE_ENV_END_SHARED_LINE - .replace(text, format!("$prev{LINE_END}$env")) - .to_string(); - text = &temp; - } - if !text.contains(LINE_END) { - temp = RE_ITEM_SHARED_LINE - .replace(text, format!("$prev{LINE_END}$env")) - .to_string(); - text = &temp; - } - if text.contains(LINE_END) { - let split = text.split_once(LINE_END).unwrap(); - let split_0 = split.0.to_string(); - let mut split_1 = split.1.to_string(); - split_1.push_str(comment); - return Some((split_0, split_1)); - } - None + (prev, rest) } diff --git a/src/tests.rs b/src/tests.rs index 43e1e5f..c4c9a18 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -51,16 +51,14 @@ fn read_files_from_dir(dir: &str) -> Vec { #[test] fn test_source() { let source_files = read_files_from_dir("./tests/source/"); - let mut fail = false; for file in source_files { if !test_file( &format!("tests/source/{file}"), &format!("tests/target/{file}"), ) { - fail = true; + panic!("Failed in {file}"); } } - assert!(!fail, "Some tests failed"); } #[test] diff --git a/src/wrap.rs b/src/wrap.rs index 30ad1de..1961149 100644 --- a/src/wrap.rs +++ b/src/wrap.rs @@ -5,22 +5,46 @@ use crate::comments::*; use crate::format::*; use crate::logging::*; use log::Level::{Trace, Warn}; +use unicode_width::UnicodeWidthChar; + +/// String slice to start wrapped text lines +pub const TEXT_LINE_START: &str = ""; +/// String slice to start wrapped comment lines +pub const COMMENT_LINE_START: &str = "% "; /// Check if a line needs wrapping -pub fn needs_wrap(line: &str, state: &State, args: &Cli) -> bool { +pub fn needs_wrap(line: &str, indent_length: usize, args: &Cli) -> bool { !args.keep - && !state.verbatim.visual - && !state.ignore.visual - && (line.chars().count() > args.wrap.into()) + && ({ + let mut line_length = 0; + for c in line.chars() { + line_length += + c.width().expect("Why control character in text?"); + } + line_length + } + indent_length + > args.wrap.into()) } /// Find the best place to break a long line -fn find_wrap_point(line: &str, args: &Cli) -> Option { +fn find_wrap_point( + line: &str, + indent_length: usize, + args: &Cli, +) -> Option { let mut wrap_point: Option = None; let mut after_char = false; let mut prev_char: Option = None; - for (i, c) in line.chars().enumerate() { - if i >= args.wrap_min.into() && wrap_point.is_some() { + + let mut line_width = 0; + + let wrap_boundary = usize::from(args.wrap_min) - indent_length; + + // Return *byte* index rather than *char* index. + for (i, c) in line.char_indices() { + line_width += c.width().expect("No control characters in text."); + + if line_width > wrap_boundary && wrap_point.is_some() { break; } if c == ' ' && prev_char != Some('\\') { @@ -36,13 +60,14 @@ fn find_wrap_point(line: &str, args: &Cli) -> Option { } /// Wrap a long line into a short prefix and a suffix -pub fn apply_wrap( - line: &str, +pub fn apply_wrap<'a>( + line: &'a str, + indent_length: usize, state: &State, file: &str, args: &Cli, logs: &mut Vec, -) -> Option<(String, String)> { +) -> Option<[&'a str; 3]> { if args.trace { record_line_log( logs, @@ -54,7 +79,7 @@ pub fn apply_wrap( "Wrapping long line.", ); } - let wrap_point = find_wrap_point(line, args); + let wrap_point = find_wrap_point(line, indent_length, args); let comment_index = find_comment_index(line); match wrap_point { @@ -73,11 +98,15 @@ pub fn apply_wrap( }; wrap_point.map(|p| { - let line_start = - comment_index.map_or("", |c| if p > c { "%" } else { "" }); - let line_1: String = line.chars().take(p).collect(); - let mut line_2: String = line.chars().skip(p).collect(); - line_2.insert_str(0, line_start); - (line_1, line_2) + let this_line = &line[0..p]; + let next_line_start = comment_index.map_or("", |c| { + if p > c { + COMMENT_LINE_START + } else { + TEXT_LINE_START + } + }); + let next_line = &line[p + 1..]; + [this_line, next_line_start, next_line] }) } diff --git a/tests/source/environment_lines.tex b/tests/source/environment_lines.tex index 70890ac..969f697 100644 --- a/tests/source/environment_lines.tex +++ b/tests/source/environment_lines.tex @@ -21,7 +21,7 @@ \end{env2} \end{env1} % environments all on same line -\begin{env1}\begin{env2}\end{env2}\end{env1} % with a comment +\begin{env1}\begin{env2}\end{env2}\end{env1} % with a comment \begin{env1} % environments with extra brackets \begin{env1}(a)(b \begin{env2}[c{d}e] \end{env2}[f]g)\end{env1} diff --git a/tests/target/environment_lines.tex b/tests/target/environment_lines.tex index b8362fa..c75202c 100644 --- a/tests/target/environment_lines.tex +++ b/tests/target/environment_lines.tex @@ -28,7 +28,7 @@ \begin{env1} \begin{env2} \end{env2} -\end{env1} % with a comment +\end{env1} % with a comment \begin{env1} % environments with extra brackets \begin{env1}(a)(b