Skip to content

Commit

Permalink
Basic support for UTF-16 surrogates
Browse files Browse the repository at this point in the history
  • Loading branch information
alabuzhev committed Jun 6, 2021
1 parent c5b980b commit 45efcc1
Show file tree
Hide file tree
Showing 11 changed files with 211 additions and 94 deletions.
5 changes: 5 additions & 0 deletions far/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
--------------------------------------------------------------------------------
drkns 06.06.2021 21:01:06 +0100 - build 5814

1. Basic support for UTF-16 surrogates.

--------------------------------------------------------------------------------
drkns 05.06.2021 23:26:24 +0100 - build 5813

Expand Down
97 changes: 76 additions & 21 deletions far/console.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
static bool sWindowMode;
static bool sEnableVirtualTerminal;

constexpr auto bad_char_replacement = L' ';

wchar_t ReplaceControlCharacter(wchar_t const Char)
{
Expand Down Expand Up @@ -149,18 +150,18 @@ wchar_t ReplaceControlCharacter(wchar_t const Char)
}
}

static void sanitise_dbsc_pair(FAR_CHAR_INFO& First, FAR_CHAR_INFO& Second)
static bool sanitise_dbsc_pair(FAR_CHAR_INFO& First, FAR_CHAR_INFO& Second)
{
if (!(First.Attributes.Flags & COMMON_LVB_LEADING_BYTE) && !(Second.Attributes.Flags & COMMON_LVB_TRAILING_BYTE))
{
// Not DBSC, awesome
return;
}

const auto
IsFirst = flags::check_any(First.Attributes.Flags, COMMON_LVB_LEADING_BYTE),
IsSecond = flags::check_any(Second.Attributes.Flags, COMMON_LVB_TRAILING_BYTE);

if (!IsFirst && !IsSecond)
{
// Not DBSC, awesome
return false;
}

flags::clear(First.Attributes.Flags, COMMON_LVB_LEADING_BYTE);
flags::clear(Second.Attributes.Flags, COMMON_LVB_TRAILING_BYTE);

Expand All @@ -170,14 +171,48 @@ static void sanitise_dbsc_pair(FAR_CHAR_INFO& First, FAR_CHAR_INFO& Second)
flags::set(First.Attributes.Flags, COMMON_LVB_LEADING_BYTE);
flags::set(Second.Attributes.Flags, COMMON_LVB_TRAILING_BYTE);

return;
return false;
}

if (IsFirst)
First.Char = L' ';
First.Char = bad_char_replacement;

if (IsSecond)
Second.Char = L' ';
Second.Char = bad_char_replacement;

return true;
}

// Inspects two adjacent cells for a broken UTF-16 surrogate sequence.
// A high surrogate in First and/or a low surrogate in Second is overwritten
// with bad_char_replacement unless the two characters form a valid surrogate
// pair and both cells carry equal attributes.
// Returns true when at least one character was replaced, false otherwise.
static bool sanitise_surrogate_pair(FAR_CHAR_INFO& First, FAR_CHAR_INFO& Second)
{
	const auto HasLead = encoding::utf16::is_high_surrogate(First.Char);
	const auto HasTrail = encoding::utf16::is_low_surrogate(Second.Char);

	// No surrogate halves in the expected positions - nothing to sanitise
	if (!(HasLead || HasTrail))
		return false;

	// A well-formed pair is left untouched only if both halves share the same attributes
	const auto Intact =
		encoding::utf16::is_valid_surrogate_pair(First.Char, Second.Char) &&
		First.Attributes == Second.Attributes;

	if (Intact)
		return false;

	// Replace only the halves that actually are surrogates
	if (HasLead)
	{
		First.Char = bad_char_replacement;
	}

	if (HasTrail)
	{
		Second.Char = bad_char_replacement;
	}

	return true;
}

// Sanitises two adjacent cells.
// sanitise_dbsc_pair runs first; sanitise_surrogate_pair is invoked only
// when the former returns false.
void sanitise_pair(FAR_CHAR_INFO& First, FAR_CHAR_INFO& Second)
{
	if (sanitise_dbsc_pair(First, Second))
		return;

	sanitise_surrogate_pair(First, Second);
}

static COORD make_coord(point const& Point)
Expand Down Expand Up @@ -804,14 +839,14 @@ namespace console_detail
{
if (n != Input.size() - 1)
{
sanitise_dbsc_pair(Cell, Input[n + 1]);
sanitise_pair(Cell, Input[n + 1]);
}

if (Cell.Attributes.Flags & COMMON_LVB_TRAILING_BYTE)
{
if (!LeadingChar)
{
Cell.Char = L' ';
Cell.Char = bad_char_replacement;
flags::clear(Cell.Attributes.Flags, COMMON_LVB_TRAILING_BYTE);
}
else if (Cell.Char == *LeadingChar)
Expand All @@ -820,6 +855,10 @@ namespace console_detail
continue;
}
}
else if (!n && encoding::utf16::is_low_surrogate(Cell.Char))
{
Cell.Char = bad_char_replacement;
}

LeadingChar.reset();

Expand All @@ -828,13 +867,17 @@ namespace console_detail
if (n == Input.size() - 1)
{
flags::clear(Cell.Attributes.Flags, COMMON_LVB_LEADING_BYTE);
Cell.Char = L' ';
Cell.Char = bad_char_replacement;
}
else
{
LeadingChar = Cell.Char;
}
}
else if (n == Input.size() - 1 && encoding::utf16::is_high_surrogate(Cell.Char))
{
Cell.Char = bad_char_replacement;
}
}

if (!LastColor.has_value() || Cell.Attributes != *LastColor)
Expand All @@ -846,7 +889,7 @@ namespace console_detail
if (CharWidthEnabled && Cell.Char == encoding::replace_char && Cell.Attributes.Reserved[0] > std::numeric_limits<wchar_t>::max())
{
const auto Pair = encoding::utf16::to_surrogate(Cell.Attributes.Reserved[0]);
Str.append(ALL_CONST_RANGE(Pair));
append(Str, Pair.first, Pair.second);
}
else
{
Expand Down Expand Up @@ -999,18 +1042,29 @@ namespace console_detail
if (Cell.Attributes.Flags & COMMON_LVB_TRAILING_BYTE)
{
flags::clear(Cell.Attributes.Flags, COMMON_LVB_TRAILING_BYTE);
Cell.Char = L' ';
Cell.Char = bad_char_replacement;
}
else if (encoding::utf16::is_low_surrogate(Cell.Char))
{
Cell.Char = bad_char_replacement;
}
}

if (Col != SubRect.width() - 1)
{
sanitise_dbsc_pair(Cell, Buffer[SubRect.top + Row][SubRect.left + Col + 1]);
sanitise_pair(Cell, Buffer[SubRect.top + Row][SubRect.left + Col + 1]);
}
else if (Cell.Attributes.Flags & COMMON_LVB_LEADING_BYTE)
else
{
flags::clear(Cell.Attributes.Flags, COMMON_LVB_LEADING_BYTE);
Cell.Char = L' ';
if (Cell.Attributes.Flags & COMMON_LVB_LEADING_BYTE)
{
flags::clear(Cell.Attributes.Flags, COMMON_LVB_LEADING_BYTE);
Cell.Char = bad_char_replacement;
}
else if (encoding::utf16::is_high_surrogate(Cell.Char))
{
Cell.Char = bad_char_replacement;
}
}

ConsoleBuffer.emplace_back(CHAR_INFO{ { ReplaceControlCharacter(Cell.Char) }, colors::FarColorToConsoleColor(Cell.Attributes) });
Expand Down Expand Up @@ -1618,8 +1672,9 @@ namespace console_detail
return false;

DWORD Written;
auto Pair = encoding::utf16::to_surrogate(Codepoint);
if (!WriteConsole(m_WidthTestScreen.native_handle(), Pair.data(), Pair[1]? 2 : 1, &Written, {}))
const auto Pair = encoding::utf16::to_surrogate(Codepoint);
std::array Chars = { Pair.first, Pair.second };
if (!WriteConsole(m_WidthTestScreen.native_handle(), Chars.data(), Pair.second? 2 : 1, &Written, {}))
return false;

CONSOLE_SCREEN_BUFFER_INFO Info;
Expand Down
30 changes: 24 additions & 6 deletions far/edit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ bool Edit::ProcessKey(const Manager::Key& Key)
{
AdjustPersistentMark();

const auto SavedCurPos = m_CurPos;
RecurseProcessKey(KEY_LEFT);

if (!m_Flags.Check(FEDITLINE_MARKINGBLOCK))
Expand All @@ -682,7 +683,7 @@ bool Edit::ProcessKey(const Manager::Key& Key)
Select(m_SelStart,m_CurPos);
else
{
int EndPos=m_CurPos+1;
int EndPos = SavedCurPos;
int NewStartPos=m_CurPos;

if (EndPos>m_Str.size())
Expand All @@ -703,23 +704,27 @@ bool Edit::ProcessKey(const Manager::Key& Key)
{
AdjustPersistentMark();

const auto SavedCurPos = m_CurPos;
RecurseProcessKey(KEY_RIGHT);

if (!m_Flags.Check(FEDITLINE_MARKINGBLOCK))
{
RemoveSelection();
m_Flags.Set(FEDITLINE_MARKINGBLOCK);
}

if ((m_SelStart!=-1 && m_SelEnd==-1) || m_SelEnd>m_CurPos)
if ((m_SelStart != -1 && m_SelEnd == -1) || m_SelEnd > SavedCurPos)
{
if (m_CurPos+1==m_SelEnd)
if (m_CurPos == m_SelEnd)
RemoveSelection();
else
Select(m_CurPos+1,m_SelEnd);
Select(m_CurPos, m_SelEnd);
}
else
AddSelect(m_CurPos,m_CurPos+1);
AddSelect(SavedCurPos, m_CurPos);

Show();

RecurseProcessKey(KEY_RIGHT);
return true;
}
case KEY_CTRLSHIFTLEFT: case KEY_CTRLSHIFTNUMPAD4:
Expand Down Expand Up @@ -1044,6 +1049,10 @@ bool Edit::ProcessKey(const Manager::Key& Key)
{
SetPrevCurPos(m_CurPos);
m_CurPos--;

if (m_CurPos && is_valid_surrogate_pair_at(m_CurPos - 1))
--m_CurPos;

Show();
}

Expand All @@ -1064,6 +1073,9 @@ bool Edit::ProcessKey(const Manager::Key& Key)
else
m_CurPos++;

if (m_CurPos && is_valid_surrogate_pair_at(m_CurPos - 1))
++m_CurPos;

Show();
return true;
}
Expand Down Expand Up @@ -2386,6 +2398,12 @@ Editor* Edit::GetEditor() const
return nullptr;
}

// Reports the result of is_valid_surrogate_pair for the tail of m_Str
// that begins at Position.
// A Position at or beyond the end of the string yields false.
bool Edit::is_valid_surrogate_pair_at(size_t const Position) const
{
	string_view const Text(m_Str);

	// Guard first: substr must not be asked for an out-of-range offset
	if (Position >= Text.size())
		return false;

	return is_valid_surrogate_pair(Text.substr(Position));
}

#ifdef ENABLE_TESTS

#include "testing.hpp"
Expand Down
2 changes: 2 additions & 0 deletions far/edit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ class Edit: public SimpleScreenObject
void SetRightCoord(int Value) { SetPosition({ m_Where.left, m_Where.top, Value, m_Where.bottom }); }
Editor* GetEditor() const;

bool is_valid_surrogate_pair_at(size_t Position) const;

protected:
// BUGBUG: the whole purpose of this class is to avoid zillions of casts in existing code by returning size() as int
// Remove it after fixing all signed/unsigned mess
Expand Down
57 changes: 30 additions & 27 deletions far/editor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1069,37 +1069,38 @@ bool Editor::ProcessKeyInternal(const Manager::Key& Key, bool& Refresh)
if (!CurPos && m_it_CurLine == Lines.begin())
return true;

if (!CurPos) //курсор в начале строки
const auto OldCur = m_it_CurLine;
Pasting++;
ProcessKeyInternal(Manager::Key(KEY_LEFT), Refresh);
Pasting--;

if (OldCur == m_it_CurLine)
{
const auto PrevLine = std::prev(m_it_CurLine);
if (SelAtBeginning) //курсор в начале блока
if (SelAtBeginning || SelFirst)
{
m_it_AnyBlockStart = PrevLine;
PrevLine->Select(PrevLine->GetLength(), -1);
m_it_CurLine->Select(m_it_CurLine->GetCurPos(), SelEnd);
}
else // курсор в конце блока
else
{
m_it_CurLine->RemoveSelection();
PrevLine->GetRealSelection(SelStart, SelEnd);
PrevLine->Select(SelStart, PrevLine->GetLength());
m_it_CurLine->Select(SelStart, m_it_CurLine->GetCurPos());
}
}
else
{
if (SelAtBeginning || SelFirst)
const auto PrevLine = std::prev(m_it_CurLine);
if (SelAtBeginning) //курсор в начале блока
{
m_it_CurLine->Select(SelStart-1,SelEnd);
m_it_AnyBlockStart = m_it_CurLine;
m_it_CurLine->Select(m_it_CurLine->GetLength(), -1);
}
else
else // курсор в конце блока
{
m_it_CurLine->Select(SelStart,SelEnd-1);
OldCur->RemoveSelection();
m_it_CurLine->GetRealSelection(SelStart, SelEnd);
m_it_CurLine->Select(SelStart, PrevLine->GetLength());
}
}

Pasting++;
ProcessKeyInternal(Manager::Key(KEY_LEFT), Refresh);
Pasting--;

Refresh = true;
return true;
}
Expand All @@ -1110,21 +1111,23 @@ bool Editor::ProcessKeyInternal(const Manager::Key& Key, bool& Refresh)
return true;
}

if (SelAtBeginning)
{
m_it_CurLine->Select(SelStart+1,SelEnd);
}
else
{
m_it_CurLine->Select(SelStart,SelEnd+1);
}

const auto OldCur = m_it_CurLine;
Pasting++;
ProcessKeyInternal(Manager::Key(KEY_RIGHT), Refresh);
Pasting--;

if (OldCur != m_it_CurLine)
if (OldCur == m_it_CurLine)
{
if (SelAtBeginning)
{
m_it_CurLine->Select(m_it_CurLine->GetCurPos(), SelEnd);
}
else
{
m_it_CurLine->Select(SelStart, m_it_CurLine->GetCurPos());
}
}
else
{
if (SelAtBeginning)
{
Expand Down
Loading

0 comments on commit 45efcc1

Please sign in to comment.