-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmorse_token.cpp
164 lines (152 loc) · 5.93 KB
/
morse_token.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
///\file morse_token.cpp Implementation of Morse code operations
#include "morse_token.h"
#include "actions.h" // for txError
/// Placeholder stored in morseTree for indices that have no character
/// assigned (undefined or incomplete codes).
#define MORSE_UNDEFINED_CHAR '^'
/// Morse code for letters encoded as an implicit binary tree.
/// Letters are written in breadth first order. Left child
/// corresponds to dit; right child corresponds to dah.
/// Caret is used for undefined (or incomplete) codes.
/// The root of the tree acts as a sentinel to simplify the process of
/// converting the loop index into our left-justified code scheme.
///
/// Put differently, the index of a character is a leading 1 bit (the
/// sentinel) followed by one bit per symbol, 0 for dit and 1 for dah.
/// Index 0 is never a valid code and index 1 (sentinel only) is the
/// pause; both slots hold a caret, and m2a() returns the space for a
/// pause without consulting this table.
///
/// Here are some examples to show how it works:
///
/// Index | Letter | Morse Code | Notes
/// ----------|--------|--------------|------
/// \c 000000 | N/A | N/A | Invalid code
/// \c 000001 | space | N/A | Sentinel followed by zero bits (represents a pause)
/// \c 000010 | \c E | `.` | Sentinel followed by a dit
/// \c 000011 | \c T | `_` | Sentinel followed by a dah
/// \c 000100 | \c I | `. .` | Sentinel followed by two dits
/// \c 000101 | \c A | `. _` | And so on...
/// \c 000110 | \c N | `_ .` |
/// \c 000111 | \c M | `_ _` |
/// \c 001000 | \c S | `. . .` |
/// \c 001001 | \c U | `. . _` |
/// \c 001010 | \c R | `. _ .` |
/// \c 001011 | \c W | `. _ _` |
/// \c 001100 | \c D | `_ . .` |
/// \c 001101 | \c K | `_ . _` |
/// \c 001110 | \c G | `_ _ .` |
/// \c 001111 | \c O | `_ _ _` |
///
/// Some characters occur more than once in the table ('!' and '/').
/// a2m() looks characters up with strchr(), so encoding always uses
/// the first occurrence, while m2a() decodes either position.
///
/// NOTE(review): the 6-symbol row appears to contain 63 characters
/// rather than 64, which would make index 127 (six dahs) the string's
/// NUL terminator instead of a caret -- confirm against the original.
const char morseTree[] =
"^^" // index 0 (invalid) and index 1 (root sentinel / pause)
"ET" // 1 symbol
"IANM" // 2 symbols
"SURWDKGO" // 3 symbols
"HVF^L\nPJBXCYZQ!^" // 4 symbols
"54^3^^^2&/+^^^^16=/^^^(^7^^^8^90" // 5 symbols
"^^^^$^^^^^^^?_^^^^\"^^.^^^^@^^^'^^-^^^^^^^^;!^)^^^^^,^^^^:^^^^^^"; // 6 symbols
/// NUL-terminated list of punctuation characters. Every character
/// listed here also has an entry in morseTree. It is not referenced
/// by the functions in this part of the file.
const char* morsePunctuation = "!\"$&'()+,-./:;=?@_";
/// Convert a Morse code into ASCII.
///
/// \param code Left-justified Morse token (symbols followed by a
///             sentinel 1 bit).
/// \return \c 'x' when \c code.valid() is false or the computed table
///         index falls outside the entries of morseTree, a space when
///         \c code.pause() is true, \c '$' or \c '#' for the two
///         special cases handled in the switch below, and otherwise
///         the morseTree entry for the code (a caret for codes with
///         no character assigned).
char m2a(MorseToken code) {
  char result('x');
  if (!code.valid()) {
    ; // Return 'x' for error.
  } else if (code.pause()) {
    result = ' ';
  } else {
    // Count the symbols in the token. Every pass through the loop
    // shifts one bit out of t, so the loop runs until the lowest set
    // bit of t is gone. Starting the count at -2 compensates for the
    // extra right shift of the code and for the sentinel bit.
    // (Assumes MORSE_MSB is the top bit of the 8-bit token; it is
    // defined outside this file -- TODO confirm.)
    int length(-2);
    uint8_t t(MORSE_MSB | (code >> 1));
    while (t) {
      t <<= 1;
      ++length;
    }
    // Right-justify the symbols and put the tree's root sentinel in
    // front of them, which yields the index used by morseTree.
    uint8_t raw(code);
    raw >>= (CHAR_BIT * sizeof MorseToken(0) - length);
    raw |= 1 << length;
    // Translate the subset of the prosigns/abbreviations that can
    // be represented by a single ASCII character, but are not
    // present in morseTree. These tokens have 7 symbols. Dollar
    // sign is recognized as a six symbol prefix of its seven symbol
    // code.
    //
    // Note: MORSE_ERROR does not fit in the 8-bit representation of
    // MorseToken.
    switch (raw) {
      case MORSE_DOLLAR: result = '$'; break;
      case MORSE_SS: result = '#'; break;
      default:
        // The last element of morseTree is the string's NUL
        // terminator, not a table entry, so it is excluded from the
        // valid index range; otherwise a code that maps onto it would
        // decode to '\0'.
        if (raw < (sizeof morseTree) - 1) {
          result = morseTree[raw];
        } else {
          ; // Nothing more to do since result is initialized to 'x'.
        }
    }
  }
  return result;
}
/// Convert a single ASCII character into a Morse code token.
///
/// \param a Character to encode. The lookup is an exact match against
///          morseTree, so only characters spelled the way they appear
///          in the table are found.
/// \return The left-justified token for \c a (symbols followed by a
///         sentinel 1 bit), or a token built from 0 (invalid) when
///         \c a has no encoding, including the NUL character and the
///         caret used as the table's placeholder.
MorseToken a2m(char a) {
  uint8_t letterCode(0);
  // strchr() returns a null pointer on a miss, and it matches the
  // string terminator when asked for NUL. Neither is a table entry,
  // so both are rejected before any pointer arithmetic is done.
  const char* hit(NULL);
  if (a != '\0') {
    hit = strchr(morseTree, a);
  }
  if (hit != NULL && morseTree < hit && *hit != MORSE_UNDEFINED_CHAR) {
    ptrdiff_t key(hit - morseTree);
    // Add the sentinel to the right.
    letterCode = key << 1 | 1;
    // Left-justify: shift until the tree's root sentinel (the leading
    // 1 bit of the index) reaches the top bit...
    while (!(letterCode & MORSE_MSB)) {
      letterCode <<= 1;
    }
    // ...then shift it out, leaving only symbols + trailing sentinel.
    letterCode <<= 1;
  } else {
    // The codes below would require more rows in morseTree and
    // the rows would be mostly empty, so they are handled here.
    // With the table as currently written these characters are found
    // in morseTree first, so the cases only act as a fallback.
    switch (a) {
      case '\n': letterCode = MORSE_AA; break;
      case '&': letterCode = MORSE_AMPER; break;
      case ')': letterCode = MORSE_CLOSEP; break;
      case '$': letterCode = MORSE_DOLLAR; break;
      default:
        ; // Nothing more to do since letterCode is initialized to 0
          // (invalid).
    }
  }
  return MorseToken(letterCode);
}
/// Interpret a Morse code token as a decimal digit.
///
/// The token is converted with \c toChar() and the character \c '0'
/// is subtracted from it. If the difference is not in the range 0-9
/// the error prosign is transmitted via txError(); the value returned
/// in that case is the out-of-range difference and must not be used
/// as a digit.
uint8_t m2i(MorseToken m) {
  const uint8_t digit = static_cast<uint8_t>(m.toChar() - '0');
  const bool isDigit = digit <= 9;
  if (!isDigit) {
    txError();
  }
  return digit;
}
/**
Given an ASCII string representing a prosign, return a
code composed by concatenating the morse code for each
character in the prosign without inter-symbol gaps.
For example "AB" yields
String | Morse Code | Binary
-------|---------------------------|-------
\c A | \c . _ | \c 01100000
\c B | \c _ . . . | \c 10001000
\c AB | \c . _ _ . . . | \c 01100010

Characters that have no Morse code (their token converts to 0) are
skipped. A null or empty \c prosign yields a token built from 0
(invalid).

The code is accumulated in 8 bits. If the translation is longer than
that, the symbols that do not fit are dropped from the end, the
trailing sentinel may be lost, and the remaining characters are
ignored. All the prosigns I'm aware of fit without truncation if
MorseToken is 16 bits or more. Most fit in 7 bits with the notable
exception of \c MORSE_ERROR, which requires 9 bits (8 symbols + 1
sentinel). Unfortunately, there are many ways of denoting 8 dits, so
the special case isn't so easy to code, e.g. \c ^HH^, \c ^5S^,
\c ^S5^, \c ^IIII^, \c ^IIH^, \c ^EEEEEEEE^, etc.
*/
MorseToken a2p(const char* prosign) {
  uint8_t letterCode(0);
  if (!prosign) {
    return MorseToken(letterCode);
  }
  // Number of bits in use, counted from the top bit down to and
  // including the sentinel. Zero means nothing has been stored yet.
  size_t length(0);
  for (const char* p(prosign); *p; ++p) {
    uint8_t m(MorseToken(*p, MorseToken::Char));
    if (!m) {
      continue; // No code for this character; leave the result as is.
    }
    if (length > wordBits) {
      break; // The sentinel has already been pushed out; no room left.
    }
    if (length) {
      // Erase the sentinel (always 1).
      letterCode ^= 1 << (wordBits - length);
      // Shift the new symbol and or it in after the previous.
      letterCode |= m >> (length - 1);
      // The erased sentinel no longer occupies a bit; the appended
      // code brings its own, which is counted below.
      --length;
    } else {
      letterCode = m;
    }
    // Update the length with the bits of the appended code
    // (its symbols plus its sentinel).
    while (m) { ++length; m <<= 1; }
  }
  return MorseToken(letterCode);
}