Skip to content

Commit f5aba59

Browse files
committed
#82 *Improved UTF8 input/output via console on Windows.
1 parent 67ef347 commit f5aba59

File tree

4 files changed

+71
-10
lines changed

4 files changed

+71
-10
lines changed

HISTORY

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
Oct. 10 2023
2+
Improved UTF8 input/output via console on Windows
3+
14
Sep. 18 2023
25
Improved handling of Unicode quotes
36

core/my_basic.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,11 +1346,11 @@ static bool_t mb_is_little_endian(void);
13461346

13471347
/** Unicode handling */
13481348

1349-
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE
1349+
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING
13501350
static int mb_bytes_to_wchar(const char* sz, wchar_t** out, size_t size);
13511351
static int mb_bytes_to_wchar_ansi(const char* sz, wchar_t** out, size_t size);
13521352
static int mb_wchar_to_bytes(const wchar_t* sz, char** out, size_t size);
1353-
#endif /* MB_CP_VC && MB_ENABLE_UNICODE */
1353+
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */
13541354

13551355
static int mb_uu_getbom(const char** ch);
13561356
#ifdef MB_ENABLE_UNICODE
@@ -3346,7 +3346,7 @@ static bool_t mb_is_little_endian(void) {
33463346

33473347
/** Unicode handling */
33483348

3349-
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE
3349+
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING
33503350
/* Map a UTF8 character string to a UTF16 (wide character) string */
33513351
static int mb_bytes_to_wchar(const char* sz, wchar_t** out, size_t size) {
33523352
int result = MultiByteToWideChar(CP_UTF8, 0, sz, -1, 0, 0);
@@ -3373,7 +3373,7 @@ static int mb_wchar_to_bytes(const wchar_t* sz, char** out, size_t size) {
33733373

33743374
return result;
33753375
}
3376-
#endif /* MB_CP_VC && MB_ENABLE_UNICODE */
3376+
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */
33773377

33783378
/* Determine whether a string begins with a BOM, and ignore it */
33793379
static int mb_uu_getbom(const char** ch) {
@@ -4926,7 +4926,7 @@ static int _standard_printer(mb_interpreter_t* s, const char* fmt, ...) {
49264926

49274927
/* Print a string */
49284928
static void _print_string(mb_interpreter_t* s, _object_t* obj) {
4929-
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE
4929+
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING
49304930
char* str = 0;
49314931
_dynamic_buffer_t buf;
49324932
size_t lbuf = 0;
@@ -4940,11 +4940,11 @@ static void _print_string(mb_interpreter_t* s, _object_t* obj) {
49404940
}
49414941
_get_printer(s)(s, "%ls", _WCHAR_BUF_PTR(buf));
49424942
_DISPOSE_BUF(buf);
4943-
#else /* MB_CP_VC && MB_ENABLE_UNICODE */
4943+
#else /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */
49444944
mb_assert(s && obj);
49454945

49464946
_get_printer(s)(s, "%s", obj->data.string ? obj->data.string : MB_NULL_STRING);
4947-
#endif /* MB_CP_VC && MB_ENABLE_UNICODE */
4947+
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */
49484948
}
49494949

49504950
/** Parsing helpers */
@@ -18610,7 +18610,7 @@ static int _std_input(mb_interpreter_t* s, void** l) {
1861018610
safe_free(obj->data.variable->data->data.string);
1861118611
}
1861218612
len = (size_t)_get_inputer(s)(s, pmt, line, sizeof(line));
18613-
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE
18613+
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING
1861418614
do {
1861518615
_dynamic_buffer_t buf;
1861618616
_dynamic_buffer_t wbuf;
@@ -18629,12 +18629,12 @@ static int _std_input(mb_interpreter_t* s, void** l) {
1862918629
obj->data.variable->data->data.string = _HEAP_CHAR_BUF(buf);
1863018630
obj->data.variable->data->is_ref = false;
1863118631
} while(0);
18632-
#else /* MB_CP_VC && MB_ENABLE_UNICODE */
18632+
#else /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */
1863318633
obj->data.variable->data->data.string = mb_memdup(line, (unsigned)(len + 1));
1863418634
#if MB_PRINT_INPUT_CONTENT
1863518635
_get_printer(s)(s, "%s\n", obj->data.variable->data->data.string);
1863618636
#endif /* MB_PRINT_INPUT_CONTENT */
18637-
#endif /* MB_CP_VC && MB_ENABLE_UNICODE */
18637+
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */
1863818638
ast = ast->next;
1863918639
} else {
1864018640
_handle_error_on_obj(s, SE_RN_INVALID_ID_USAGE, s->source_file, DON(ast), MB_FUNC_ERR, _exit, result);

core/my_basic.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,10 @@ extern "C" {
153153
# endif
154154
#endif /* MB_ENABLE_UNICODE_ID */
155155

156+
/* Selects where UTF8 <-> wide character conversion happens on MB_CP_VC builds
 * with MB_ENABLE_UNICODE. Nonzero: the core compiles its mb_bytes_to_wchar/
 * mb_wchar_to_bytes helpers and uses them when printing and reading strings.
 * Zero (the default when not defined beforehand): the core passes strings
 * through unchanged and the host program is expected to deal with the console
 * encoding itself. */
#ifndef MB_UNICODE_NEED_CONVERTING
157+
# define MB_UNICODE_NEED_CONVERTING 0
158+
#endif /* MB_UNICODE_NEED_CONVERTING */
159+
156160
#ifndef MB_ENABLE_FORK
157161
# define MB_ENABLE_FORK
158162
#endif /* MB_ENABLE_FORK */

shell/main.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
#ifdef MB_CP_VC
3434
# include <conio.h>
3535
# include <crtdbg.h>
36+
# include <fcntl.h>
37+
# include <io.h>
3638
# include <Windows.h>
3739
#elif !defined MB_CP_BORLANDC && !defined MB_CP_TCC
3840
# include <unistd.h>
@@ -1440,6 +1442,49 @@ static int beep(struct mb_interpreter_t* s, void** l) {
14401442
** Callbacks and handlers
14411443
*/
14421444

1445+
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING
1446+
static int _on_input(struct mb_interpreter_t* s, const char* pmt, char* buf, int n) {
1447+
int result = 0;
1448+
mb_unrefvar(s);
1449+
mb_unrefvar(pmt);
1450+
1451+
if(buf && n) {
1452+
int wlen = n;
1453+
int save = _setmode(_fileno(stdin), _O_U16TEXT);
1454+
wchar_t* wstr = malloc(wlen * sizeof(wchar_t));
1455+
if(fgetws(wstr, wlen, stdin) == 0) {
1456+
_setmode(_fileno(stdin), save);
1457+
1458+
free(wstr);
1459+
1460+
fprintf(stderr, "Error reading.\n");
1461+
1462+
exit(1);
1463+
}
1464+
int len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, 0, 0, 0, 0);
1465+
if(!len) {
1466+
_setmode(_fileno(stdin), save);
1467+
1468+
free(wstr);
1469+
1470+
fprintf(stderr, "Error reading.\n");
1471+
1472+
exit(1);
1473+
}
1474+
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, buf, n, 0, 0);
1475+
free(wstr);
1476+
_setmode(_fileno(stdin), save);
1477+
result = len - 1;
1478+
if(buf[result - 1] == '\n') {
1479+
buf[result - 1] = '\0';
1480+
result--;
1481+
}
1482+
}
1483+
1484+
return result;
1485+
}
1486+
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING */
1487+
14431488
static int _on_prev_stepped(struct mb_interpreter_t* s, void** l, const char* f, int p, unsigned short row, unsigned short col) {
14441489
mb_unrefvar(s);
14451490
mb_unrefvar(l);
@@ -1526,14 +1571,23 @@ static void _on_startup(void) {
15261571
srand((unsigned)_ticks());
15271572
#endif /* _HAS_TICKS */
15281573

1574+
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING
1575+
SetConsoleOutputCP(CP_UTF8);
1576+
SetConsoleCP(CP_UTF8);
1577+
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING */
1578+
15291579
setlocale(LC_ALL, "");
1580+
setlocale(LC_CTYPE, "C");
15301581
setlocale(LC_NUMERIC, "C");
15311582
setlocale(LC_TIME, "C");
15321583

15331584
mb_init();
15341585

15351586
mb_open(&bas);
15361587

1588+
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING
1589+
mb_set_inputer(bas, _on_input);
1590+
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING */
15371591
mb_debug_set_stepped_handler(bas, _on_prev_stepped, _on_post_stepped);
15381592
mb_set_error_handler(bas, _on_error);
15391593
mb_set_import_handler(bas, _on_import);

0 commit comments

Comments
 (0)