From 2ca5f3a386a1ca63433d6d13bb66f5e46fb2a854 Mon Sep 17 00:00:00 2001 From: AJR Date: Tue, 15 Dec 2020 13:36:55 -0500 Subject: [PATCH] unicode.h: Updates - Remove from emu.h (except for UTF8_xxx macros, which have been transplanted to emucore.h since a lot of drivers use them) and osdepend.h - Add std::string_view overloads for uchar_from_utf8 and normalize_unicode --- src/devices/cpu/mips/r4000.cpp | 1 + src/emu/drivenum.cpp | 1 + src/emu/emu.h | 1 - src/emu/emucore.h | 41 +++++++++++++++++++++++++++ src/emu/ioport.cpp | 1 + src/emu/natkeyboard.cpp | 1 + src/emu/rendfont.cpp | 7 +++-- src/emu/rendlay.cpp | 7 +++-- src/emu/softlist_dev.cpp | 2 ++ src/emu/validity.cpp | 1 + src/frontend/mame/ui/selector.cpp | 1 + src/frontend/mame/ui/selgame.cpp | 1 + src/frontend/mame/ui/selsoft.cpp | 1 + src/frontend/mame/ui/text.cpp | 5 ++-- src/frontend/mame/ui/utils.h | 2 ++ src/lib/util/unicode.cpp | 15 ++++++++-- src/lib/util/unicode.h | 44 ++--------------------------- src/osd/modules/input/input_sdl.cpp | 1 + src/osd/modules/netdev/taptun.cpp | 1 + src/osd/osdepend.h | 1 - 20 files changed, 82 insertions(+), 53 deletions(-) diff --git a/src/devices/cpu/mips/r4000.cpp b/src/devices/cpu/mips/r4000.cpp index 3a1699df7a5..f3bb66b28c0 100644 --- a/src/devices/cpu/mips/r4000.cpp +++ b/src/devices/cpu/mips/r4000.cpp @@ -30,6 +30,7 @@ #include "debugger.h" #include "r4000.h" #include "mips3dsm.h" +#include "unicode.h" #include "softfloat3/source/include/softfloat.h" diff --git a/src/emu/drivenum.cpp b/src/emu/drivenum.cpp index 574205379cd..52ac8c54fef 100644 --- a/src/emu/drivenum.cpp +++ b/src/emu/drivenum.cpp @@ -11,6 +11,7 @@ #include "emu.h" #include "drivenum.h" #include "softlist_dev.h" +#include "unicode.h" #include diff --git a/src/emu/emu.h b/src/emu/emu.h index af3b6aada72..0463ff8d332 100644 --- a/src/emu/emu.h +++ b/src/emu/emu.h @@ -38,7 +38,6 @@ // commonly-referenced utilities imported from lib/util #include "palette.h" -#include "unicode.h" #include "strformat.h" 
#include "vecstream.h" diff --git a/src/emu/emucore.h b/src/emu/emucore.h index d161dde9672..53ee22130e0 100644 --- a/src/emu/emucore.h +++ b/src/emu/emucore.h @@ -188,6 +188,47 @@ constexpr int ROT180 = ORIENTATION_FLIP_X | ORIENTATION_FLIP_Y; constexpr int ROT270 = ORIENTATION_SWAP_XY | ORIENTATION_FLIP_Y; // rotate counter-clockwise 90 degrees +// these are UTF-8 encoded strings for common characters +#define UTF8_NBSP "\xc2\xa0" /* non-breaking space */ + +#define UTF8_MULTIPLY "\xc3\x97" /* multiplication sign */ +#define UTF8_DIVIDE "\xc3\xb7" /* division sign */ +#define UTF8_SQUAREROOT "\xe2\x88\x9a" /* square root symbol */ +#define UTF8_PLUSMINUS "\xc2\xb1" /* plusminus symbol */ + +#define UTF8_POW_2 "\xc2\xb2" /* superscript 2 */ +#define UTF8_POW_X "\xcb\xa3" /* superscript x */ +#define UTF8_POW_Y "\xca\xb8" /* superscript y */ +#define UTF8_PRIME "\xca\xb9" /* prime symbol */ +#define UTF8_DEGREES "\xc2\xb0" /* degrees symbol */ + +#define UTF8_SMALL_PI "\xcf\x80" /* Greek small letter pi */ +#define UTF8_CAPITAL_SIGMA "\xce\xa3" /* Greek capital letter sigma */ +#define UTF8_CAPITAL_DELTA "\xce\x94" /* Greek capital letter delta */ + +#define UTF8_MACRON "\xc2\xaf" /* macron symbol */ +#define UTF8_NONSPACE_MACRON "\xcc\x84" /* nonspace macron, use after another char */ + +#define a_RING "\xc3\xa5" /* small a with a ring */ +#define a_UMLAUT "\xc3\xa4" /* small a with an umlaut */ +#define o_UMLAUT "\xc3\xb6" /* small o with an umlaut */ +#define u_UMLAUT "\xc3\xbc" /* small u with an umlaut */ +#define e_ACUTE "\xc3\xa9" /* small e with an acute */ +#define n_TILDE "\xc3\xb1" /* small n with a tilde */ + +#define A_RING "\xc3\x85" /* capital A with a ring */ +#define A_UMLAUT "\xc3\x84" /* capital A with an umlaut */ +#define O_UMLAUT "\xc3\x96" /* capital O with an umlaut */ +#define U_UMLAUT "\xc3\x9c" /* capital U with an umlaut */ +#define E_ACUTE "\xc3\x89" /* capital E with an acute */ +#define N_TILDE "\xc3\x91" /* capital N with a tilde */ 
+ +#define UTF8_LEFT "\xe2\x86\x90" /* cursor left */ +#define UTF8_RIGHT "\xe2\x86\x92" /* cursor right */ +#define UTF8_UP "\xe2\x86\x91" /* cursor up */ +#define UTF8_DOWN "\xe2\x86\x93" /* cursor down */ + + //************************************************************************** // COMMON MACROS diff --git a/src/emu/ioport.cpp b/src/emu/ioport.cpp index e3b2aa9b473..0ea6dd59d59 100644 --- a/src/emu/ioport.cpp +++ b/src/emu/ioport.cpp @@ -100,6 +100,7 @@ #include "natkeyboard.h" #include "osdepend.h" +#include "unicode.h" #include #include diff --git a/src/emu/natkeyboard.cpp b/src/emu/natkeyboard.cpp index a73381e24f4..d01b64311cf 100644 --- a/src/emu/natkeyboard.cpp +++ b/src/emu/natkeyboard.cpp @@ -11,6 +11,7 @@ #include "emu.h" #include "natkeyboard.h" #include "emuopts.h" +#include "unicode.h" #include #include diff --git a/src/emu/rendfont.cpp b/src/emu/rendfont.cpp index 9d22b3ff923..36f5b7367a9 100644 --- a/src/emu/rendfont.cpp +++ b/src/emu/rendfont.cpp @@ -15,6 +15,7 @@ #include "osdepend.h" #include "uismall.fh" +#include "unicode.h" #include "ui/uicmd14.fh" #include "ui/cmddata.h" @@ -380,7 +381,7 @@ std::string convert_command_glyph(std::string_view str) { // decode UTF-8 char32_t uchar; - int const codelen(uchar_from_utf8(&uchar, &str[0], str.length())); + int const codelen(uchar_from_utf8(&uchar, str)); if (0 >= codelen) break; str.remove_prefix(codelen); @@ -861,7 +862,7 @@ float render_font::string_width(float height, float aspect, std::string_view str // loop over characters while (!string.empty()) { - int scharcount = uchar_from_utf8(&schar, &string[0], string.length()); + int scharcount = uchar_from_utf8(&schar, string); totwidth += get_char(schar).width; string.remove_prefix(scharcount); } @@ -884,7 +885,7 @@ float render_font::utf8string_width(float height, float aspect, std::string_view while (!utf8string.empty()) { char32_t uchar; - int count = uchar_from_utf8(&uchar, &utf8string[0], utf8string.length()); + int count = 
uchar_from_utf8(&uchar, utf8string); if (count < 0) break; diff --git a/src/emu/rendlay.cpp b/src/emu/rendlay.cpp index a71857175e1..51dd035aef2 100644 --- a/src/emu/rendlay.cpp +++ b/src/emu/rendlay.cpp @@ -18,6 +18,7 @@ #include "video/rgbutil.h" #include "nanosvg.h" +#include "unicode.h" #include "vecstream.h" #include "xmlfile.h" @@ -3156,7 +3157,7 @@ protected: while (!s.empty()) { char32_t schar; - int scharcount = uchar_from_utf8(&schar, &s[0], s.length()); + int scharcount = uchar_from_utf8(&schar, s); if (scharcount == -1) break; @@ -3307,7 +3308,7 @@ private: while (!s.empty()) { char32_t schar; - int scharcount = uchar_from_utf8(&schar, &s[0], s.length()); + int scharcount = uchar_from_utf8(&schar, s); if (scharcount == -1) break; @@ -3693,7 +3694,7 @@ void layout_element::component::draw_text( while (!str.empty()) { char32_t schar; - int scharcount = uchar_from_utf8(&schar, &str[0], str.length()); + int scharcount = uchar_from_utf8(&schar, str); if (scharcount == -1) break; diff --git a/src/emu/softlist_dev.cpp b/src/emu/softlist_dev.cpp index a69f6ccab00..c6768155f16 100644 --- a/src/emu/softlist_dev.cpp +++ b/src/emu/softlist_dev.cpp @@ -16,6 +16,8 @@ #include "romload.h" #include "validity.h" +#include "unicode.h" + #include diff --git a/src/emu/validity.cpp b/src/emu/validity.cpp index c7363cf7b7a..84c29dab6c4 100644 --- a/src/emu/validity.cpp +++ b/src/emu/validity.cpp @@ -14,6 +14,7 @@ #include "emuopts.h" #include "romload.h" #include "video/rgbutil.h" +#include "unicode.h" #include #include diff --git a/src/frontend/mame/ui/selector.cpp b/src/frontend/mame/ui/selector.cpp index c7a04c266ca..56b0038c5c5 100644 --- a/src/frontend/mame/ui/selector.cpp +++ b/src/frontend/mame/ui/selector.cpp @@ -13,6 +13,7 @@ #include "ui/ui.h" #include "ui/utils.h" +#include "unicode.h" namespace ui { diff --git a/src/frontend/mame/ui/selgame.cpp b/src/frontend/mame/ui/selgame.cpp index 1f657f28100..b8575428925 100644 --- a/src/frontend/mame/ui/selgame.cpp +++ 
b/src/frontend/mame/ui/selgame.cpp @@ -31,6 +31,7 @@ #include "romload.h" #include "softlist_dev.h" #include "uiinput.h" +#include "unicode.h" #include #include diff --git a/src/frontend/mame/ui/selsoft.cpp b/src/frontend/mame/ui/selsoft.cpp index f56d04f9fb0..e59c8f9ee79 100644 --- a/src/frontend/mame/ui/selsoft.cpp +++ b/src/frontend/mame/ui/selsoft.cpp @@ -24,6 +24,7 @@ #include "softlist_dev.h" #include "uiinput.h" #include "luaengine.h" +#include "unicode.h" #include #include diff --git a/src/frontend/mame/ui/text.cpp b/src/frontend/mame/ui/text.cpp index 54bd327cacc..981e69b5380 100644 --- a/src/frontend/mame/ui/text.cpp +++ b/src/frontend/mame/ui/text.cpp @@ -12,6 +12,7 @@ #include "text.h" #include "rendfont.h" #include "render.h" +#include "unicode.h" #include #include @@ -133,7 +134,7 @@ void text_layout::add_text(std::string_view text, const char_style &style) { // get the current character char32_t schar; - int const scharcount = uchar_from_utf8(&schar, &text[0], text.length()); + int const scharcount = uchar_from_utf8(&schar, text); if (scharcount < 0) break; @@ -151,7 +152,7 @@ void text_layout::add_text(std::string_view text, const char_style &style) // get the current character char32_t ch; - int const scharcount = uchar_from_utf8(&ch, &text[0], text.length()); + int const scharcount = uchar_from_utf8(&ch, text); if (scharcount < 0) break; text.remove_prefix(scharcount); diff --git a/src/frontend/mame/ui/utils.h b/src/frontend/mame/ui/utils.h index a4a7ec02920..b9a2f17dcfd 100644 --- a/src/frontend/mame/ui/utils.h +++ b/src/frontend/mame/ui/utils.h @@ -19,6 +19,8 @@ #include #include +#include "unicode.h" + class mame_ui_manager; class render_container; diff --git a/src/lib/util/unicode.cpp b/src/lib/util/unicode.cpp index 4a83b675087..b3568fbb8d4 100644 --- a/src/lib/util/unicode.cpp +++ b/src/lib/util/unicode.cpp @@ -120,6 +120,17 @@ bool uchar_is_digit(char32_t uchar) } +//------------------------------------------------- +// uchar_from_utf8 - 
convert a UTF-8 sequence +// into a unicode character +//----------------------------------------------- + +int uchar_from_utf8(char32_t *uchar, std::string_view utf8str) +{ + return uchar_from_utf8(uchar, utf8str.data(), utf8str.length()); +} + + //------------------------------------------------- // uchar_from_utf8 - convert a UTF-8 sequence // into a unicode character @@ -494,9 +505,9 @@ std::string normalize_unicode(const char *s, unicode_normalization_form normaliz // unicode //------------------------------------------------- -std::string normalize_unicode(const char *s, size_t length, unicode_normalization_form normalization_form, bool fold_case) +std::string normalize_unicode(std::string_view s, unicode_normalization_form normalization_form, bool fold_case) { - return internal_normalize_unicode(s, length, normalization_form, fold_case, false); + return internal_normalize_unicode(s.data(), s.length(), normalization_form, fold_case, false); } diff --git a/src/lib/util/unicode.h b/src/lib/util/unicode.h index 9c727041db8..37821122998 100644 --- a/src/lib/util/unicode.h +++ b/src/lib/util/unicode.h @@ -22,6 +22,7 @@ #include "osdcore.h" #include +#include #include @@ -36,46 +37,6 @@ #define UTF8_CHAR_MAX 6 #define UTF16_CHAR_MAX 2 -// these are UTF-8 encoded strings for common characters -#define UTF8_NBSP "\xc2\xa0" /* non-breaking space */ - -#define UTF8_MULTIPLY "\xc3\x97" /* multiplication sign */ -#define UTF8_DIVIDE "\xc3\xb7" /* division sign */ -#define UTF8_SQUAREROOT "\xe2\x88\x9a" /* square root symbol */ -#define UTF8_PLUSMINUS "\xc2\xb1" /* plusminus symbol */ - -#define UTF8_POW_2 "\xc2\xb2" /* superscript 2 */ -#define UTF8_POW_X "\xcb\xa3" /* superscript x */ -#define UTF8_POW_Y "\xca\xb8" /* superscript y */ -#define UTF8_PRIME "\xca\xb9" /* prime symbol */ -#define UTF8_DEGREES "\xc2\xb0" /* degrees symbol */ - -#define UTF8_SMALL_PI "\xcf\x80" /* Greek small letter pi */ -#define UTF8_CAPITAL_SIGMA "\xce\xa3" /* Greek capital letter sigma */ 
-#define UTF8_CAPITAL_DELTA "\xce\x94" /* Greek capital letter delta */ - -#define UTF8_MACRON "\xc2\xaf" /* macron symbol */ -#define UTF8_NONSPACE_MACRON "\xcc\x84" /* nonspace macron, use after another char */ - -#define a_RING "\xc3\xa5" /* small a with a ring */ -#define a_UMLAUT "\xc3\xa4" /* small a with an umlaut */ -#define o_UMLAUT "\xc3\xb6" /* small o with an umlaut */ -#define u_UMLAUT "\xc3\xbc" /* small u with an umlaut */ -#define e_ACUTE "\xc3\xa9" /* small e with an acute */ -#define n_TILDE "\xc3\xb1" /* small n with a tilde */ - -#define A_RING "\xc3\x85" /* capital A with a ring */ -#define A_UMLAUT "\xc3\x84" /* capital A with an umlaut */ -#define O_UMLAUT "\xc3\x96" /* capital O with an umlaut */ -#define U_UMLAUT "\xc3\x9c" /* capital U with an umlaut */ -#define E_ACUTE "\xc3\x89" /* capital E with an acute */ -#define N_TILDE "\xc3\x91" /* capital N with a tilde */ - -#define UTF8_LEFT "\xe2\x86\x90" /* cursor left */ -#define UTF8_RIGHT "\xe2\x86\x92" /* cursor right */ -#define UTF8_UP "\xe2\x86\x91" /* cursor up */ -#define UTF8_DOWN "\xe2\x86\x93" /* cursor down */ - enum class unicode_normalization_form { C, D, KC, KD }; @@ -95,6 +56,7 @@ bool uchar_is_digit(char32_t uchar); // converting strings to 32-bit Unicode chars int uchar_from_utf8(char32_t *uchar, const char *utf8char, size_t count); +int uchar_from_utf8(char32_t *uchar, std::string_view utf8str); int uchar_from_utf16(char32_t *uchar, const char16_t *utf16char, size_t count); int uchar_from_utf16f(char32_t *uchar, const char16_t *utf16char, size_t count); std::u32string ustr_from_utf8(const std::string &utf8str); @@ -112,7 +74,7 @@ std::string utf8_from_wstring(const std::wstring &string); // unicode normalization std::string normalize_unicode(const std::string &s, unicode_normalization_form normalization_form, bool fold_case = false); std::string normalize_unicode(const char *s, unicode_normalization_form normalization_form, bool fold_case = false); -std::string 
normalize_unicode(const char *s, size_t length, unicode_normalization_form normalization_form, bool fold_case = false); +std::string normalize_unicode(std::string_view s, unicode_normalization_form normalization_form, bool fold_case = false); // upper and lower case char32_t uchar_toupper(char32_t ch); diff --git a/src/osd/modules/input/input_sdl.cpp b/src/osd/modules/input/input_sdl.cpp index 51af4750eb1..248f720d485 100644 --- a/src/osd/modules/input/input_sdl.cpp +++ b/src/osd/modules/input/input_sdl.cpp @@ -31,6 +31,7 @@ #include "emu.h" #include "uiinput.h" #include "strconv.h" +#include "unicode.h" // MAMEOS headers #include "input_common.h" diff --git a/src/osd/modules/netdev/taptun.cpp b/src/osd/modules/netdev/taptun.cpp index baa45da0d5d..6999d6f63ba 100644 --- a/src/osd/modules/netdev/taptun.cpp +++ b/src/osd/modules/netdev/taptun.cpp @@ -17,6 +17,7 @@ #include "osdnet.h" #include "modules/osdmodule.h" #include "netdev_module.h" +#include "unicode.h" #ifdef __linux__ #define IFF_TAP 0x0002 diff --git a/src/osd/osdepend.h b/src/osd/osdepend.h index ad7b52049a6..cd00244f5bd 100644 --- a/src/osd/osdepend.h +++ b/src/osd/osdepend.h @@ -15,7 +15,6 @@ #include "emucore.h" #include "osdcore.h" -#include "unicode.h" #include "../frontend/mame/ui/menuitem.h" #include