mirror of
https://github.com/holub/mame
synced 2025-04-16 21:44:32 +03:00

ui: Added some missing functionality: * Added an option to copy input device IDs to the relevant menus. * Added an item for setting the software lists files path (-hashpath) to the folder setup menu. * Allow pasting text from clipboard in most places that allow typing (searching, entering filenames, entering barcodes). * Changed the software selection menu heading to be a bit less misleading. * Made barcode menu less eager to rebuild itself unnecessarily, and removed some confusing and apparently pointless code. Exposed more Lua bindings: * Added low-level palette objects. * Added indexed bitmap types. * Added a bitmap method for extracting pixels from a rectangular area as a packed binary string. * Changed screen device pixels method to return width and height in addition to the pixels. osd: Added some functionality and cleaned up a little: * Added a function for copying text to the clipboard. * Moved function for converting Windows error codes to standard error conditions to winutil.cpp so it can be used from more places. * Removed duplicate declaration of osd_get_clipboard_text and made the function noexcept (including fixing implementations). * Made macOS implementation of osd_get_clipboard_text skip the encoding conversion if it finds UTF-8 text first. * Changed the default -uimodekey setting so it doesn't lose the "not shift" that stops the default from interfering with UI paste. Various bug fixes: * util/unicode.cpp: Fixed the version of utf8_from_uchar that returns std::string blowing up on invalid codepoints. * util/bitmap.h: Fixed wrapping constructors for indexed bitmaps taking the wrong parameter type (nothing was using them before). * util/bitmap.cpp: Fixed potential use-after-free issues with bitmap palettes. * emu/input.cpp, emu/inputdev.cpp: Log 1-based device numbers, matching what's shown in the internal UI and used in tokens in CFG files. * emu/emumem.cpp: Added the bank tag to a fatal error message where it was missing. 
docs: Reworked and expanded documentation on configuring stable controller IDs. For translators, the changes are quite minor: * There's a menu item for copying a device ID to the clipboard, and associated success/failure messages. * There's the menu item for setting the software list file search path. * One of the lines in the software selection menu heading has changes as it could be interpreted as implying it showed a software list name.
589 lines
14 KiB
C++
589 lines
14 KiB
C++
// license:BSD-3-Clause
|
|
// copyright-holders:Aaron Giles
|
|
/*********************************************************************
|
|
|
|
unicode.cpp
|
|
|
|
Unicode related functions
|
|
|
|
***************************************************************************/
|
|
|
|
#include "unicode.h"
|
|
|
|
#include "osdcomm.h"
|
|
|
|
#ifdef _WIN32
|
|
#include "strconv.h"
|
|
#endif
|
|
|
|
#include <utf8proc.h>
|
|
|
|
#include <codecvt>
|
|
#include <locale>
|
|
|
|
|
|
namespace {
|
|
|
|
//-------------------------------------------------
|
|
// internal_normalize_unicode - uses utf8proc to
|
|
// normalize unicode
|
|
//-------------------------------------------------
|
|
|
|
std::string internal_normalize_unicode(
|
|
char const *s,
|
|
size_t length,
|
|
unicode_normalization_form normalization_form,
|
|
bool fold_case,
|
|
bool null_terminated)
|
|
{
|
|
// convert the normalization form
|
|
int options;
|
|
switch (normalization_form)
|
|
{
|
|
case unicode_normalization_form::C:
|
|
options = UTF8PROC_STABLE | UTF8PROC_COMPOSE;
|
|
break;
|
|
case unicode_normalization_form::D:
|
|
options = UTF8PROC_STABLE | UTF8PROC_DECOMPOSE;
|
|
break;
|
|
case unicode_normalization_form::KC:
|
|
options = UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT;
|
|
break;
|
|
case unicode_normalization_form::KD:
|
|
options = UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT;
|
|
break;
|
|
default:
|
|
throw false;
|
|
}
|
|
|
|
// perform case folding?
|
|
if (fold_case)
|
|
options |= UTF8PROC_CASEFOLD;
|
|
|
|
// use NUL terminator to determine length?
|
|
if (null_terminated)
|
|
options |= UTF8PROC_NULLTERM;
|
|
|
|
// invoke utf8proc
|
|
utf8proc_uint8_t *utf8proc_result(nullptr);
|
|
utf8proc_ssize_t const utf8proc_result_length(utf8proc_map(reinterpret_cast<utf8proc_uint8_t const *>(s), length, &utf8proc_result, utf8proc_option_t(options)));
|
|
|
|
// conver the result
|
|
std::string result;
|
|
if (utf8proc_result)
|
|
{
|
|
if (utf8proc_result_length > 0)
|
|
result.assign(reinterpret_cast<char const *>(utf8proc_result), utf8proc_result_length);
|
|
free(utf8proc_result);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
//-------------------------------------------------
//  uchar_isvalid - return true if a given
//  character is a legitimate unicode character
//  (inside the code space and not a surrogate)
//-------------------------------------------------

bool uchar_isvalid(char32_t uchar)
{
	// the code space ends at U+10FFFF
	if (uchar >= 0x110000)
		return false;

	// U+D800..U+DFFF are reserved for UTF-16 surrogates
	bool const below_surrogates = uchar < 0xd800;
	bool const above_surrogates = uchar > 0xdfff;
	return below_surrogates || above_surrogates;
}
|
|
|
|
|
|
//-------------------------------------------------
//  uchar_is_printable - tests to see if a unicode
//  char is printable
//
//  returns false for C0 controls, DEL, C1
//  controls and the Unicode noncharacters; true
//  for everything else (the value is not checked
//  against the end of the code space)
//-------------------------------------------------

bool uchar_is_printable(char32_t uchar)
{
	// C0 control
	if (uchar <= 0x0001f)
		return false;

	// DEL and C1 control
	if ((uchar >= 0x0007f) && (uchar <= 0x0009f))
		return false;

	// contiguous noncharacter block - 32 code points, U+FDD0 through U+FDEF
	if ((uchar >= 0x0fdd0) && (uchar <= 0x0fdef))
		return false;

	// last two code points of every plane (xFFFE and xFFFF) are noncharacters
	if ((uchar & 0x0fffe) == 0x0fffe)
		return false;

	return true;
}
|
|
|
|
|
|
//-------------------------------------------------
//  uchar_is_digit - tests to see if a unicode
//  char is one of the ASCII digits 0-9
//-------------------------------------------------

bool uchar_is_digit(char32_t uchar)
{
	// anything below '0' cannot be a digit
	if (uchar < U'0')
		return false;

	// otherwise it is a digit only if it does not go past '9'
	return !(uchar > U'9');
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// uchar_from_utf8 - convert a UTF-8 sequence
|
|
// into a unicode character
|
|
//-----------------------------------------------
|
|
|
|
int uchar_from_utf8(char32_t *uchar, std::string_view utf8str)
|
|
{
|
|
return uchar_from_utf8(uchar, utf8str.data(), utf8str.length());
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
//  uchar_from_utf8 - convert a UTF-8 sequence
//  into a unicode character
//
//  uchar     - receives the decoded value; only
//              written on success
//  utf8char  - first byte of the sequence
//  count     - number of bytes available
//
//  returns the number of bytes consumed, 0 if
//  there is no input (null pointer or zero
//  count), or -1 for a bad lead byte, a truncated
//  sequence, a bad continuation byte or an
//  overlong encoding; lead bytes for sequences of
//  up to six bytes are accepted and the decoded
//  value is not range-checked here
//-------------------------------------------------

int uchar_from_utf8(char32_t *uchar, const char *utf8char, size_t count)
{
	// nothing to decode
	if (!utf8char || !count)
		return 0;

	// look at the bytes as unsigned values
	unsigned char const *const bytes = reinterpret_cast<unsigned char const *>(utf8char);
	unsigned const lead = bytes[0];

	// classify the lead byte: number of continuation bytes, payload bits
	// carried by the lead byte, and smallest value this length may encode
	int trailing;
	char32_t value;
	char32_t lowest;
	if (lead < 0x80)
	{
		// 0xxxxxxx: 0x00000000 - 0x0000007F
		trailing = 0;
		value = lead;
		lowest = 0x00000000;
	}
	else if (lead < 0xc0)
	{
		// 10xxxxxx: a continuation byte cannot start a sequence
		return -1;
	}
	else if (lead < 0xe0)
	{
		// 110xxxxx: 0x00000080 - 0x000007FF
		trailing = 1;
		value = lead & 0x1f;
		lowest = 0x00000080;
	}
	else if (lead < 0xf0)
	{
		// 1110xxxx: 0x00000800 - 0x0000FFFF
		trailing = 2;
		value = lead & 0x0f;
		lowest = 0x00000800;
	}
	else if (lead < 0xf8)
	{
		// 11110xxx: 0x00010000 - 0x001FFFFF
		trailing = 3;
		value = lead & 0x07;
		lowest = 0x00010000;
	}
	else if (lead < 0xfc)
	{
		// 111110xx: 0x00200000 - 0x03FFFFFF
		trailing = 4;
		value = lead & 0x03;
		lowest = 0x00200000;
	}
	else if (lead < 0xfe)
	{
		// 1111110x: 0x04000000 - 0x7FFFFFFF
		trailing = 5;
		value = lead & 0x01;
		lowest = 0x04000000;
	}
	else
	{
		// 0xFE and 0xFF never appear in UTF-8
		return -1;
	}

	// the continuation bytes must all fit in what is left of the input
	if (size_t(trailing) > (count - 1))
		return -1;

	// fold in six bits from each continuation byte
	for (int index = 1; index <= trailing; index++)
	{
		unsigned const continuation = bytes[index];

		// continuation bytes always look like 10xxxxxx
		if ((continuation & 0xc0) != 0x80)
			return -1;

		value = (value << 6) | (continuation & 0x3f);
	}

	// reject overlong encodings
	if (value < lowest)
		return -1;

	*uchar = value;
	return trailing + 1;
}
|
|
|
|
|
|
//-------------------------------------------------
//  uchar_from_utf16 - convert a UTF-16 sequence
//  into a unicode character
//
//  uchar      - receives the decoded character;
//               only written on success
//  utf16char  - first code unit of the sequence
//  count      - number of code units available
//
//  returns the number of code units consumed (1
//  or 2), 0 if there is no input (null pointer or
//  zero count), or -1 for an unpaired surrogate
//-------------------------------------------------

int uchar_from_utf16(char32_t *uchar, const char16_t *utf16char, size_t count)
{
	// validate parameters - this has to return before the input is
	// touched, otherwise a null or empty buffer gets dereferenced
	if (!utf16char || !count)
		return 0;

	char16_t const first = utf16char[0];
	if ((first >= 0xd800) && (first <= 0xdbff))
	{
		// high surrogate: only meaningful when a low surrogate follows
		if (count < 2)
			return -1;

		char16_t const second = utf16char[1];
		if ((second < 0xdc00) || (second > 0xdfff))
			return -1;

		// combine the two 10-bit halves and add the supplementary plane offset
		*uchar = 0x10000 + ((first & 0x3ff) * 0x400) + (second & 0x3ff);
		return 2;
	}

	// a low surrogate without a preceding high surrogate is malformed
	if ((first >= 0xdc00) && (first <= 0xdfff))
		return -1;

	// single code unit case
	*uchar = first;
	return 1;
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// uchar_from_utf16f - convert a UTF-16 sequence
|
|
// into a unicode character from a flipped
|
|
// byte order
|
|
//-------------------------------------------------
|
|
|
|
int uchar_from_utf16f(char32_t *uchar, const char16_t *utf16char, size_t count)
|
|
{
|
|
char16_t buf[2] = {0};
|
|
if (count > 0)
|
|
buf[0] = swapendian_int16(utf16char[0]);
|
|
if (count > 1)
|
|
buf[1] = swapendian_int16(utf16char[1]);
|
|
return uchar_from_utf16(uchar, buf, count);
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// ustr_from_utf8 - convert a UTF-8 sequence into
|
|
// into a Unicode string
|
|
//-------------------------------------------------
|
|
|
|
std::u32string ustr_from_utf8(std::string_view utf8str)
|
|
{
|
|
std::u32string result;
|
|
if (!utf8str.empty())
|
|
{
|
|
char const *utf8char(&utf8str[0]);
|
|
auto remaining(utf8str.length());
|
|
while (remaining)
|
|
{
|
|
char32_t ch;
|
|
int const consumed(uchar_from_utf8(&ch, utf8char, remaining));
|
|
result.append(1, (consumed > 0) ? ch : char32_t(0x00fffdU));
|
|
utf8char += (consumed > 0) ? consumed : 1;
|
|
remaining -= (consumed > 0) ? consumed : 1;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// utf8_from_uchar - convert a unicode character
|
|
// into a UTF-8 sequence
|
|
//-------------------------------------------------
|
|
|
|
int utf8_from_uchar(char *utf8string, size_t count, char32_t uchar)
|
|
{
|
|
int rc = 0;
|
|
|
|
// error on invalid characters
|
|
if (!uchar_isvalid(uchar))
|
|
return -1;
|
|
|
|
// based on the value, output the appropriate number of bytes
|
|
if (uchar < 0x80)
|
|
{
|
|
// unicode char 0x00000000 - 0x0000007F
|
|
if (count < 1)
|
|
return -1;
|
|
utf8string[rc++] = (char) uchar;
|
|
}
|
|
else if (uchar < 0x800)
|
|
{
|
|
// unicode char 0x00000080 - 0x000007FF
|
|
if (count < 2)
|
|
return -1;
|
|
utf8string[rc++] = ((char) (uchar >> 6)) | 0xC0;
|
|
utf8string[rc++] = ((char) ((uchar >> 0) & 0x3F)) | 0x80;
|
|
}
|
|
else if (uchar < 0x10000)
|
|
{
|
|
// unicode char 0x00000800 - 0x0000FFFF
|
|
if (count < 3)
|
|
return -1;
|
|
utf8string[rc++] = ((char) (uchar >> 12)) | 0xE0;
|
|
utf8string[rc++] = ((char) ((uchar >> 6) & 0x3F)) | 0x80;
|
|
utf8string[rc++] = ((char) ((uchar >> 0) & 0x3F)) | 0x80;
|
|
}
|
|
else if (uchar < 0x00200000)
|
|
{
|
|
// unicode char 0x00010000 - 0x001FFFFF
|
|
if (count < 4)
|
|
return -1;
|
|
utf8string[rc++] = ((char) (uchar >> 18)) | 0xF0;
|
|
utf8string[rc++] = ((char) ((uchar >> 12) & 0x3F)) | 0x80;
|
|
utf8string[rc++] = ((char) ((uchar >> 6) & 0x3F)) | 0x80;
|
|
utf8string[rc++] = ((char) ((uchar >> 0) & 0x3F)) | 0x80;
|
|
}
|
|
else if (uchar < 0x04000000)
|
|
{
|
|
// unicode char 0x00200000 - 0x03FFFFFF
|
|
if (count < 5)
|
|
return -1;
|
|
utf8string[rc++] = ((char) (uchar >> 24)) | 0xF8;
|
|
utf8string[rc++] = ((char) ((uchar >> 18) & 0x3F)) | 0x80;
|
|
utf8string[rc++] = ((char) ((uchar >> 12) & 0x3F)) | 0x80;
|
|
utf8string[rc++] = ((char) ((uchar >> 6) & 0x3F)) | 0x80;
|
|
utf8string[rc++] = ((char) ((uchar >> 0) & 0x3F)) | 0x80;
|
|
}
|
|
else if (uchar < 0x80000000)
|
|
{
|
|
// unicode char 0x04000000 - 0x7FFFFFFF
|
|
if (count < 6)
|
|
return -1;
|
|
utf8string[rc++] = ((char) (uchar >> 30)) | 0xFC;
|
|
utf8string[rc++] = ((char) ((uchar >> 24) & 0x3F)) | 0x80;
|
|
utf8string[rc++] = ((char) ((uchar >> 18) & 0x3F)) | 0x80;
|
|
utf8string[rc++] = ((char) ((uchar >> 12) & 0x3F)) | 0x80;
|
|
utf8string[rc++] = ((char) ((uchar >> 6) & 0x3F)) | 0x80;
|
|
utf8string[rc++] = ((char) ((uchar >> 0) & 0x3F)) | 0x80;
|
|
}
|
|
else
|
|
rc = -1;
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// utf8_from_uchar - convert a unicode character
|
|
// into a UTF-8 sequence
|
|
//-------------------------------------------------
|
|
|
|
std::string utf8_from_uchar(char32_t uchar)
|
|
{
|
|
char buffer[UTF8_CHAR_MAX];
|
|
auto const len = utf8_from_uchar(buffer, std::size(buffer), uchar);
|
|
return std::string(buffer, std::max<decltype(len)>(len, 0));
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// utf16_from_uchar - convert a unicode character
|
|
// into a UTF-16 sequence
|
|
//-------------------------------------------------
|
|
|
|
int utf16_from_uchar(char16_t *utf16string, size_t count, char32_t uchar)
|
|
{
|
|
int rc;
|
|
|
|
// error on invalid characters
|
|
if (!uchar_isvalid(uchar))
|
|
return -1;
|
|
|
|
if (uchar < 0x10000)
|
|
{
|
|
// single word case
|
|
if (count < 1)
|
|
return -1;
|
|
utf16string[0] = (char16_t) uchar;
|
|
rc = 1;
|
|
}
|
|
else if (uchar < 0x100000)
|
|
{
|
|
// double word case
|
|
if (count < 2)
|
|
return -1;
|
|
uchar -= 0x10000;
|
|
utf16string[0] = ((uchar >> 10) & 0x03ff) | 0xd800;
|
|
utf16string[1] = ((uchar >> 0) & 0x03ff) | 0xdc00;
|
|
rc = 2;
|
|
}
|
|
else
|
|
{
|
|
return -1;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// utf16_from_uchar - convert a unicode character
|
|
// into a UTF-16 sequence with flipped endianness
|
|
//-------------------------------------------------
|
|
|
|
int utf16f_from_uchar(char16_t *utf16string, size_t count, char32_t uchar)
|
|
{
|
|
int rc;
|
|
char16_t buf[2] = { 0, 0 };
|
|
|
|
rc = utf16_from_uchar(buf, count, uchar);
|
|
|
|
if (rc >= 1)
|
|
utf16string[0] = swapendian_int16(buf[0]);
|
|
if (rc >= 2)
|
|
utf16string[1] = swapendian_int16(buf[1]);
|
|
return rc;
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
//  wstring_from_utf8 - convert UTF-8 text into a
//  wide character string
//-------------------------------------------------

std::wstring wstring_from_utf8(std::string_view utf8string)
{
#ifdef _WIN32
	// for some reason, using codecvt yields bad results on MinGW (but not MSVC)
	return osd::text::to_wstring(utf8string);
#else
	// hand the byte range to the standard UTF-8 <-> wchar_t codec
	char const *const first = utf8string.data();
	char const *const last = first + utf8string.length();
	std::wstring_convert<std::codecvt_utf8<wchar_t>> utf8_codec;
	return utf8_codec.from_bytes(first, last);
#endif
}
|
|
|
|
|
|
//-------------------------------------------------
//  utf8_from_wstring - convert a wide character
//  string into UTF-8 text
//-------------------------------------------------

std::string utf8_from_wstring(std::wstring_view string)
{
#ifdef _WIN32
	// for some reason, using codecvt yields bad results on MinGW (but not MSVC)
	return osd::text::from_wstring(string);
#else
	// hand the character range to the standard UTF-8 <-> wchar_t codec
	wchar_t const *const first = string.data();
	wchar_t const *const last = first + string.length();
	std::wstring_convert<std::codecvt_utf8<wchar_t>> utf8_codec;
	return utf8_codec.to_bytes(first, last);
#endif
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// normalize_unicode - uses utf8proc to normalize
|
|
// unicode
|
|
//-------------------------------------------------
|
|
|
|
std::string normalize_unicode(const char *s, unicode_normalization_form normalization_form, bool fold_case)
|
|
{
|
|
return internal_normalize_unicode(s, 0, normalization_form, fold_case, true);
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// normalize_unicode - uses utf8proc to normalize
|
|
// unicode
|
|
//-------------------------------------------------
|
|
|
|
std::string normalize_unicode(std::string_view s, unicode_normalization_form normalization_form, bool fold_case)
|
|
{
|
|
return internal_normalize_unicode(s.data(), s.length(), normalization_form, fold_case, false);
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// uchar_toupper - uses utf8proc to convert to
|
|
// upper case
|
|
//-------------------------------------------------
|
|
|
|
char32_t uchar_toupper(char32_t ch)
|
|
{
|
|
return utf8proc_toupper(ch);
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// uchar_tolower - uses utf8proc to convert to
|
|
// lower case
|
|
//-------------------------------------------------
|
|
|
|
char32_t uchar_tolower(char32_t ch)
|
|
{
|
|
return utf8proc_tolower(ch);
|
|
}
|
|
|
|
|
|
//-------------------------------------------------
//  utf8_previous_char - return a pointer to the
//  previous character in a string
//-------------------------------------------------

/**
 * @fn  const char *utf8_previous_char(const char *utf8string)
 *
 * @brief   Steps back from a position in UTF-8 text to the start of the
 *          preceding character.
 *
 * Moves back at least one byte and keeps going while the byte it lands on is
 * a continuation byte (10xxxxxx).  No bounds are checked, so the caller has
 * to make sure a character really does precede the given position.
 *
 * @param   utf8string  Position inside a UTF-8 string.
 *
 * @return  Pointer to the first byte that is not a continuation byte, found
 *          by scanning backwards from utf8string.
 */

const char *utf8_previous_char(const char *utf8string)
{
	const char *cursor = utf8string;
	do
	{
		--cursor;
	}
	while ((static_cast<unsigned char>(*cursor) & 0xc0) == 0x80);
	return cursor;
}
|
|
|
|
|
|
//-------------------------------------------------
|
|
// utf8_is_valid_string - return true if the
|
|
// given string is a properly formed sequence of
|
|
// UTF-8 characters
|
|
//-------------------------------------------------
|
|
|
|
/**
|
|
* @fn int utf8_is_valid_string(const char *utf8string)
|
|
*
|
|
* @brief UTF 8 is valid string.
|
|
*
|
|
* @param utf8string The UTF 8string.
|
|
*
|
|
* @return An int.
|
|
*/
|
|
|
|
bool utf8_is_valid_string(const char *utf8string)
|
|
{
|
|
int remaining_length = strlen(utf8string);
|
|
|
|
while (*utf8string != 0)
|
|
{
|
|
char32_t uchar = 0;
|
|
int charlen;
|
|
|
|
// extract the current character and verify it
|
|
charlen = uchar_from_utf8(&uchar, utf8string, remaining_length);
|
|
if (charlen <= 0 || uchar == 0 || !uchar_isvalid(uchar))
|
|
return false;
|
|
|
|
// advance
|
|
utf8string += charlen;
|
|
remaining_length -= charlen;
|
|
}
|
|
|
|
return true;
|
|
}
|