mame/src/lib/util/corestr.cpp

334 lines
7.5 KiB
C++

// license:BSD-3-Clause
// copyright-holders:Aaron Giles
/***************************************************************************
corestr.c
Core string functions used throughout MAME.
****************************************************************************/
#include "corestr.h"
#include "osdcore.h"
#include <algorithm>
#include <memory>
#include <ctype.h>
#include <stdlib.h>
/*-------------------------------------------------
core_stricmp - case-insensitive string compare
-------------------------------------------------*/
int core_stricmp(const char *s1, const char *s2)
{
for (;;)
{
int c1 = tolower((uint8_t)*s1++);
int c2 = tolower((uint8_t)*s2++);
if (c1 == 0 || c1 != c2)
return c1 - c2;
}
}
/*-------------------------------------------------
core_strnicmp - case-insensitive string compare
-------------------------------------------------*/
int core_strnicmp(const char *s1, const char *s2, size_t n)
{
size_t i;
for (i = 0; i < n; i++)
{
int c1 = tolower((uint8_t)*s1++);
int c2 = tolower((uint8_t)*s2++);
if (c1 == 0 || c1 != c2)
return c1 - c2;
}
return 0;
}
/*-------------------------------------------------
core_strwildcmp - case-insensitive wildcard
string compare (up to 16 characters at the
moment)
-------------------------------------------------*/
int core_strwildcmp(const char *sp1, const char *sp2)
{
char s1[17], s2[17];
size_t i, l1, l2;
char *p;
//assert(strlen(sp1) < 16);
//assert(strlen(sp2) < 16);
if (sp1[0] == 0) strcpy(s1, "*");
else { strncpy(s1, sp1, 16); s1[16] = 0; }
if (sp2[0] == 0) strcpy(s2, "*");
else { strncpy(s2, sp2, 16); s2[16] = 0; }
p = strchr(s1, '*');
if (p)
{
for (i = p - s1; i < 16; i++) s1[i] = '?';
s1[16] = 0;
}
p = strchr(s2, '*');
if (p)
{
for (i = p - s2; i < 16; i++) s2[i] = '?';
s2[16] = 0;
}
l1 = strlen(s1);
if (l1 < 16)
{
for (i = l1 + 1; i < 16; i++) s1[i] = ' ';
s1[16] = 0;
}
l2 = strlen(s2);
if (l2 < 16)
{
for (i = l2 + 1; i < 16; i++) s2[i] = ' ';
s2[16] = 0;
}
for (i = 0; i < 16; i++)
{
if (s1[i] == '?' && s2[i] != '?') s1[i] = s2[i];
if (s2[i] == '?' && s1[i] != '?') s2[i] = s1[i];
}
return core_stricmp(s1, s2);
}
bool core_iswildstr(const char *sp)
{
for ( ; sp && *sp; sp++)
{
if (('?' == *sp) || ('*' == *sp))
return true;
}
return false;
}
/*-------------------------------------------------
std::string helpers
-------------------------------------------------*/
#include <algorithm>
int strcatvprintf(std::string &str, const char *format, va_list args)
{
char tempbuf[4096];
int result = vsprintf(tempbuf, format, args);
// set the result
str.append(tempbuf);
return result;
}
void strdelchr(std::string& str, char chr)
{
for (size_t i = 0; i < str.length(); i++)
{
if (str[i] == chr)
{
str.erase(i, 1);
i--;
}
}
}
void strreplacechr(std::string& str, char ch, char newch)
{
for (auto & elem : str)
{
if (elem == ch) elem = newch;
}
}
static std::string &internal_strtrimspace(std::string& str, bool right_only)
{
// identify the start
std::string::iterator start = str.begin();
if (!right_only)
{
start = std::find_if(
str.begin(),
str.end(),
[](char c) { return !isspace(uint8_t(c)); });
}
// identify the end
std::string::iterator end = std::find_if(
str.rbegin(),
std::string::reverse_iterator(start),
[](char c) { return !isspace(uint8_t(c)); }).base();
// extract the string
str = end > start
? str.substr(start - str.begin(), end - start)
: "";
return str;
}
std::string &strtrimspace(std::string& str)
{
return internal_strtrimspace(str, false);
}
std::string &strtrimrightspace(std::string& str)
{
return internal_strtrimspace(str, true);
}
std::string &strmakeupper(std::string& str)
{
std::transform(str.begin(), str.end(), str.begin(), ::toupper);
return str;
}
/**
* @fn std::string &strmakelower(std::string& str)
*
* @brief Changes the given string to lower case.
*
* @param [in,out] str The string to make lower case
*
* @return A reference to the original std::string having been changed to lower case
*/
std::string &strmakelower(std::string& str)
{
std::transform(str.begin(), str.end(), str.begin(), ::tolower);
return str;
}
/**
* @fn int strreplace(std::string &str, const std::string& search, const std::string& replace)
*
* @brief Strreplaces.
*
* @param [in,out] str The string.
* @param search The search.
* @param replace The replace.
*
* @return An int.
*/
int strreplace(std::string &str, const std::string& search, const std::string& replace)
{
int searchlen = search.length();
int replacelen = replace.length();
int matches = 0;
for (int curindex = str.find(search, 0); curindex != -1; curindex = str.find(search, curindex + replacelen))
{
matches++;
str.erase(curindex, searchlen).insert(curindex, replace);
}
return matches;
}
namespace util {
/**
* @fn double edit_distance(std::u32string const &lhs, std::u32string const &rhs)
*
* @brief Compares strings and returns prefix-weighted similarity score (smaller is more similar).
*
* @param lhs First input.
* @param rhs Second input.
*
* @return Similarity score ranging from 0.0 (totally dissimilar) to 1.0 (identical).
*/
double edit_distance(std::u32string const &lhs, std::u32string const &rhs)
{
// based on Jaro-Winkler distance
// TODO: this breaks if the lengths don't fit in a long int, but that's not a big limitation
constexpr long MAX_PREFIX(4);
constexpr double PREFIX_WEIGHT(0.1);
constexpr double PREFIX_THRESHOLD(0.7);
std::u32string const &longer((lhs.length() >= rhs.length()) ? lhs : rhs);
std::u32string const &shorter((lhs.length() < rhs.length()) ? lhs : rhs);
// find matches
long const range((std::max)(long(longer.length() / 2) - 1, 0L));
std::unique_ptr<long []> match_idx(std::make_unique<long []>(shorter.length()));
std::unique_ptr<bool []> match_flg(std::make_unique<bool []>(longer.length()));
std::fill_n(match_idx.get(), shorter.length(), -1);
std::fill_n(match_flg.get(), longer.length(), false);
long match_cnt(0);
for (long i = 0; shorter.length() > i; ++i)
{
char32_t const ch(shorter[i]);
long const n((std::min)(i + range + 1L, long(longer.length())));
for (long j = (std::max)(i - range, 0L); n > j; ++j)
{
if (!match_flg[j] && (ch == longer[j]))
{
match_idx[i] = j;
match_flg[j] = true;
++match_cnt;
break;
}
}
}
// early exit if strings are very dissimilar
if (!match_cnt)
return 1.0;
// now find transpositions
std::unique_ptr<char32_t []> ms(std::make_unique<char32_t []>(2 * match_cnt));
std::fill_n(ms.get(), 2 * match_cnt, char32_t(0));
char32_t *const ms1(&ms[0]);
char32_t *const ms2(&ms[match_cnt]);
for (long i = 0, j = 0; shorter.length() > i; ++i)
{
if (0 <= match_idx[i])
ms1[j++] = shorter[i];
}
match_idx.reset();
for (long i = 0, j = 0; longer.length() > i; ++i)
{
if (match_flg[i])
ms2[j++] = longer[i];
}
match_flg.reset();
long halftrans_cnt(0);
for (long i = 0; match_cnt > i; ++i)
{
if (ms1[i] != ms2[i])
++halftrans_cnt;
}
ms.reset();
// simple prefix detection
long prefix_len(0);
for (long i = 0; ((std::min)(long(shorter.length()), MAX_PREFIX) > i) && (lhs[i] == rhs[i]); ++i)
++prefix_len;
// do the weighting
double const m(match_cnt);
double const t(double(halftrans_cnt) / 2);
double const jaro(((m / lhs.length()) + (m / rhs.length()) + ((m - t) / m)) / 3);
double const jaro_winkler((PREFIX_THRESHOLD > jaro) ? jaro : (jaro + (PREFIX_WEIGHT * prefix_len * (1.0 - jaro))));
return 1.0 - jaro_winkler;
}
} // namespace util