mirror of
https://github.com/holub/mame
synced 2025-04-09 18:17:44 +03:00
3rdparty/utf8proc: Updated to 2.9.0.
This commit is contained in:
parent
12590d6ad8
commit
466c450cb3
23
3rdparty/utf8proc/.github/workflows/ci-fuzz.yml
vendored
Normal file
23
3rdparty/utf8proc/.github/workflows/ci-fuzz.yml
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
name: CIFuzz
|
||||
on: [pull_request]
|
||||
jobs:
|
||||
Fuzzing:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Build Fuzzers
|
||||
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
|
||||
with:
|
||||
oss-fuzz-project-name: 'utf8proc'
|
||||
dry-run: false
|
||||
- name: Run Fuzzers
|
||||
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
|
||||
with:
|
||||
oss-fuzz-project-name: 'utf8proc'
|
||||
fuzz-seconds: 600
|
||||
dry-run: false
|
||||
- name: Upload Crash
|
||||
uses: actions/upload-artifact@v1
|
||||
if: failure()
|
||||
with:
|
||||
name: artifacts
|
||||
path: ./out/artifacts
|
64
3rdparty/utf8proc/.github/workflows/cmake.yml
vendored
Normal file
64
3rdparty/utf8proc/.github/workflows/cmake.yml
vendored
Normal file
@ -0,0 +1,64 @@
|
||||
name: CMake
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- 'release-*'
|
||||
pull_request:
|
||||
# run on all pr
|
||||
|
||||
jobs:
|
||||
build:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macOS-latest]
|
||||
shared: ["ON", "OFF"]
|
||||
runs-on: ${{ matrix.os }}
|
||||
name: ${{ matrix.os }} - shared=${{ matrix.shared }}
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: |
|
||||
mkdir build
|
||||
cmake -S . -B build -DBUILD_SHARED_LIBS=${{ matrix.shared }} -DUTF8PROC_ENABLE_TESTING=ON
|
||||
cmake --build build
|
||||
- name: Run Test
|
||||
run: ctest --test-dir build -V
|
||||
- name: Upload shared lib
|
||||
if: matrix.shared == 'ON'
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ matrix.os }}
|
||||
path: |
|
||||
build/libutf8proc.*
|
||||
build/Debug/utf8proc.*
|
||||
|
||||
mingw:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [windows-latest]
|
||||
shared: ["ON", "OFF"]
|
||||
runs-on: ${{ matrix.os }}
|
||||
name: mingw64 - shared=${{ matrix.shared }}
|
||||
defaults:
|
||||
run:
|
||||
shell: msys2 {0}
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: msys2/setup-msys2@v2
|
||||
with:
|
||||
install: gcc make mingw-w64-x86_64-cmake
|
||||
- name: Build
|
||||
run: |
|
||||
mkdir build
|
||||
cmake -S . -B build -DBUILD_SHARED_LIBS=${{ matrix.shared }} -DUTF8PROC_ENABLE_TESTING=ON -G'MSYS Makefiles'
|
||||
cmake --build build
|
||||
- name: Run Test
|
||||
run: ctest --test-dir build -V
|
||||
- name: Upload shared lib
|
||||
if: matrix.shared == 'ON'
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: windows-mingw64
|
||||
path: build/libutf8proc.*
|
41
3rdparty/utf8proc/.github/workflows/make.yml
vendored
Normal file
41
3rdparty/utf8proc/.github/workflows/make.yml
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
name: Make
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- 'release-*'
|
||||
pull_request:
|
||||
# run on all pr
|
||||
|
||||
jobs:
|
||||
build:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macOS-latest]
|
||||
runs-on: ${{ matrix.os }}
|
||||
name: ${{ matrix.os }}
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
# TODO: update makefile to check MANIFEST
|
||||
# - name: Install dependencies (MacOS)
|
||||
# if: matrix.config.os == 'macos-latest'
|
||||
# run: brew install ruby findutils
|
||||
|
||||
- name: Check MANIFEST
|
||||
if: matrix.config.os == 'ubuntu-latest'
|
||||
run: make manifest && diff MANIFEST.new MANIFEST
|
||||
- name: Run Test
|
||||
run: make check
|
||||
- name: Check utf8proc_data.c
|
||||
run: make data && diff data/utf8proc_data.c.new utf8proc_data.c
|
||||
- name: Clean
|
||||
run: make clean && git status --ignored --porcelain && test -z "$(git status --ignored --porcelain)"
|
||||
|
||||
- name: Make lib
|
||||
run: make
|
||||
- name: Upload shared lib
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: make-${{ matrix.os }}
|
||||
path: libutf8proc.*
|
38
3rdparty/utf8proc/.gitignore
vendored
Normal file
38
3rdparty/utf8proc/.gitignore
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
*.tar.gz
|
||||
*.exe
|
||||
*.dll
|
||||
*.do
|
||||
*.o
|
||||
*.so*
|
||||
*.a
|
||||
*.dll
|
||||
*.dylib
|
||||
*.dSYM
|
||||
*.out
|
||||
*.new
|
||||
.vscode
|
||||
/data/*.txt
|
||||
/data/*.ttf
|
||||
/data/*.sfd
|
||||
/docs/
|
||||
/bench/bench
|
||||
/bench/icu
|
||||
/bench/unistring
|
||||
/test/normtest
|
||||
/test/graphemetest
|
||||
/test/printproperty
|
||||
/test/charwidth
|
||||
/test/misc
|
||||
/test/valid
|
||||
/test/iterate
|
||||
/test/case
|
||||
/test/iscase
|
||||
/test/custom
|
||||
/tmp/
|
||||
/mingw_static/
|
||||
/mingw_shared/
|
||||
/msvc_shared/
|
||||
/msvc_static/
|
||||
/build/
|
||||
NEWS-update.jl
|
||||
libutf8proc.pc
|
22
3rdparty/utf8proc/.travis.yml
vendored
22
3rdparty/utf8proc/.travis.yml
vendored
@ -1,22 +0,0 @@
|
||||
language: c
|
||||
compiler:
|
||||
- gcc
|
||||
- clang
|
||||
notifications:
|
||||
email: false
|
||||
before_install:
|
||||
- sudo add-apt-repository ppa:staticfloat/julia-deps -y
|
||||
- sudo add-apt-repository ppa:staticfloat/juliareleases -y
|
||||
- sudo apt-get update -qq -y
|
||||
- sudo apt-get install libpcre3-dev julia fontforge -y
|
||||
script:
|
||||
- make manifest && diff MANIFEST.new MANIFEST
|
||||
- make check
|
||||
- make data && diff data/utf8proc_data.c.new utf8proc_data.c
|
||||
- make clean && git status --ignored --porcelain && test -z "$(git status --ignored --porcelain)"
|
||||
- (mkdir build_static && cd build_static && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make)
|
||||
- (mkdir build_shared && cd build_shared && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_SHARED_LIBS=ON && make)
|
||||
env:
|
||||
# use JuliaLang caching (https://github.com/staticfloat/cache.julialang.org)
|
||||
# so that Travis builds do not depend on anyone's flaky servers but our own
|
||||
- URLCACHE=https://cache.julialang.org/ CFLAGS="-O2 -Werror -Wmissing-prototypes"
|
46
3rdparty/utf8proc/CMakeLists.txt
vendored
46
3rdparty/utf8proc/CMakeLists.txt
vendored
@ -1,20 +1,24 @@
|
||||
cmake_minimum_required (VERSION 2.8.12)
|
||||
cmake_minimum_required (VERSION 3.0.0)
|
||||
|
||||
include (utils.cmake)
|
||||
|
||||
disallow_intree_builds()
|
||||
|
||||
project (utf8proc C)
|
||||
if (POLICY CMP0048)
|
||||
cmake_policy (SET CMP0048 NEW)
|
||||
endif ()
|
||||
project (utf8proc VERSION 2.9.0 LANGUAGES C)
|
||||
|
||||
# This is the ABI version number, which may differ from the
|
||||
# API version number (defined in utf8proc.h).
|
||||
# API version number (defined in utf8proc.h and above).
|
||||
# Be sure to also update these in Makefile and MANIFEST!
|
||||
set(SO_MAJOR 2)
|
||||
set(SO_MINOR 4)
|
||||
set(SO_PATCH 1)
|
||||
set(SO_MAJOR 3)
|
||||
set(SO_MINOR 0)
|
||||
set(SO_PATCH 0)
|
||||
|
||||
option(UTF8PROC_INSTALL "Enable installation of utf8proc" On)
|
||||
option(UTF8PROC_ENABLE_TESTING "Enable testing of utf8proc" Off)
|
||||
option(LIB_FUZZING_ENGINE "Fuzzing engine to link against" Off)
|
||||
|
||||
add_library (utf8proc
|
||||
utf8proc.c
|
||||
@ -50,23 +54,23 @@ set_target_properties (utf8proc PROPERTIES
|
||||
)
|
||||
|
||||
if (UTF8PROC_INSTALL)
|
||||
include(GNUInstallDirs)
|
||||
install(FILES utf8proc.h DESTINATION "${CMAKE_INSTALL_FULL_INCLUDEDIR}")
|
||||
install(TARGETS utf8proc
|
||||
RUNTIME DESTINATION bin
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib)
|
||||
|
||||
install(
|
||||
FILES
|
||||
"${PROJECT_SOURCE_DIR}/utf8proc.h"
|
||||
DESTINATION include)
|
||||
ARCHIVE DESTINATION "${CMAKE_INSTALL_FULL_LIBDIR}"
|
||||
LIBRARY DESTINATION "${CMAKE_INSTALL_FULL_LIBDIR}"
|
||||
RUNTIME DESTINATION "${CMAKE_INSTALL_FULL_BINDIR}"
|
||||
)
|
||||
configure_file(libutf8proc.pc.cmakein libutf8proc.pc @ONLY)
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libutf8proc.pc" DESTINATION "${CMAKE_INSTALL_FULL_LIBDIR}/pkgconfig")
|
||||
endif()
|
||||
|
||||
if(UTF8PROC_ENABLE_TESTING)
|
||||
enable_testing()
|
||||
file(MAKE_DIRECTORY data)
|
||||
set(UNICODE_VERSION 13.0.0)
|
||||
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt data/NormalizationTest.txt SHOW_PROGRESS)
|
||||
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTest.txt SHOW_PROGRESS)
|
||||
set(UNICODE_VERSION 15.1.0)
|
||||
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt ${CMAKE_BINARY_DIR}/data/NormalizationTest.txt SHOW_PROGRESS)
|
||||
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt ${CMAKE_BINARY_DIR}/data/GraphemeBreakTest.txt SHOW_PROGRESS)
|
||||
add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)
|
||||
target_link_libraries(case utf8proc)
|
||||
add_executable(custom test/tests.h test/tests.c utf8proc.h test/custom.c)
|
||||
@ -98,4 +102,12 @@ if(UTF8PROC_ENABLE_TESTING)
|
||||
target_link_libraries(normtest utf8proc)
|
||||
add_test(utf8proc.testgraphemetest graphemetest data/GraphemeBreakTest.txt)
|
||||
add_test(utf8proc.testnormtest normtest data/NormalizationTest.txt)
|
||||
|
||||
if(LIB_FUZZING_ENGINE)
|
||||
add_executable(fuzzer utf8proc.h test/fuzzer.c)
|
||||
target_link_libraries(fuzzer ${LIB_FUZZING_ENGINE} utf8proc)
|
||||
else()
|
||||
add_executable(fuzzer utf8proc.h test/fuzz_main.c test/fuzzer.c)
|
||||
target_link_libraries(fuzzer utf8proc)
|
||||
endif()
|
||||
endif()
|
||||
|
169
3rdparty/utf8proc/Doxyfile
vendored
169
3rdparty/utf8proc/Doxyfile
vendored
@ -1,4 +1,4 @@
|
||||
# Doxyfile 1.8.18
|
||||
# Doxyfile 1.9.1
|
||||
|
||||
# This file describes the settings to be used by the documentation system
|
||||
# doxygen (www.doxygen.org) for a project.
|
||||
@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
|
||||
# title of most generated pages and in a few other places.
|
||||
# The default value is: My Project.
|
||||
|
||||
PROJECT_NAME = "utf8proc"
|
||||
PROJECT_NAME = utf8proc
|
||||
|
||||
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
|
||||
# could be handy for archiving the generated documentation or if some version
|
||||
@ -217,6 +217,14 @@ QT_AUTOBRIEF = NO
|
||||
|
||||
MULTILINE_CPP_IS_BRIEF = NO
|
||||
|
||||
# By default Python docstrings are displayed as preformatted text and doxygen's
|
||||
# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the
|
||||
# doxygen's special commands can be used and the contents of the docstring
|
||||
# documentation blocks is shown as doxygen documentation.
|
||||
# The default value is: YES.
|
||||
|
||||
PYTHON_DOCSTRING = YES
|
||||
|
||||
# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
|
||||
# documentation from any documented member that it re-implements.
|
||||
# The default value is: YES.
|
||||
@ -305,7 +313,10 @@ OPTIMIZE_OUTPUT_SLICE = NO
|
||||
# Note: For files without extension you can use no_extension as a placeholder.
|
||||
#
|
||||
# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
|
||||
# the files are not read by doxygen.
|
||||
# the files are not read by doxygen. When specifying no_extension you should add
|
||||
# * to the FILE_PATTERNS.
|
||||
#
|
||||
# Note see also the list of default file extension mappings.
|
||||
|
||||
EXTENSION_MAPPING =
|
||||
|
||||
@ -439,6 +450,19 @@ TYPEDEF_HIDES_STRUCT = NO
|
||||
|
||||
LOOKUP_CACHE_SIZE = 0
|
||||
|
||||
# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use
|
||||
# during processing. When set to 0 doxygen will base this on the number of
|
||||
# cores available in the system. You can set it explicitly to a value larger
|
||||
# than 0 to get more control over the balance between CPU load and processing
|
||||
# speed. At this moment only the input processing can be done using multiple
|
||||
# threads. Since this is still an experimental feature the default is set to 1,
|
||||
# which effectively disables parallel processing. Please report any issues you
|
||||
# encounter. Generating dot graphs in parallel is controlled by the
|
||||
# DOT_NUM_THREADS setting.
|
||||
# Minimum value: 0, maximum value: 32, default value: 1.
|
||||
|
||||
NUM_PROC_THREADS = 1
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Build related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
@ -502,6 +526,13 @@ EXTRACT_LOCAL_METHODS = NO
|
||||
|
||||
EXTRACT_ANON_NSPACES = NO
|
||||
|
||||
# If this flag is set to YES, the name of an unnamed parameter in a declaration
|
||||
# will be determined by the corresponding definition. By default unnamed
|
||||
# parameters remain unnamed in the output.
|
||||
# The default value is: YES.
|
||||
|
||||
RESOLVE_UNNAMED_PARAMS = YES
|
||||
|
||||
# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
|
||||
# undocumented members inside documented classes or files. If set to NO these
|
||||
# members will be included in the various overviews, but no documentation
|
||||
@ -539,11 +570,18 @@ HIDE_IN_BODY_DOCS = NO
|
||||
|
||||
INTERNAL_DOCS = NO
|
||||
|
||||
# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
|
||||
# names in lower-case letters. If set to YES, upper-case letters are also
|
||||
# allowed. This is useful if you have classes or files whose names only differ
|
||||
# in case and if your file system supports case sensitive file names. Windows
|
||||
# (including Cygwin) ands Mac users are advised to set this option to NO.
|
||||
# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
|
||||
# able to match the capabilities of the underlying filesystem. In case the
|
||||
# filesystem is case sensitive (i.e. it supports files in the same directory
|
||||
# whose names only differ in casing), the option must be set to YES to properly
|
||||
# deal with such files in case they appear in the input. For filesystems that
|
||||
# are not case sensitive the option should be set to NO to properly deal with
|
||||
# output files written for symbols that only differ in casing, such as for two
|
||||
# classes, one named CLASS and the other named Class, and to also support
|
||||
# references to files without having to specify the exact matching casing. On
|
||||
# Windows (including Cygwin) and MacOS, users should typically set this option
|
||||
# to NO, whereas on Linux or other Unix flavors it should typically be set to
|
||||
# YES.
|
||||
# The default value is: system dependent.
|
||||
|
||||
CASE_SENSE_NAMES = NO
|
||||
@ -782,7 +820,10 @@ WARN_IF_DOC_ERROR = YES
|
||||
WARN_NO_PARAMDOC = NO
|
||||
|
||||
# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
|
||||
# a warning is encountered.
|
||||
# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
|
||||
# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
|
||||
# at the end of the doxygen process doxygen will return with a non-zero status.
|
||||
# Possible values are: NO, YES and FAIL_ON_WARNINGS.
|
||||
# The default value is: NO.
|
||||
|
||||
WARN_AS_ERROR = NO
|
||||
@ -818,8 +859,8 @@ INPUT =
|
||||
# This tag can be used to specify the character encoding of the source files
|
||||
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
|
||||
# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
|
||||
# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
|
||||
# possible encodings.
|
||||
# documentation (see:
|
||||
# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
|
||||
# The default value is: UTF-8.
|
||||
|
||||
INPUT_ENCODING = UTF-8
|
||||
@ -832,13 +873,15 @@ INPUT_ENCODING = UTF-8
|
||||
# need to set EXTENSION_MAPPING for the extension otherwise the files are not
|
||||
# read by doxygen.
|
||||
#
|
||||
# Note the list of default checked file patterns might differ from the list of
|
||||
# default file extension mappings.
|
||||
#
|
||||
# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
|
||||
# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
|
||||
# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
|
||||
# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
|
||||
# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen
|
||||
# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd,
|
||||
# *.vhdl, *.ucf, *.qsf and *.ice.
|
||||
# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,
|
||||
# *.ucf, *.qsf and *.ice.
|
||||
|
||||
FILE_PATTERNS =
|
||||
|
||||
@ -1065,13 +1108,6 @@ VERBATIM_HEADERS = YES
|
||||
|
||||
ALPHABETICAL_INDEX = YES
|
||||
|
||||
# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
|
||||
# which the alphabetical index list will be split.
|
||||
# Minimum value: 1, maximum value: 20, default value: 5.
|
||||
# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
|
||||
|
||||
COLS_IN_ALPHA_INDEX = 5
|
||||
|
||||
# In case all classes in a project start with a common prefix, all classes will
|
||||
# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
|
||||
# can be used to specify a prefix (or a list of prefixes) that should be ignored
|
||||
@ -1242,10 +1278,11 @@ HTML_INDEX_NUM_ENTRIES = 100
|
||||
|
||||
# If the GENERATE_DOCSET tag is set to YES, additional index files will be
|
||||
# generated that can be used as input for Apple's Xcode 3 integrated development
|
||||
# environment (see: https://developer.apple.com/xcode/), introduced with OSX
|
||||
# 10.5 (Leopard). To create a documentation set, doxygen will generate a
|
||||
# Makefile in the HTML output directory. Running make will produce the docset in
|
||||
# that directory and running make install will install the docset in
|
||||
# environment (see:
|
||||
# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
|
||||
# create a documentation set, doxygen will generate a Makefile in the HTML
|
||||
# output directory. Running make will produce the docset in that directory and
|
||||
# running make install will install the docset in
|
||||
# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
|
||||
# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
|
||||
# genXcode/_index.html for more information.
|
||||
@ -1287,8 +1324,8 @@ DOCSET_PUBLISHER_NAME = Publisher
|
||||
# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
|
||||
# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
|
||||
# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
|
||||
# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
|
||||
# Windows.
|
||||
# (see:
|
||||
# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
|
||||
#
|
||||
# The HTML Help Workshop contains a compiler that can convert all HTML output
|
||||
# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
|
||||
@ -1318,7 +1355,7 @@ CHM_FILE =
|
||||
HHC_LOCATION =
|
||||
|
||||
# The GENERATE_CHI flag controls if a separate .chi index file is generated
|
||||
# (YES) or that it should be included in the master .chm file (NO).
|
||||
# (YES) or that it should be included in the main .chm file (NO).
|
||||
# The default value is: NO.
|
||||
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
|
||||
|
||||
@ -1363,7 +1400,8 @@ QCH_FILE =
|
||||
|
||||
# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
|
||||
# Project output. For more information please see Qt Help Project / Namespace
|
||||
# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
|
||||
# (see:
|
||||
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
|
||||
# The default value is: org.doxygen.Project.
|
||||
# This tag requires that the tag GENERATE_QHP is set to YES.
|
||||
|
||||
@ -1371,8 +1409,8 @@ QHP_NAMESPACE = org.doxygen.Project
|
||||
|
||||
# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
|
||||
# Help Project output. For more information please see Qt Help Project / Virtual
|
||||
# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
|
||||
# folders).
|
||||
# Folders (see:
|
||||
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
|
||||
# The default value is: doc.
|
||||
# This tag requires that the tag GENERATE_QHP is set to YES.
|
||||
|
||||
@ -1380,16 +1418,16 @@ QHP_VIRTUAL_FOLDER = doc
|
||||
|
||||
# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
|
||||
# filter to add. For more information please see Qt Help Project / Custom
|
||||
# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
|
||||
# filters).
|
||||
# Filters (see:
|
||||
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
|
||||
# This tag requires that the tag GENERATE_QHP is set to YES.
|
||||
|
||||
QHP_CUST_FILTER_NAME =
|
||||
|
||||
# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
|
||||
# custom filter to add. For more information please see Qt Help Project / Custom
|
||||
# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
|
||||
# filters).
|
||||
# Filters (see:
|
||||
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
|
||||
# This tag requires that the tag GENERATE_QHP is set to YES.
|
||||
|
||||
QHP_CUST_FILTER_ATTRS =
|
||||
@ -1401,9 +1439,9 @@ QHP_CUST_FILTER_ATTRS =
|
||||
|
||||
QHP_SECT_FILTER_ATTRS =
|
||||
|
||||
# The QHG_LOCATION tag can be used to specify the location of Qt's
|
||||
# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
|
||||
# generated .qhp file.
|
||||
# The QHG_LOCATION tag can be used to specify the location (absolute path
|
||||
# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
|
||||
# run qhelpgenerator on the generated .qhp file.
|
||||
# This tag requires that the tag GENERATE_QHP is set to YES.
|
||||
|
||||
QHG_LOCATION =
|
||||
@ -1484,8 +1522,8 @@ EXT_LINKS_IN_WINDOW = NO
|
||||
# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
|
||||
# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
|
||||
# the HTML output. These images will generally look nicer at scaled resolutions.
|
||||
# Possible values are: png The default and svg Looks nicer but requires the
|
||||
# pdf2svg tool.
|
||||
# Possible values are: png (the default) and svg (looks nicer but requires the
|
||||
# pdf2svg or inkscape tool).
|
||||
# The default value is: png.
|
||||
# This tag requires that the tag GENERATE_HTML is set to YES.
|
||||
|
||||
@ -1530,7 +1568,7 @@ USE_MATHJAX = NO
|
||||
|
||||
# When MathJax is enabled you can set the default output format to be used for
|
||||
# the MathJax output. See the MathJax site (see:
|
||||
# http://docs.mathjax.org/en/latest/output.html) for more details.
|
||||
# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.
|
||||
# Possible values are: HTML-CSS (which is slower, but has the best
|
||||
# compatibility), NativeMML (i.e. MathML) and SVG.
|
||||
# The default value is: HTML-CSS.
|
||||
@ -1560,7 +1598,8 @@ MATHJAX_EXTENSIONS =
|
||||
|
||||
# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
|
||||
# of code that will be used on startup of the MathJax code. See the MathJax site
|
||||
# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
|
||||
# (see:
|
||||
# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
|
||||
# example see the documentation.
|
||||
# This tag requires that the tag USE_MATHJAX is set to YES.
|
||||
|
||||
@ -1607,7 +1646,8 @@ SERVER_BASED_SEARCH = NO
|
||||
#
|
||||
# Doxygen ships with an example indexer (doxyindexer) and search engine
|
||||
# (doxysearch.cgi) which are based on the open source search engine library
|
||||
# Xapian (see: https://xapian.org/).
|
||||
# Xapian (see:
|
||||
# https://xapian.org/).
|
||||
#
|
||||
# See the section "External Indexing and Searching" for details.
|
||||
# The default value is: NO.
|
||||
@ -1620,8 +1660,9 @@ EXTERNAL_SEARCH = NO
|
||||
#
|
||||
# Doxygen ships with an example indexer (doxyindexer) and search engine
|
||||
# (doxysearch.cgi) which are based on the open source search engine library
|
||||
# Xapian (see: https://xapian.org/). See the section "External Indexing and
|
||||
# Searching" for details.
|
||||
# Xapian (see:
|
||||
# https://xapian.org/). See the section "External Indexing and Searching" for
|
||||
# details.
|
||||
# This tag requires that the tag SEARCHENGINE is set to YES.
|
||||
|
||||
SEARCHENGINE_URL =
|
||||
@ -1785,9 +1826,11 @@ LATEX_EXTRA_FILES =
|
||||
|
||||
PDF_HYPERLINKS = YES
|
||||
|
||||
# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
|
||||
# the PDF file directly from the LaTeX files. Set this option to YES, to get a
|
||||
# higher quality PDF documentation.
|
||||
# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
|
||||
# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
|
||||
# files. Set this option to YES, to get a higher quality PDF documentation.
|
||||
#
|
||||
# See also section LATEX_CMD_NAME for selecting the engine.
|
||||
# The default value is: YES.
|
||||
# This tag requires that the tag GENERATE_LATEX is set to YES.
|
||||
|
||||
@ -2298,10 +2341,32 @@ UML_LOOK = NO
|
||||
# but if the number exceeds 15, the total amount of fields shown is limited to
|
||||
# 10.
|
||||
# Minimum value: 0, maximum value: 100, default value: 10.
|
||||
# This tag requires that the tag HAVE_DOT is set to YES.
|
||||
# This tag requires that the tag UML_LOOK is set to YES.
|
||||
|
||||
UML_LIMIT_NUM_FIELDS = 10
|
||||
|
||||
# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
|
||||
# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
|
||||
# tag is set to YES, doxygen will add type and arguments for attributes and
|
||||
# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
|
||||
# will not generate fields with class member information in the UML graphs. The
|
||||
# class diagrams will look similar to the default class diagrams but using UML
|
||||
# notation for the relationships.
|
||||
# Possible values are: NO, YES and NONE.
|
||||
# The default value is: NO.
|
||||
# This tag requires that the tag UML_LOOK is set to YES.
|
||||
|
||||
DOT_UML_DETAILS = NO
|
||||
|
||||
# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
|
||||
# to display on a single line. If the actual line length exceeds this threshold
|
||||
# significantly it will be wrapped across multiple lines. Some heuristics are applied
|
||||
# to avoid ugly line breaks.
|
||||
# Minimum value: 0, maximum value: 1000, default value: 17.
|
||||
# This tag requires that the tag HAVE_DOT is set to YES.
|
||||
|
||||
DOT_WRAP_THRESHOLD = 17
|
||||
|
||||
# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
|
||||
# collaboration graphs will show the relations between templates and their
|
||||
# instances.
|
||||
@ -2491,9 +2556,11 @@ DOT_MULTI_TARGETS = NO
|
||||
|
||||
GENERATE_LEGEND = YES
|
||||
|
||||
# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
|
||||
# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
|
||||
# files that are used to generate the various graphs.
|
||||
#
|
||||
# Note: This setting is not only used for dot files but also for msc and
|
||||
# plantuml temporary files.
|
||||
# The default value is: YES.
|
||||
# This tag requires that the tag HAVE_DOT is set to YES.
|
||||
|
||||
DOT_CLEANUP = YES
|
||||
|
2
3rdparty/utf8proc/LICENSE.md
vendored
2
3rdparty/utf8proc/LICENSE.md
vendored
@ -7,7 +7,7 @@ whose copyright and license statements are reproduced below, all new
|
||||
work on the utf8proc library is licensed under the [MIT "expat"
|
||||
license](http://opensource.org/licenses/MIT):
|
||||
|
||||
*Copyright © 2014-2019 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
|
||||
*Copyright © 2014-2021 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
|
6
3rdparty/utf8proc/MANIFEST
vendored
6
3rdparty/utf8proc/MANIFEST
vendored
@ -2,8 +2,8 @@ include/
|
||||
include/utf8proc.h
|
||||
lib/
|
||||
lib/libutf8proc.a
|
||||
lib/libutf8proc.so -> libutf8proc.so.2.4.1
|
||||
lib/libutf8proc.so.2 -> libutf8proc.so.2.4.1
|
||||
lib/libutf8proc.so.2.4.1
|
||||
lib/libutf8proc.so -> libutf8proc.so.3.0.0
|
||||
lib/libutf8proc.so.2 -> libutf8proc.so.3.0.0
|
||||
lib/libutf8proc.so.3.0.0
|
||||
lib/pkgconfig/
|
||||
lib/pkgconfig/libutf8proc.pc
|
||||
|
27
3rdparty/utf8proc/Makefile
vendored
27
3rdparty/utf8proc/Makefile
vendored
@ -11,7 +11,7 @@ PERL=perl
|
||||
CFLAGS ?= -O2
|
||||
PICFLAG = -fPIC
|
||||
C99FLAG = -std=c99
|
||||
WCFLAGS = -Wall -Wextra -pedantic
|
||||
WCFLAGS = -Wsign-conversion -Wall -Wextra -pedantic
|
||||
UCFLAGS = $(CPPFLAGS) $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS $(UTF8PROC_DEFINES)
|
||||
LDFLAG_SHARED = -shared
|
||||
SOFLAG = -Wl,-soname
|
||||
@ -22,9 +22,12 @@ SOFLAG = -Wl,-soname
|
||||
# compatibility is broken, even if the API is backward-compatible.
|
||||
# The API version number is defined in utf8proc.h.
|
||||
# Be sure to also update these ABI versions in MANIFEST and CMakeLists.txt!
|
||||
MAJOR=2
|
||||
MINOR=4
|
||||
PATCH=1
|
||||
MAJOR=3
|
||||
MINOR=0
|
||||
PATCH=0
|
||||
|
||||
# api version (also in utf8proc.h and CMakeLists.txt)
|
||||
VERSION=2.9.0
|
||||
|
||||
OS := $(shell uname)
|
||||
ifeq ($(OS),Darwin) # MacOS X
|
||||
@ -78,7 +81,7 @@ utf8proc.o: utf8proc.h utf8proc.c utf8proc_data.c
|
||||
|
||||
libutf8proc.a: utf8proc.o
|
||||
rm -f libutf8proc.a
|
||||
$(AR) rs libutf8proc.a utf8proc.o
|
||||
$(AR) crs libutf8proc.a utf8proc.o
|
||||
|
||||
libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH): utf8proc.o
|
||||
$(CC) $(LDFLAGS) $(LDFLAG_SHARED) -o $@ $(SOFLAG) -Wl,libutf8proc.so.$(MAJOR) utf8proc.o
|
||||
@ -168,6 +171,20 @@ test/custom: test/custom.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||
test/misc: test/misc.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||
$(CC) $(UCFLAGS) $(LDFLAGS) -DUNICODE_VERSION='"'`$(PERL) -ne "/^UNICODE_VERSION=/ and print $$';" data/Makefile`'"' test/misc.c test/tests.o utf8proc.o -o $@
|
||||
|
||||
# make release tarball from master branch
|
||||
dist:
|
||||
git archive master --prefix=utf8proc-$(VERSION)/ -o utf8proc-$(VERSION).tar.gz
|
||||
|
||||
# build tarball, make sure it passes checks, and make sure version numbers are consistent
|
||||
distcheck: dist
|
||||
test `grep UTF8PROC_VERSION utf8proc.h | cut -d' ' -f3 | tr '\n' .` = $(VERSION). || exit 1
|
||||
test `grep "utf8proc VERSION" CMakeLists.txt |cut -d' ' -f 4` = $(VERSION) || exit 1
|
||||
test `grep libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) MANIFEST | wc -l` = 3 || exit 1
|
||||
test `grep 'set(SO_' CMakeLists.txt |cut -d' ' -f2 | tr -d ')' | tr '\n' '.'` = $(MAJOR).$(MINOR).$(PATCH). || exit 1
|
||||
tar xzf utf8proc-$(VERSION).tar.gz
|
||||
make -C utf8proc-$(VERSION) check
|
||||
rm -rf utf8proc-$(VERSION)
|
||||
|
||||
check: test/normtest data/NormalizationTest.txt data/Lowercase.txt data/Uppercase.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/iscase test/custom test/charwidth test/misc test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
|
||||
$(MAKE) -C bench
|
||||
test/normtest data/NormalizationTest.txt
|
||||
|
29
3rdparty/utf8proc/NEWS.md
vendored
29
3rdparty/utf8proc/NEWS.md
vendored
@ -1,5 +1,29 @@
|
||||
# utf8proc release history #
|
||||
|
||||
## Version 2.9.0 ##
|
||||
|
||||
2023-10-20
|
||||
|
||||
- Unicode 15.1 support ([#253]).
|
||||
|
||||
## Version 2.8.0 ##
|
||||
|
||||
2022-10-30
|
||||
|
||||
- Unicode 15 support ([#247]).
|
||||
|
||||
## Version 2.7.0 ##
|
||||
|
||||
2021-12-16
|
||||
|
||||
- Unicode 14 support ([#233]).
|
||||
|
||||
- Support `GNUInstallDirs` in CMake build ([#159]).
|
||||
|
||||
- `cmake` build now installs `pkg-config` file ([#224]).
|
||||
|
||||
- Various build and portability improvements.
|
||||
|
||||
## Version 2.6.1 ##
|
||||
|
||||
2020-12-15
|
||||
@ -409,8 +433,13 @@ Release of version 1.0.1
|
||||
[#152]: https://github.com/JuliaStrings/utf8proc/issues/152
|
||||
[#154]: https://github.com/JuliaStrings/utf8proc/issues/154
|
||||
[#156]: https://github.com/JuliaStrings/utf8proc/issues/156
|
||||
[#159]: https://github.com/JuliaStrings/utf8proc/issues/159
|
||||
[#167]: https://github.com/JuliaStrings/utf8proc/issues/167
|
||||
[#173]: https://github.com/JuliaStrings/utf8proc/issues/173
|
||||
[#179]: https://github.com/JuliaStrings/utf8proc/issues/179
|
||||
[#196]: https://github.com/JuliaStrings/utf8proc/issues/196
|
||||
[#205]: https://github.com/JuliaStrings/utf8proc/issues/205
|
||||
[#224]: https://github.com/JuliaStrings/utf8proc/issues/224
|
||||
[#233]: https://github.com/JuliaStrings/utf8proc/issues/233
|
||||
[#247]: https://github.com/JuliaStrings/utf8proc/issues/247
|
||||
[#253]: https://github.com/JuliaStrings/utf8proc/issues/253
|
||||
|
9
3rdparty/utf8proc/README.md
vendored
9
3rdparty/utf8proc/README.md
vendored
@ -1,5 +1,5 @@
|
||||
# utf8proc
|
||||
[](https://travis-ci.org/JuliaStrings/utf8proc)
|
||||
[](https://github.com/JuliaStrings/utf8proc/actions/workflows/build-ci.yml)
|
||||
[](https://ci.appveyor.com/project/StevenGJohnson/utf8proc)
|
||||
|
||||
[utf8proc](http://juliastrings.github.io/utf8proc/) is a small, clean C
|
||||
@ -38,9 +38,8 @@ For compilation of the C library, run `make`. You can also install the library
|
||||
Alternatively, you can compile with `cmake`, e.g. by
|
||||
```sh
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
cmake -S . -B build
|
||||
cmake --build build
|
||||
```
|
||||
|
||||
### Using other compilers
|
||||
@ -60,7 +59,7 @@ The C library is found in this directory after successful compilation
|
||||
and is named `libutf8proc.a` (for the static library) and
|
||||
`libutf8proc.so` (for the dynamic library).
|
||||
|
||||
The Unicode version supported is 13.0.0.
|
||||
The Unicode version supported is 15.1.0.
|
||||
|
||||
For Unicode normalizations, the following options are used:
|
||||
|
||||
|
2
3rdparty/utf8proc/data/Makefile
vendored
2
3rdparty/utf8proc/data/Makefile
vendored
@ -22,7 +22,7 @@ CharWidths.txt: charwidths.jl EastAsianWidth.txt
|
||||
$(JULIA) charwidths.jl > $@
|
||||
|
||||
# Unicode data version (must also update utf8proc_unicode_version function)
|
||||
UNICODE_VERSION=13.0.0
|
||||
UNICODE_VERSION=15.1.0
|
||||
|
||||
UnicodeData.txt:
|
||||
$(CURL) $(CURLFLAGS) -o $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
|
||||
|
57
3rdparty/utf8proc/data/data_generator.rb
vendored
Normal file → Executable file
57
3rdparty/utf8proc/data/data_generator.rb
vendored
Normal file → Executable file
@ -67,7 +67,7 @@
|
||||
# authorization of the copyright holder.
|
||||
|
||||
|
||||
$ignorable_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Default_Ignorable_Code_Point.*?# Total code points:/m]
|
||||
$ignorable_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Derived Property: Default_Ignorable_Code_Point.*?# Total code points:/m]
|
||||
$ignorable = []
|
||||
$ignorable_list.each_line do |entry|
|
||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
||||
@ -77,7 +77,7 @@ $ignorable_list.each_line do |entry|
|
||||
end
|
||||
end
|
||||
|
||||
$uppercase_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Uppercase.*?# Total code points:/m]
|
||||
$uppercase_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Derived Property: Uppercase.*?# Total code points:/m]
|
||||
$uppercase = []
|
||||
$uppercase_list.each_line do |entry|
|
||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
||||
@ -87,7 +87,7 @@ $uppercase_list.each_line do |entry|
|
||||
end
|
||||
end
|
||||
|
||||
$lowercase_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Lowercase.*?# Total code points:/m]
|
||||
$lowercase_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Derived Property: Lowercase.*?# Total code points:/m]
|
||||
$lowercase = []
|
||||
$lowercase_list.each_line do |entry|
|
||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
||||
@ -97,7 +97,33 @@ $lowercase_list.each_line do |entry|
|
||||
end
|
||||
end
|
||||
|
||||
$grapheme_boundclass_list = File.read("GraphemeBreakProperty.txt")
|
||||
$icb_linker_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Indic_Conjunct_Break=Linker.*?# Total code points:/m]
|
||||
$icb = Hash.new("UTF8PROC_INDIC_CONJUNCT_BREAK_NONE")
|
||||
$icb_linker_list.each_line do |entry|
|
||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
||||
$1.hex.upto($2.hex) { |e2| $icb[e2] = "UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER" }
|
||||
elsif entry =~ /^[0-9A-F]+/
|
||||
$icb[$&.hex] = "UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER"
|
||||
end
|
||||
end
|
||||
$icb_consonant_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Indic_Conjunct_Break=Consonant.*?# Total code points:/m]
|
||||
$icb_consonant_list.each_line do |entry|
|
||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
||||
$1.hex.upto($2.hex) { |e2| $icb[e2] = "UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT" }
|
||||
elsif entry =~ /^[0-9A-F]+/
|
||||
$icb[$&.hex] = "UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT"
|
||||
end
|
||||
end
|
||||
$icb_extend_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Indic_Conjunct_Break=Extend.*?# Total code points:/m]
|
||||
$icb_extend_list.each_line do |entry|
|
||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
||||
$1.hex.upto($2.hex) { |e2| $icb[e2] = "UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND" }
|
||||
elsif entry =~ /^[0-9A-F]+/
|
||||
$icb[$&.hex] = "UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND"
|
||||
end
|
||||
end
|
||||
|
||||
$grapheme_boundclass_list = File.read("GraphemeBreakProperty.txt", :encoding => 'utf-8')
|
||||
$grapheme_boundclass = Hash.new("UTF8PROC_BOUNDCLASS_OTHER")
|
||||
$grapheme_boundclass_list.each_line do |entry|
|
||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*([A-Za-z_]+)/
|
||||
@ -107,7 +133,7 @@ $grapheme_boundclass_list.each_line do |entry|
|
||||
end
|
||||
end
|
||||
|
||||
$emoji_data_list = File.read("emoji-data.txt")
|
||||
$emoji_data_list = File.read("emoji-data.txt", :encoding => 'utf-8')
|
||||
$emoji_data_list.each_line do |entry|
|
||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*Extended_Pictographic\W/
|
||||
$1.hex.upto($2.hex) { |e2| $grapheme_boundclass[e2] = "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC" }
|
||||
@ -120,7 +146,7 @@ $emoji_data_list.each_line do |entry|
|
||||
end
|
||||
end
|
||||
|
||||
$charwidth_list = File.read("CharWidths.txt")
|
||||
$charwidth_list = File.read("CharWidths.txt", :encoding => 'utf-8')
|
||||
$charwidth = Hash.new(0)
|
||||
$charwidth_list.each_line do |entry|
|
||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*([0-9]+)/
|
||||
@ -130,13 +156,13 @@ $charwidth_list.each_line do |entry|
|
||||
end
|
||||
end
|
||||
|
||||
$exclusions = File.read("CompositionExclusions.txt")[/# \(1\) Script Specifics.*?# Total code points:/m]
|
||||
$exclusions = File.read("CompositionExclusions.txt", :encoding => 'utf-8')[/# \(1\) Script Specifics.*?# Total code points:/m]
|
||||
$exclusions = $exclusions.chomp.split("\n").collect { |e| e.hex }
|
||||
|
||||
$excl_version = File.read("CompositionExclusions.txt")[/# \(2\) Post Composition Version precomposed characters.*?# Total code points:/m]
|
||||
$excl_version = File.read("CompositionExclusions.txt", :encoding => 'utf-8')[/# \(2\) Post Composition Version precomposed characters.*?# Total code points:/m]
|
||||
$excl_version = $excl_version.chomp.split("\n").collect { |e| e.hex }
|
||||
|
||||
$case_folding_string = File.open("CaseFolding.txt", :encoding => 'utf-8').read
|
||||
$case_folding_string = File.read("CaseFolding.txt", :encoding => 'utf-8')
|
||||
$case_folding = {}
|
||||
$case_folding_string.chomp.split("\n").each do |line|
|
||||
next unless line =~ /([0-9A-F]+); [CF]; ([0-9A-F ]+);/i
|
||||
@ -174,13 +200,13 @@ def cpary2c(array)
|
||||
return "UINT16_MAX" if array.nil? || array.length == 0
|
||||
lencode = array.length - 1 #no sequence has len 0, so we encode len 1 as 0, len 2 as 1, ...
|
||||
array = cpary2utf16encoded(array)
|
||||
if lencode >= 7 #we have only 3 bits for the length (which is already cutting it close. might need to change it to 2 bits in future Unicode versions)
|
||||
if lencode >= 3 #we have only 2 bits for the length
|
||||
array = [lencode] + array
|
||||
lencode = 7
|
||||
lencode = 3
|
||||
end
|
||||
idx = pushary(array)
|
||||
raise "Array index out of bound" if idx > 0x1FFF
|
||||
return "#{idx | (lencode << 13)}"
|
||||
raise "Array index out of bound" if idx > 0x3FFF
|
||||
return "#{idx | (lencode << 14)}"
|
||||
end
|
||||
def singlecpmap(cp)
|
||||
return "UINT16_MAX" if cp == nil
|
||||
@ -249,7 +275,8 @@ class UnicodeChar
|
||||
"#{$ignorable.include?(code)}, " <<
|
||||
"#{%W[Zl Zp Cc Cf].include?(category) and not [0x200C, 0x200D].include?(category)}, " <<
|
||||
"#{$charwidth[code]}, 0, " <<
|
||||
"#{$grapheme_boundclass[code]}},\n"
|
||||
"#{$grapheme_boundclass[code]}, " <<
|
||||
"#{$icb[code]}},\n"
|
||||
end
|
||||
end
|
||||
|
||||
@ -415,7 +442,7 @@ end
|
||||
$stdout << "};\n\n"
|
||||
|
||||
$stdout << "static const utf8proc_property_t utf8proc_properties[] = {\n"
|
||||
$stdout << " {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER},\n"
|
||||
$stdout << " {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER, UTF8PROC_INDIC_CONJUNCT_BREAK_NONE},\n"
|
||||
properties.each { |line|
|
||||
$stdout << line
|
||||
}
|
||||
|
10
3rdparty/utf8proc/libutf8proc.pc.cmakein
vendored
Normal file
10
3rdparty/utf8proc/libutf8proc.pc.cmakein
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
prefix=@CMAKE_INSTALL_PREFIX@
|
||||
exec_prefix=@CMAKE_INSTALL_FULL_BINDIR@
|
||||
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
|
||||
Name: libutf8proc
|
||||
Description: UTF8 processing
|
||||
Version: @PROJECT_VERSION@
|
||||
Libs: -L${libdir} -lutf8proc
|
||||
Cflags: -I${includedir} -DUTF8PROC_EXPORTS
|
12
3rdparty/utf8proc/test/case.c
vendored
12
3rdparty/utf8proc/test/case.c
vendored
@ -26,27 +26,27 @@ int main(int argc, char **argv)
|
||||
++error;
|
||||
}
|
||||
|
||||
if (sizeof(wint_t) > 2 || c < (1<<16)) {
|
||||
wint_t l0 = towlower(c), u0 = towupper(c);
|
||||
if (sizeof(wint_t) > 2 || (c < (1<<16) && u < (1<<16) && l < (1<<16))) {
|
||||
wint_t l0 = towlower((wint_t)c), u0 = towupper((wint_t)c);
|
||||
|
||||
/* OS unicode tables may be out of date. But if they
|
||||
do have a lower/uppercase mapping, hopefully it
|
||||
is correct? */
|
||||
if (l0 != c && l0 != l) {
|
||||
if (l0 != (wint_t)c && l0 != (wint_t)l) {
|
||||
fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
|
||||
l, c, l0);
|
||||
++error;
|
||||
}
|
||||
else if (l0 != l) { /* often true for out-of-date OS unicode */
|
||||
else if (l0 != (wint_t)l) { /* often true for out-of-date OS unicode */
|
||||
++better;
|
||||
/* printf("%x != towlower(%x) == %x\n", l, c, l0); */
|
||||
}
|
||||
if (u0 != c && u0 != u) {
|
||||
if (u0 != (wint_t)c && u0 != (wint_t)u) {
|
||||
fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
|
||||
u, c, u0);
|
||||
++error;
|
||||
}
|
||||
else if (u0 != u) { /* often true for out-of-date OS unicode */
|
||||
else if (u0 != (wint_t)u) { /* often true for out-of-date OS unicode */
|
||||
++better;
|
||||
/* printf("%x != towupper(%x) == %x\n", u, c, u0); */
|
||||
}
|
||||
|
54
3rdparty/utf8proc/test/fuzz_main.c
vendored
Normal file
54
3rdparty/utf8proc/test/fuzz_main.c
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Fuzz target entry point, works without libFuzzer */
|
||||
|
||||
int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
|
||||
|
||||
/*
 * Standalone driver for the fuzz target: reads the file named by argv[1]
 * into memory and passes its contents to LLVMFuzzerTestOneInput() once.
 *
 * Always returns 0; problems (missing argument, unreadable or empty file,
 * allocation failure) are reported on stderr and the run is skipped.
 */
int main(int argc, char **argv)
{
    FILE *f = NULL;
    char *buf = NULL;
    long siz_buf;

    if(argc < 2)
    {
        fprintf(stderr, "no input file\n");
        goto err;
    }

    f = fopen(argv[1], "rb");
    if(f == NULL)
    {
        fprintf(stderr, "error opening input file %s\n", argv[1]);
        goto err;
    }

    /* Determine the file size by seeking to the end */
    if(fseek(f, 0, SEEK_END) != 0)
    {
        fprintf(stderr, "fseek() failed\n");
        goto err;
    }

    siz_buf = ftell(f);
    rewind(f);

    /* ftell() reports -1 on error; an empty file gives nothing to test */
    if(siz_buf < 1) goto err;

    buf = (char*)malloc((size_t)siz_buf);
    if(buf == NULL)
    {
        fprintf(stderr, "malloc() failed\n");
        goto err;
    }

    /* Read the whole file as a single item so a short read is an error */
    if(fread(buf, (size_t)siz_buf, 1, f) != 1)
    {
        fprintf(stderr, "fread() failed\n");
        goto err;
    }

    (void)LLVMFuzzerTestOneInput((uint8_t*)buf, (size_t)siz_buf);

err:
    /* Common cleanup: release the file handle and the input buffer */
    if(f != NULL) fclose(f);
    free(buf);

    return 0;
}
|
84
3rdparty/utf8proc/test/fuzzer.c
vendored
Normal file
84
3rdparty/utf8proc/test/fuzzer.c
vendored
Normal file
@ -0,0 +1,84 @@
|
||||
#include <utf8proc.h>
|
||||
#include <string.h>
|
||||
|
||||
int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
|
||||
{
|
||||
if(size < 1) return 0;
|
||||
|
||||
/* Avoid timeout with long inputs */
|
||||
if(size > (64 * 1024)) return 0;
|
||||
|
||||
if(data[size-1] != '\0') return 0;
|
||||
|
||||
const uint8_t* ptr = data;
|
||||
utf8proc_int32_t c = 0, c_prev = 0, state = 0;
|
||||
utf8proc_option_t options;
|
||||
utf8proc_ssize_t ret, bytes = 0;
|
||||
size_t len = strlen((const char*)data);
|
||||
|
||||
while(bytes != len)
|
||||
{
|
||||
ret = utf8proc_iterate(ptr, -1, &c);
|
||||
|
||||
if(ret < 0 || ret == 0) break;
|
||||
|
||||
bytes += ret;
|
||||
ptr += ret;
|
||||
|
||||
utf8proc_tolower(c);
|
||||
utf8proc_toupper(c);
|
||||
utf8proc_totitle(c);
|
||||
utf8proc_islower(c);
|
||||
utf8proc_isupper(c);
|
||||
utf8proc_charwidth(c);
|
||||
utf8proc_category(c);
|
||||
utf8proc_category_string(c);
|
||||
utf8proc_codepoint_valid(c);
|
||||
|
||||
utf8proc_grapheme_break(c_prev, c);
|
||||
utf8proc_grapheme_break_stateful(c_prev, c, &state);
|
||||
|
||||
c_prev = c;
|
||||
}
|
||||
|
||||
utf8proc_int32_t *copy = size >= 4 ? NULL : malloc(size);
|
||||
|
||||
if(copy)
|
||||
{
|
||||
size /= 4;
|
||||
|
||||
options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS | UTF8PROC_NLF2PS;
|
||||
memcpy(copy, data, size);
|
||||
utf8proc_normalize_utf32(copy, size, options);
|
||||
|
||||
options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS;
|
||||
memcpy(copy, data, size);
|
||||
utf8proc_normalize_utf32(copy, size, options);
|
||||
|
||||
options = UTF8PROC_STRIPCC | UTF8PROC_NLF2PS;
|
||||
memcpy(copy, data, size);
|
||||
utf8proc_normalize_utf32(copy, size, options);
|
||||
|
||||
options = UTF8PROC_STRIPCC;
|
||||
memcpy(copy, data, size);
|
||||
utf8proc_normalize_utf32(copy, size, options);
|
||||
|
||||
options = UTF8PROC_LUMP;
|
||||
memcpy(copy, data, size);
|
||||
utf8proc_normalize_utf32(copy, size, options);
|
||||
|
||||
options = 0;
|
||||
memcpy(copy, data, size);
|
||||
utf8proc_normalize_utf32(copy, size, options);
|
||||
|
||||
free(copy);
|
||||
}
|
||||
|
||||
free(utf8proc_NFD(data));
|
||||
free(utf8proc_NFC(data));
|
||||
free(utf8proc_NFKD(data));
|
||||
free(utf8proc_NFKC(data));
|
||||
free(utf8proc_NFKC_Casefold(data));
|
||||
|
||||
return 0;
|
||||
}
|
12
3rdparty/utf8proc/test/graphemetest.c
vendored
12
3rdparty/utf8proc/test/graphemetest.c
vendored
@ -43,7 +43,7 @@ void checkline(const char *_buf, bool verbose) {
|
||||
else
|
||||
i++;
|
||||
}
|
||||
glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
|
||||
glen = utf8proc_map(utf8, (utf8proc_ssize_t)j, &g, UTF8PROC_CHARBOUND);
|
||||
if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
|
||||
/* the test file contains surrogate codepoints, which are only for UTF-16 */
|
||||
printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
|
||||
@ -66,7 +66,7 @@ void checkline(const char *_buf, bool verbose) {
|
||||
utf8proc_bool expectbreak = false;
|
||||
do {
|
||||
utf8proc_int32_t codepoint;
|
||||
i += utf8proc_iterate(src + i, si - i, &codepoint);
|
||||
i += (size_t)utf8proc_iterate(src + i, (utf8proc_ssize_t)(si - i), &codepoint);
|
||||
check(codepoint >= 0, "invalid UTF-8 data");
|
||||
if (codepoint == 0x002F)
|
||||
expectbreak = true;
|
||||
@ -110,6 +110,7 @@ int main(int argc, char **argv)
|
||||
utf8proc_uint8_t *g;
|
||||
glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
|
||||
check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
|
||||
check(glen != 6, "mishandled u+ffff and u+fffe grapheme breaks");
|
||||
free(g);
|
||||
};
|
||||
|
||||
@ -118,6 +119,13 @@ int main(int argc, char **argv)
|
||||
checkline("/ 1f926 1f3fc 200d 2642 fe0f /", true); /* facepalm + pale skin + zwj + male sign + FE0F */
|
||||
checkline("/ 1f468 1f3fb 200d 1f91d 200d 1f468 1f3fd /", true); /* man face + pale skin + zwj + hand holding + zwj + man face + dark skin */
|
||||
|
||||
/* more GB9c tests */
|
||||
checkline("/ 0915 0300 094d 0300 0924 / 0915 /", true);
|
||||
checkline("/ 0915 0300 094d 0300 094d 0924 / 0915 /", true);
|
||||
checkline("/ 0915 0300 0300 / 0924 / 0915 /", true);
|
||||
checkline("/ 0915 0300 094d 0300 / 0078 /", true);
|
||||
checkline("/ 0300 094d 0300 / 0924 / 0915 /", true);
|
||||
|
||||
check(utf8proc_grapheme_break(0x03b1, 0x03b2), "failed 03b1 / 03b2 test");
|
||||
check(!utf8proc_grapheme_break(0x03b1, 0x0302), "failed 03b1 0302 test");
|
||||
|
||||
|
12
3rdparty/utf8proc/test/iterate.c
vendored
12
3rdparty/utf8proc/test/iterate.c
vendored
@ -8,7 +8,7 @@ static int error;
|
||||
#define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__)
|
||||
#define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__)
|
||||
|
||||
static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line)
|
||||
static void testbytes(utf8proc_uint8_t *buf, utf8proc_ssize_t len, utf8proc_ssize_t retval, int line)
|
||||
{
|
||||
utf8proc_int32_t out[16];
|
||||
utf8proc_ssize_t ret;
|
||||
@ -16,13 +16,13 @@ static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int
|
||||
/* Make a copy to ensure that memory is left uninitialized after "len"
|
||||
* bytes. This way, Valgrind can detect overreads.
|
||||
*/
|
||||
unsigned char tmp[16];
|
||||
memcpy(tmp, buf, len);
|
||||
utf8proc_uint8_t tmp[16];
|
||||
memcpy(tmp, buf, (unsigned long int)len);
|
||||
|
||||
tests++;
|
||||
if ((ret = utf8proc_iterate(tmp, len, out)) != retval) {
|
||||
fprintf(stderr, "Failed (%d):", line);
|
||||
for (int i = 0; i < len ; i++) {
|
||||
for (utf8proc_ssize_t i = 0; i < len ; i++) {
|
||||
fprintf(stderr, " 0x%02x", tmp[i]);
|
||||
}
|
||||
fprintf(stderr, " -> %zd\n", ret);
|
||||
@ -32,8 +32,8 @@ static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
uint32_t byt;
|
||||
unsigned char buf[16];
|
||||
utf8proc_int32_t byt;
|
||||
utf8proc_uint8_t buf[16];
|
||||
|
||||
(void) argc; (void) argv; /* unused */
|
||||
|
||||
|
13
3rdparty/utf8proc/test/ossfuzz.sh
vendored
Executable file
13
3rdparty/utf8proc/test/ossfuzz.sh
vendored
Executable file
@ -0,0 +1,13 @@
|
||||
#!/bin/bash -eu
# This script is meant to be run by
# https://github.com/google/oss-fuzz/blob/master/projects/utf8proc/Dockerfile
#
# Uses the environment variables SRC, OUT and LIB_FUZZING_ENGINE.
# Builds the fuzz target with CMake, copies it to $OUT, and zips the *.txt
# files under the test directory as the seed corpus.

mkdir build
cd build
cmake .. -DUTF8PROC_ENABLE_TESTING=ON -DLIB_FUZZING_ENGINE="$LIB_FUZZING_ENGINE"
make -j"$(nproc)"

# Quote path expansions so directories containing whitespace still work
cp "$SRC/utf8proc/build/fuzzer" "$OUT/utf8proc_fuzzer"

# NUL-delimit file names so unusual characters in paths are passed intact
find "$SRC/utf8proc/test" -name "*.txt" -print0 | \
    xargs -0 zip "$OUT/utf8proc_fuzzer_seed_corpus.zip"
|
8
3rdparty/utf8proc/test/printproperty.c
vendored
8
3rdparty/utf8proc/test/printproperty.c
vendored
@ -8,12 +8,14 @@ int main(int argc, char **argv)
|
||||
|
||||
for (i = 1; i < argc; ++i) {
|
||||
utf8proc_uint8_t cstr[16], *map;
|
||||
unsigned int c;
|
||||
utf8proc_uint32_t x;
|
||||
utf8proc_int32_t c;
|
||||
if (!strcmp(argv[i], "-V")) {
|
||||
printf("utf8proc version %s\n", utf8proc_version());
|
||||
continue;
|
||||
}
|
||||
check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
|
||||
check(sscanf(argv[i],"%x", &x) == 1, "invalid hex input %s", argv[i]);
|
||||
c = (utf8proc_int32_t)x;
|
||||
const utf8proc_property_t *p = utf8proc_get_property(c);
|
||||
|
||||
if (utf8proc_codepoint_valid(c))
|
||||
@ -37,6 +39,7 @@ int main(int argc, char **argv)
|
||||
" ignorable = %d\n"
|
||||
" control_boundary = %d\n"
|
||||
" boundclass = %d\n"
|
||||
" indic_conjunct_break = %d\n"
|
||||
" charwidth = %d\n",
|
||||
argv[i], (char*) cstr,
|
||||
utf8proc_category_string(c),
|
||||
@ -53,6 +56,7 @@ int main(int argc, char **argv)
|
||||
p->ignorable,
|
||||
p->control_boundary,
|
||||
p->boundclass,
|
||||
p->indic_conjunct_break,
|
||||
utf8proc_charwidth(c));
|
||||
free(map);
|
||||
}
|
||||
|
3
3rdparty/utf8proc/test/tests.c
vendored
3
3rdparty/utf8proc/test/tests.c
vendored
@ -29,7 +29,8 @@ size_t skipspaces(const unsigned char *buf, size_t i)
|
||||
in dest, returning the number of bytes read from buf */
|
||||
size_t encode(unsigned char *dest, const unsigned char *buf)
|
||||
{
|
||||
size_t i = 0, j, d = 0;
|
||||
size_t i = 0, j;
|
||||
utf8proc_ssize_t d = 0;
|
||||
for (;;) {
|
||||
int c;
|
||||
i = skipspaces(buf, i);
|
||||
|
85
3rdparty/utf8proc/utf8proc.c
vendored
85
3rdparty/utf8proc/utf8proc.c
vendored
@ -1,6 +1,6 @@
|
||||
/* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2019 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
||||
* Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
||||
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -101,7 +101,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
|
||||
return "13.0.0";
|
||||
return "15.1.0";
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
||||
@ -125,7 +125,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
|
||||
) {
|
||||
utf8proc_uint32_t uc;
|
||||
utf8proc_int32_t uc;
|
||||
const utf8proc_uint8_t *end;
|
||||
|
||||
*dst = -1;
|
||||
@ -137,7 +137,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
||||
return 1;
|
||||
}
|
||||
// Must be between 0xc2 and 0xf4 inclusive to be valid
|
||||
if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
|
||||
if ((utf8proc_uint32_t)(uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
|
||||
if (uc < 0xe0) { // 2-byte sequence
|
||||
// Must have valid continuation character
|
||||
if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
|
||||
@ -288,35 +288,54 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
|
||||
true; // GB999
|
||||
}
|
||||
|
||||
static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
|
||||
static utf8proc_bool grapheme_break_extended(int lbc, int tbc, int licb, int ticb, utf8proc_int32_t *state)
|
||||
{
|
||||
if (state) {
|
||||
int lbc_override;
|
||||
if (*state == UTF8PROC_BOUNDCLASS_START)
|
||||
*state = lbc_override = lbc;
|
||||
else
|
||||
lbc_override = *state;
|
||||
utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
|
||||
int state_bc, state_icb; /* boundclass and indic_conjunct_break state */
|
||||
if (*state == 0) { /* state initialization */
|
||||
state_bc = lbc;
|
||||
state_icb = licb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT ? licb : UTF8PROC_INDIC_CONJUNCT_BREAK_NONE;
|
||||
}
|
||||
else { /* lbc and licb are already encoded in *state */
|
||||
state_bc = *state & 0xff; // 1st byte of state is bound class
|
||||
state_icb = *state >> 8; // 2nd byte of state is indic conjunct break
|
||||
}
|
||||
|
||||
utf8proc_bool break_permitted = grapheme_break_simple(state_bc, tbc) &&
|
||||
!(state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER
|
||||
&& ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT); // GB9c
|
||||
|
||||
// Special support for GB9c. Don't break between two consonants
|
||||
// separated by 1+ linker characters and 0+ extend characters in any order.
|
||||
// After a consonant, we enter LINKER state after at least one linker.
|
||||
if (ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT
|
||||
|| state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT
|
||||
|| state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND)
|
||||
state_icb = ticb;
|
||||
else if (state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER)
|
||||
state_icb = ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND ?
|
||||
UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER : ticb;
|
||||
|
||||
// Special support for GB 12/13 made possible by GB999. After two RI
|
||||
// class codepoints we want to force a break. Do this by resetting the
|
||||
// second RI's bound class to UTF8PROC_BOUNDCLASS_OTHER, to force a break
|
||||
// after that character according to GB999 (unless of course such a break is
|
||||
// forbidden by a different rule such as GB9).
|
||||
if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
|
||||
*state = UTF8PROC_BOUNDCLASS_OTHER;
|
||||
if (state_bc == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
|
||||
state_bc = UTF8PROC_BOUNDCLASS_OTHER;
|
||||
// Special support for GB11 (emoji extend* zwj / emoji)
|
||||
else if (*state == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
|
||||
else if (state_bc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
|
||||
if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
|
||||
*state = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
|
||||
state_bc = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
|
||||
else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
|
||||
*state = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
|
||||
state_bc = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
|
||||
else
|
||||
*state = tbc;
|
||||
state_bc = tbc;
|
||||
}
|
||||
else
|
||||
*state = tbc;
|
||||
state_bc = tbc;
|
||||
|
||||
*state = state_bc + (state_icb << 8);
|
||||
return break_permitted;
|
||||
}
|
||||
else
|
||||
@ -326,8 +345,12 @@ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t
|
||||
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
|
||||
utf8proc_int32_t c1, utf8proc_int32_t c2, utf8proc_int32_t *state) {
|
||||
|
||||
return grapheme_break_extended(utf8proc_get_property(c1)->boundclass,
|
||||
utf8proc_get_property(c2)->boundclass,
|
||||
const utf8proc_property_t *p1 = utf8proc_get_property(c1);
|
||||
const utf8proc_property_t *p2 = utf8proc_get_property(c2);
|
||||
return grapheme_break_extended(p1->boundclass,
|
||||
p2->boundclass,
|
||||
p1->indic_conjunct_break,
|
||||
p2->indic_conjunct_break,
|
||||
state);
|
||||
}
|
||||
|
||||
@ -356,9 +379,9 @@ static utf8proc_int32_t seqindex_decode_index(const utf8proc_uint32_t seqindex)
|
||||
|
||||
static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
||||
utf8proc_ssize_t written = 0;
|
||||
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
|
||||
int len = seqindex >> 13;
|
||||
if (len >= 7) {
|
||||
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x3FFF];
|
||||
int len = seqindex >> 14;
|
||||
if (len >= 3) {
|
||||
len = *entry;
|
||||
entry++;
|
||||
}
|
||||
@ -376,19 +399,19 @@ static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqinde
|
||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
|
||||
{
|
||||
utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
|
||||
return cl != UINT16_MAX ? seqindex_decode_index(cl) : c;
|
||||
return cl != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cl) : c;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
|
||||
{
|
||||
utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
|
||||
return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
|
||||
return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
|
||||
{
|
||||
utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
|
||||
return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
|
||||
return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c)
|
||||
@ -410,7 +433,7 @@ UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
|
||||
return utf8proc_get_property(c)->category;
|
||||
return (utf8proc_category_t) utf8proc_get_property(c)->category;
|
||||
}
|
||||
|
||||
UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
|
||||
@ -420,7 +443,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
|
||||
|
||||
#define utf8proc_decompose_lump(replacement_uc) \
|
||||
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
||||
options & ~UTF8PROC_LUMP, last_boundclass)
|
||||
options & ~(unsigned int)UTF8PROC_LUMP, last_boundclass)
|
||||
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
||||
const utf8proc_property_t *property;
|
||||
@ -498,8 +521,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
|
||||
}
|
||||
if (options & UTF8PROC_CHARBOUND) {
|
||||
utf8proc_bool boundary;
|
||||
int tbc = property->boundclass;
|
||||
boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
|
||||
boundary = grapheme_break_extended(0, property->boundclass, 0, property->indic_conjunct_break,
|
||||
last_boundclass);
|
||||
if (boundary) {
|
||||
if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
|
||||
if (bufsize >= 2) dst[1] = uc;
|
||||
@ -735,7 +758,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
|
||||
*dstptr = NULL;
|
||||
result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
|
||||
if (result < 0) return result;
|
||||
buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
|
||||
buffer = (utf8proc_int32_t *) malloc(((utf8proc_size_t)result) * sizeof(utf8proc_int32_t) + 1);
|
||||
if (!buffer) return UTF8PROC_ERROR_NOMEM;
|
||||
result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
|
||||
if (result < 0) {
|
||||
|
22
3rdparty/utf8proc/utf8proc.h
vendored
22
3rdparty/utf8proc/utf8proc.h
vendored
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2019 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
||||
* Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
||||
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -71,9 +71,9 @@
|
||||
/** The MAJOR version number (increased when backwards API compatibility is broken). */
|
||||
#define UTF8PROC_VERSION_MAJOR 2
|
||||
/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
|
||||
#define UTF8PROC_VERSION_MINOR 6
|
||||
#define UTF8PROC_VERSION_MINOR 9
|
||||
/** The PATCH version (increased for fixes that do not change the API). */
|
||||
#define UTF8PROC_VERSION_PATCH 1
|
||||
#define UTF8PROC_VERSION_PATCH 0
|
||||
/** @} */
|
||||
|
||||
#include <stdlib.h>
|
||||
@ -273,7 +273,8 @@ typedef struct utf8proc_property_struct {
|
||||
* Boundclass.
|
||||
* @see utf8proc_boundclass_t.
|
||||
*/
|
||||
unsigned boundclass:8;
|
||||
unsigned boundclass:6;
|
||||
unsigned indic_conjunct_break:2;
|
||||
} utf8proc_property_t;
|
||||
|
||||
/** Unicode categories. */
|
||||
@ -388,6 +389,14 @@ typedef enum {
|
||||
UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */
|
||||
} utf8proc_boundclass_t;
|
||||
|
||||
/** Indic_Conjunct_Break property. (TR44) */
|
||||
typedef enum {
|
||||
UTF8PROC_INDIC_CONJUNCT_BREAK_NONE = 0,
|
||||
UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER = 1,
|
||||
UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT = 2,
|
||||
UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND = 3,
|
||||
} utf8proc_indic_conjunct_break_t;
|
||||
|
||||
/**
|
||||
* Function pointer type passed to @ref utf8proc_map_custom and
|
||||
* @ref utf8proc_decompose_custom, which is used to specify a user-defined
|
||||
@ -481,8 +490,9 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
|
||||
* - @ref UTF8PROC_STRIPNA - remove unassigned codepoints
|
||||
* @param last_boundclass
|
||||
* Pointer to an integer variable containing
|
||||
* the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND
|
||||
* option is used. Otherwise, this parameter is ignored.
|
||||
* the previous codepoint's (boundclass + indic_conjunct_break << 1) if the @ref UTF8PROC_CHARBOUND
|
||||
* option is used. If the string is being processed in order, this can be initialized to 0 for
|
||||
* the beginning of the string, and is thereafter updated automatically. Otherwise, this parameter is ignored.
|
||||
*
|
||||
* @return
|
||||
* In case of success, the number of codepoints written is returned; in case
|
||||
|
24975
3rdparty/utf8proc/utf8proc_data.c
vendored
24975
3rdparty/utf8proc/utf8proc_data.c
vendored
File diff suppressed because it is too large
Load Diff
2
COPYING
2
COPYING
@ -197,7 +197,7 @@ tap-windows.h
|
||||
Copyright (C) 2002-2014 OpenVPN Technologies, Inc.
|
||||
|
||||
utf8proc
|
||||
Copyright (c) 2014-2015 Steven G. Johnson, Jiahao Chen, Tony Kelman,
|
||||
Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Tony Kelman,
|
||||
Jonas Fonseca, and other contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
|
@ -1747,11 +1747,6 @@ project "utf8proc"
|
||||
"verbose=-1",
|
||||
}
|
||||
|
||||
configuration { "gmake or ninja" }
|
||||
buildoptions_c {
|
||||
"-Wno-strict-prototypes",
|
||||
}
|
||||
|
||||
configuration { }
|
||||
|
||||
files {
|
||||
|
Loading…
Reference in New Issue
Block a user