mirror of
https://github.com/holub/mame
synced 2025-07-23 02:01:08 +03:00
3rdparty/utf8proc: Updated to 2.9.0.
This commit is contained in:
parent
12590d6ad8
commit
466c450cb3
23
3rdparty/utf8proc/.github/workflows/ci-fuzz.yml
vendored
Normal file
23
3rdparty/utf8proc/.github/workflows/ci-fuzz.yml
vendored
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
name: CIFuzz
|
||||||
|
on: [pull_request]
|
||||||
|
jobs:
|
||||||
|
Fuzzing:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Build Fuzzers
|
||||||
|
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
|
||||||
|
with:
|
||||||
|
oss-fuzz-project-name: 'utf8proc'
|
||||||
|
dry-run: false
|
||||||
|
- name: Run Fuzzers
|
||||||
|
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
|
||||||
|
with:
|
||||||
|
oss-fuzz-project-name: 'utf8proc'
|
||||||
|
fuzz-seconds: 600
|
||||||
|
dry-run: false
|
||||||
|
- name: Upload Crash
|
||||||
|
uses: actions/upload-artifact@v1
|
||||||
|
if: failure()
|
||||||
|
with:
|
||||||
|
name: artifacts
|
||||||
|
path: ./out/artifacts
|
64
3rdparty/utf8proc/.github/workflows/cmake.yml
vendored
Normal file
64
3rdparty/utf8proc/.github/workflows/cmake.yml
vendored
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
name: CMake
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
- 'release-*'
|
||||||
|
pull_request:
|
||||||
|
# run on all pr
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest, windows-latest, macOS-latest]
|
||||||
|
shared: ["ON", "OFF"]
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
name: ${{ matrix.os }} - shared=${{ matrix.shared }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: Build
|
||||||
|
run: |
|
||||||
|
mkdir build
|
||||||
|
cmake -S . -B build -DBUILD_SHARED_LIBS=${{ matrix.shared }} -DUTF8PROC_ENABLE_TESTING=ON
|
||||||
|
cmake --build build
|
||||||
|
- name: Run Test
|
||||||
|
run: ctest --test-dir build -V
|
||||||
|
- name: Upload shared lib
|
||||||
|
if: matrix.shared == 'ON'
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
with:
|
||||||
|
name: ${{ matrix.os }}
|
||||||
|
path: |
|
||||||
|
build/libutf8proc.*
|
||||||
|
build/Debug/utf8proc.*
|
||||||
|
|
||||||
|
mingw:
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [windows-latest]
|
||||||
|
shared: ["ON", "OFF"]
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
name: mingw64 - shared=${{ matrix.shared }}
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: msys2 {0}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- uses: msys2/setup-msys2@v2
|
||||||
|
with:
|
||||||
|
install: gcc make mingw-w64-x86_64-cmake
|
||||||
|
- name: Build
|
||||||
|
run: |
|
||||||
|
mkdir build
|
||||||
|
cmake -S . -B build -DBUILD_SHARED_LIBS=${{ matrix.shared }} -DUTF8PROC_ENABLE_TESTING=ON -G'MSYS Makefiles'
|
||||||
|
cmake --build build
|
||||||
|
- name: Run Test
|
||||||
|
run: ctest --test-dir build -V
|
||||||
|
- name: Upload shared lib
|
||||||
|
if: matrix.shared == 'ON'
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
with:
|
||||||
|
name: windows-mingw64
|
||||||
|
path: build/libutf8proc.*
|
41
3rdparty/utf8proc/.github/workflows/make.yml
vendored
Normal file
41
3rdparty/utf8proc/.github/workflows/make.yml
vendored
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
name: Make
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
- 'release-*'
|
||||||
|
pull_request:
|
||||||
|
# run on all pr
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest, macOS-latest]
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
name: ${{ matrix.os }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
# TODO: update makefile to check MANIFEST
|
||||||
|
# - name: Install dependencies (MacOS)
|
||||||
|
# if: matrix.config.os == 'macos-latest'
|
||||||
|
# run: brew install ruby findutils
|
||||||
|
|
||||||
|
- name: Check MANIFEST
|
||||||
|
if: matrix.config.os == 'ubuntu-latest'
|
||||||
|
run: make manifest && diff MANIFEST.new MANIFEST
|
||||||
|
- name: Run Test
|
||||||
|
run: make check
|
||||||
|
- name: Check utf8proc_data.c
|
||||||
|
run: make data && diff data/utf8proc_data.c.new utf8proc_data.c
|
||||||
|
- name: Clean
|
||||||
|
run: make clean && git status --ignored --porcelain && test -z "$(git status --ignored --porcelain)"
|
||||||
|
|
||||||
|
- name: Make lib
|
||||||
|
run: make
|
||||||
|
- name: Upload shared lib
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
with:
|
||||||
|
name: make-${{ matrix.os }}
|
||||||
|
path: libutf8proc.*
|
38
3rdparty/utf8proc/.gitignore
vendored
Normal file
38
3rdparty/utf8proc/.gitignore
vendored
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
*.tar.gz
|
||||||
|
*.exe
|
||||||
|
*.dll
|
||||||
|
*.do
|
||||||
|
*.o
|
||||||
|
*.so*
|
||||||
|
*.a
|
||||||
|
*.dll
|
||||||
|
*.dylib
|
||||||
|
*.dSYM
|
||||||
|
*.out
|
||||||
|
*.new
|
||||||
|
.vscode
|
||||||
|
/data/*.txt
|
||||||
|
/data/*.ttf
|
||||||
|
/data/*.sfd
|
||||||
|
/docs/
|
||||||
|
/bench/bench
|
||||||
|
/bench/icu
|
||||||
|
/bench/unistring
|
||||||
|
/test/normtest
|
||||||
|
/test/graphemetest
|
||||||
|
/test/printproperty
|
||||||
|
/test/charwidth
|
||||||
|
/test/misc
|
||||||
|
/test/valid
|
||||||
|
/test/iterate
|
||||||
|
/test/case
|
||||||
|
/test/iscase
|
||||||
|
/test/custom
|
||||||
|
/tmp/
|
||||||
|
/mingw_static/
|
||||||
|
/mingw_shared/
|
||||||
|
/msvc_shared/
|
||||||
|
/msvc_static/
|
||||||
|
/build/
|
||||||
|
NEWS-update.jl
|
||||||
|
libutf8proc.pc
|
22
3rdparty/utf8proc/.travis.yml
vendored
22
3rdparty/utf8proc/.travis.yml
vendored
@ -1,22 +0,0 @@
|
|||||||
language: c
|
|
||||||
compiler:
|
|
||||||
- gcc
|
|
||||||
- clang
|
|
||||||
notifications:
|
|
||||||
email: false
|
|
||||||
before_install:
|
|
||||||
- sudo add-apt-repository ppa:staticfloat/julia-deps -y
|
|
||||||
- sudo add-apt-repository ppa:staticfloat/juliareleases -y
|
|
||||||
- sudo apt-get update -qq -y
|
|
||||||
- sudo apt-get install libpcre3-dev julia fontforge -y
|
|
||||||
script:
|
|
||||||
- make manifest && diff MANIFEST.new MANIFEST
|
|
||||||
- make check
|
|
||||||
- make data && diff data/utf8proc_data.c.new utf8proc_data.c
|
|
||||||
- make clean && git status --ignored --porcelain && test -z "$(git status --ignored --porcelain)"
|
|
||||||
- (mkdir build_static && cd build_static && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make)
|
|
||||||
- (mkdir build_shared && cd build_shared && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_SHARED_LIBS=ON && make)
|
|
||||||
env:
|
|
||||||
# use JuliaLang caching (https://github.com/staticfloat/cache.julialang.org)
|
|
||||||
# so that Travis builds do not depend on anyone's flaky servers but our own
|
|
||||||
- URLCACHE=https://cache.julialang.org/ CFLAGS="-O2 -Werror -Wmissing-prototypes"
|
|
46
3rdparty/utf8proc/CMakeLists.txt
vendored
46
3rdparty/utf8proc/CMakeLists.txt
vendored
@ -1,20 +1,24 @@
|
|||||||
cmake_minimum_required (VERSION 2.8.12)
|
cmake_minimum_required (VERSION 3.0.0)
|
||||||
|
|
||||||
include (utils.cmake)
|
include (utils.cmake)
|
||||||
|
|
||||||
disallow_intree_builds()
|
disallow_intree_builds()
|
||||||
|
|
||||||
project (utf8proc C)
|
if (POLICY CMP0048)
|
||||||
|
cmake_policy (SET CMP0048 NEW)
|
||||||
|
endif ()
|
||||||
|
project (utf8proc VERSION 2.9.0 LANGUAGES C)
|
||||||
|
|
||||||
# This is the ABI version number, which may differ from the
|
# This is the ABI version number, which may differ from the
|
||||||
# API version number (defined in utf8proc.h).
|
# API version number (defined in utf8proc.h and above).
|
||||||
# Be sure to also update these in Makefile and MANIFEST!
|
# Be sure to also update these in Makefile and MANIFEST!
|
||||||
set(SO_MAJOR 2)
|
set(SO_MAJOR 3)
|
||||||
set(SO_MINOR 4)
|
set(SO_MINOR 0)
|
||||||
set(SO_PATCH 1)
|
set(SO_PATCH 0)
|
||||||
|
|
||||||
option(UTF8PROC_INSTALL "Enable installation of utf8proc" On)
|
option(UTF8PROC_INSTALL "Enable installation of utf8proc" On)
|
||||||
option(UTF8PROC_ENABLE_TESTING "Enable testing of utf8proc" Off)
|
option(UTF8PROC_ENABLE_TESTING "Enable testing of utf8proc" Off)
|
||||||
|
option(LIB_FUZZING_ENGINE "Fuzzing engine to link against" Off)
|
||||||
|
|
||||||
add_library (utf8proc
|
add_library (utf8proc
|
||||||
utf8proc.c
|
utf8proc.c
|
||||||
@ -50,23 +54,23 @@ set_target_properties (utf8proc PROPERTIES
|
|||||||
)
|
)
|
||||||
|
|
||||||
if (UTF8PROC_INSTALL)
|
if (UTF8PROC_INSTALL)
|
||||||
|
include(GNUInstallDirs)
|
||||||
|
install(FILES utf8proc.h DESTINATION "${CMAKE_INSTALL_FULL_INCLUDEDIR}")
|
||||||
install(TARGETS utf8proc
|
install(TARGETS utf8proc
|
||||||
RUNTIME DESTINATION bin
|
ARCHIVE DESTINATION "${CMAKE_INSTALL_FULL_LIBDIR}"
|
||||||
LIBRARY DESTINATION lib
|
LIBRARY DESTINATION "${CMAKE_INSTALL_FULL_LIBDIR}"
|
||||||
ARCHIVE DESTINATION lib)
|
RUNTIME DESTINATION "${CMAKE_INSTALL_FULL_BINDIR}"
|
||||||
|
)
|
||||||
install(
|
configure_file(libutf8proc.pc.cmakein libutf8proc.pc @ONLY)
|
||||||
FILES
|
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libutf8proc.pc" DESTINATION "${CMAKE_INSTALL_FULL_LIBDIR}/pkgconfig")
|
||||||
"${PROJECT_SOURCE_DIR}/utf8proc.h"
|
|
||||||
DESTINATION include)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(UTF8PROC_ENABLE_TESTING)
|
if(UTF8PROC_ENABLE_TESTING)
|
||||||
enable_testing()
|
enable_testing()
|
||||||
file(MAKE_DIRECTORY data)
|
file(MAKE_DIRECTORY data)
|
||||||
set(UNICODE_VERSION 13.0.0)
|
set(UNICODE_VERSION 15.1.0)
|
||||||
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt data/NormalizationTest.txt SHOW_PROGRESS)
|
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt ${CMAKE_BINARY_DIR}/data/NormalizationTest.txt SHOW_PROGRESS)
|
||||||
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTest.txt SHOW_PROGRESS)
|
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt ${CMAKE_BINARY_DIR}/data/GraphemeBreakTest.txt SHOW_PROGRESS)
|
||||||
add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)
|
add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)
|
||||||
target_link_libraries(case utf8proc)
|
target_link_libraries(case utf8proc)
|
||||||
add_executable(custom test/tests.h test/tests.c utf8proc.h test/custom.c)
|
add_executable(custom test/tests.h test/tests.c utf8proc.h test/custom.c)
|
||||||
@ -98,4 +102,12 @@ if(UTF8PROC_ENABLE_TESTING)
|
|||||||
target_link_libraries(normtest utf8proc)
|
target_link_libraries(normtest utf8proc)
|
||||||
add_test(utf8proc.testgraphemetest graphemetest data/GraphemeBreakTest.txt)
|
add_test(utf8proc.testgraphemetest graphemetest data/GraphemeBreakTest.txt)
|
||||||
add_test(utf8proc.testnormtest normtest data/NormalizationTest.txt)
|
add_test(utf8proc.testnormtest normtest data/NormalizationTest.txt)
|
||||||
|
|
||||||
|
if(LIB_FUZZING_ENGINE)
|
||||||
|
add_executable(fuzzer utf8proc.h test/fuzzer.c)
|
||||||
|
target_link_libraries(fuzzer ${LIB_FUZZING_ENGINE} utf8proc)
|
||||||
|
else()
|
||||||
|
add_executable(fuzzer utf8proc.h test/fuzz_main.c test/fuzzer.c)
|
||||||
|
target_link_libraries(fuzzer utf8proc)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
169
3rdparty/utf8proc/Doxyfile
vendored
169
3rdparty/utf8proc/Doxyfile
vendored
@ -1,4 +1,4 @@
|
|||||||
# Doxyfile 1.8.18
|
# Doxyfile 1.9.1
|
||||||
|
|
||||||
# This file describes the settings to be used by the documentation system
|
# This file describes the settings to be used by the documentation system
|
||||||
# doxygen (www.doxygen.org) for a project.
|
# doxygen (www.doxygen.org) for a project.
|
||||||
@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
|
|||||||
# title of most generated pages and in a few other places.
|
# title of most generated pages and in a few other places.
|
||||||
# The default value is: My Project.
|
# The default value is: My Project.
|
||||||
|
|
||||||
PROJECT_NAME = "utf8proc"
|
PROJECT_NAME = utf8proc
|
||||||
|
|
||||||
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
|
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
|
||||||
# could be handy for archiving the generated documentation or if some version
|
# could be handy for archiving the generated documentation or if some version
|
||||||
@ -217,6 +217,14 @@ QT_AUTOBRIEF = NO
|
|||||||
|
|
||||||
MULTILINE_CPP_IS_BRIEF = NO
|
MULTILINE_CPP_IS_BRIEF = NO
|
||||||
|
|
||||||
|
# By default Python docstrings are displayed as preformatted text and doxygen's
|
||||||
|
# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the
|
||||||
|
# doxygen's special commands can be used and the contents of the docstring
|
||||||
|
# documentation blocks is shown as doxygen documentation.
|
||||||
|
# The default value is: YES.
|
||||||
|
|
||||||
|
PYTHON_DOCSTRING = YES
|
||||||
|
|
||||||
# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
|
# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
|
||||||
# documentation from any documented member that it re-implements.
|
# documentation from any documented member that it re-implements.
|
||||||
# The default value is: YES.
|
# The default value is: YES.
|
||||||
@ -305,7 +313,10 @@ OPTIMIZE_OUTPUT_SLICE = NO
|
|||||||
# Note: For files without extension you can use no_extension as a placeholder.
|
# Note: For files without extension you can use no_extension as a placeholder.
|
||||||
#
|
#
|
||||||
# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
|
# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
|
||||||
# the files are not read by doxygen.
|
# the files are not read by doxygen. When specifying no_extension you should add
|
||||||
|
# * to the FILE_PATTERNS.
|
||||||
|
#
|
||||||
|
# Note see also the list of default file extension mappings.
|
||||||
|
|
||||||
EXTENSION_MAPPING =
|
EXTENSION_MAPPING =
|
||||||
|
|
||||||
@ -439,6 +450,19 @@ TYPEDEF_HIDES_STRUCT = NO
|
|||||||
|
|
||||||
LOOKUP_CACHE_SIZE = 0
|
LOOKUP_CACHE_SIZE = 0
|
||||||
|
|
||||||
|
# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use
|
||||||
|
# during processing. When set to 0 doxygen will based this on the number of
|
||||||
|
# cores available in the system. You can set it explicitly to a value larger
|
||||||
|
# than 0 to get more control over the balance between CPU load and processing
|
||||||
|
# speed. At this moment only the input processing can be done using multiple
|
||||||
|
# threads. Since this is still an experimental feature the default is set to 1,
|
||||||
|
# which efficively disables parallel processing. Please report any issues you
|
||||||
|
# encounter. Generating dot graphs in parallel is controlled by the
|
||||||
|
# DOT_NUM_THREADS setting.
|
||||||
|
# Minimum value: 0, maximum value: 32, default value: 1.
|
||||||
|
|
||||||
|
NUM_PROC_THREADS = 1
|
||||||
|
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
# Build related configuration options
|
# Build related configuration options
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
@ -502,6 +526,13 @@ EXTRACT_LOCAL_METHODS = NO
|
|||||||
|
|
||||||
EXTRACT_ANON_NSPACES = NO
|
EXTRACT_ANON_NSPACES = NO
|
||||||
|
|
||||||
|
# If this flag is set to YES, the name of an unnamed parameter in a declaration
|
||||||
|
# will be determined by the corresponding definition. By default unnamed
|
||||||
|
# parameters remain unnamed in the output.
|
||||||
|
# The default value is: YES.
|
||||||
|
|
||||||
|
RESOLVE_UNNAMED_PARAMS = YES
|
||||||
|
|
||||||
# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
|
# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
|
||||||
# undocumented members inside documented classes or files. If set to NO these
|
# undocumented members inside documented classes or files. If set to NO these
|
||||||
# members will be included in the various overviews, but no documentation
|
# members will be included in the various overviews, but no documentation
|
||||||
@ -539,11 +570,18 @@ HIDE_IN_BODY_DOCS = NO
|
|||||||
|
|
||||||
INTERNAL_DOCS = NO
|
INTERNAL_DOCS = NO
|
||||||
|
|
||||||
# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
|
# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
|
||||||
# names in lower-case letters. If set to YES, upper-case letters are also
|
# able to match the capabilities of the underlying filesystem. In case the
|
||||||
# allowed. This is useful if you have classes or files whose names only differ
|
# filesystem is case sensitive (i.e. it supports files in the same directory
|
||||||
# in case and if your file system supports case sensitive file names. Windows
|
# whose names only differ in casing), the option must be set to YES to properly
|
||||||
# (including Cygwin) ands Mac users are advised to set this option to NO.
|
# deal with such files in case they appear in the input. For filesystems that
|
||||||
|
# are not case sensitive the option should be be set to NO to properly deal with
|
||||||
|
# output files written for symbols that only differ in casing, such as for two
|
||||||
|
# classes, one named CLASS and the other named Class, and to also support
|
||||||
|
# references to files without having to specify the exact matching casing. On
|
||||||
|
# Windows (including Cygwin) and MacOS, users should typically set this option
|
||||||
|
# to NO, whereas on Linux or other Unix flavors it should typically be set to
|
||||||
|
# YES.
|
||||||
# The default value is: system dependent.
|
# The default value is: system dependent.
|
||||||
|
|
||||||
CASE_SENSE_NAMES = NO
|
CASE_SENSE_NAMES = NO
|
||||||
@ -782,7 +820,10 @@ WARN_IF_DOC_ERROR = YES
|
|||||||
WARN_NO_PARAMDOC = NO
|
WARN_NO_PARAMDOC = NO
|
||||||
|
|
||||||
# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
|
# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
|
||||||
# a warning is encountered.
|
# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
|
||||||
|
# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
|
||||||
|
# at the end of the doxygen process doxygen will return with a non-zero status.
|
||||||
|
# Possible values are: NO, YES and FAIL_ON_WARNINGS.
|
||||||
# The default value is: NO.
|
# The default value is: NO.
|
||||||
|
|
||||||
WARN_AS_ERROR = NO
|
WARN_AS_ERROR = NO
|
||||||
@ -818,8 +859,8 @@ INPUT =
|
|||||||
# This tag can be used to specify the character encoding of the source files
|
# This tag can be used to specify the character encoding of the source files
|
||||||
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
|
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
|
||||||
# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
|
# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
|
||||||
# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
|
# documentation (see:
|
||||||
# possible encodings.
|
# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
|
||||||
# The default value is: UTF-8.
|
# The default value is: UTF-8.
|
||||||
|
|
||||||
INPUT_ENCODING = UTF-8
|
INPUT_ENCODING = UTF-8
|
||||||
@ -832,13 +873,15 @@ INPUT_ENCODING = UTF-8
|
|||||||
# need to set EXTENSION_MAPPING for the extension otherwise the files are not
|
# need to set EXTENSION_MAPPING for the extension otherwise the files are not
|
||||||
# read by doxygen.
|
# read by doxygen.
|
||||||
#
|
#
|
||||||
|
# Note the list of default checked file patterns might differ from the list of
|
||||||
|
# default file extension mappings.
|
||||||
|
#
|
||||||
# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
|
# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
|
||||||
# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
|
# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
|
||||||
# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
|
# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
|
||||||
# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
|
# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
|
||||||
# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen
|
# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,
|
||||||
# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd,
|
# *.ucf, *.qsf and *.ice.
|
||||||
# *.vhdl, *.ucf, *.qsf and *.ice.
|
|
||||||
|
|
||||||
FILE_PATTERNS =
|
FILE_PATTERNS =
|
||||||
|
|
||||||
@ -1065,13 +1108,6 @@ VERBATIM_HEADERS = YES
|
|||||||
|
|
||||||
ALPHABETICAL_INDEX = YES
|
ALPHABETICAL_INDEX = YES
|
||||||
|
|
||||||
# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
|
|
||||||
# which the alphabetical index list will be split.
|
|
||||||
# Minimum value: 1, maximum value: 20, default value: 5.
|
|
||||||
# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
|
|
||||||
|
|
||||||
COLS_IN_ALPHA_INDEX = 5
|
|
||||||
|
|
||||||
# In case all classes in a project start with a common prefix, all classes will
|
# In case all classes in a project start with a common prefix, all classes will
|
||||||
# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
|
# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
|
||||||
# can be used to specify a prefix (or a list of prefixes) that should be ignored
|
# can be used to specify a prefix (or a list of prefixes) that should be ignored
|
||||||
@ -1242,10 +1278,11 @@ HTML_INDEX_NUM_ENTRIES = 100
|
|||||||
|
|
||||||
# If the GENERATE_DOCSET tag is set to YES, additional index files will be
|
# If the GENERATE_DOCSET tag is set to YES, additional index files will be
|
||||||
# generated that can be used as input for Apple's Xcode 3 integrated development
|
# generated that can be used as input for Apple's Xcode 3 integrated development
|
||||||
# environment (see: https://developer.apple.com/xcode/), introduced with OSX
|
# environment (see:
|
||||||
# 10.5 (Leopard). To create a documentation set, doxygen will generate a
|
# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
|
||||||
# Makefile in the HTML output directory. Running make will produce the docset in
|
# create a documentation set, doxygen will generate a Makefile in the HTML
|
||||||
# that directory and running make install will install the docset in
|
# output directory. Running make will produce the docset in that directory and
|
||||||
|
# running make install will install the docset in
|
||||||
# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
|
# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
|
||||||
# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
|
# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
|
||||||
# genXcode/_index.html for more information.
|
# genXcode/_index.html for more information.
|
||||||
@ -1287,8 +1324,8 @@ DOCSET_PUBLISHER_NAME = Publisher
|
|||||||
# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
|
# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
|
||||||
# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
|
# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
|
||||||
# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
|
# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
|
||||||
# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
|
# (see:
|
||||||
# Windows.
|
# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
|
||||||
#
|
#
|
||||||
# The HTML Help Workshop contains a compiler that can convert all HTML output
|
# The HTML Help Workshop contains a compiler that can convert all HTML output
|
||||||
# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
|
# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
|
||||||
@ -1318,7 +1355,7 @@ CHM_FILE =
|
|||||||
HHC_LOCATION =
|
HHC_LOCATION =
|
||||||
|
|
||||||
# The GENERATE_CHI flag controls if a separate .chi index file is generated
|
# The GENERATE_CHI flag controls if a separate .chi index file is generated
|
||||||
# (YES) or that it should be included in the master .chm file (NO).
|
# (YES) or that it should be included in the main .chm file (NO).
|
||||||
# The default value is: NO.
|
# The default value is: NO.
|
||||||
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
|
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
|
||||||
|
|
||||||
@ -1363,7 +1400,8 @@ QCH_FILE =
|
|||||||
|
|
||||||
# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
|
# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
|
||||||
# Project output. For more information please see Qt Help Project / Namespace
|
# Project output. For more information please see Qt Help Project / Namespace
|
||||||
# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
|
# (see:
|
||||||
|
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
|
||||||
# The default value is: org.doxygen.Project.
|
# The default value is: org.doxygen.Project.
|
||||||
# This tag requires that the tag GENERATE_QHP is set to YES.
|
# This tag requires that the tag GENERATE_QHP is set to YES.
|
||||||
|
|
||||||
@ -1371,8 +1409,8 @@ QHP_NAMESPACE = org.doxygen.Project
|
|||||||
|
|
||||||
# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
|
# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
|
||||||
# Help Project output. For more information please see Qt Help Project / Virtual
|
# Help Project output. For more information please see Qt Help Project / Virtual
|
||||||
# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
|
# Folders (see:
|
||||||
# folders).
|
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
|
||||||
# The default value is: doc.
|
# The default value is: doc.
|
||||||
# This tag requires that the tag GENERATE_QHP is set to YES.
|
# This tag requires that the tag GENERATE_QHP is set to YES.
|
||||||
|
|
||||||
@ -1380,16 +1418,16 @@ QHP_VIRTUAL_FOLDER = doc
|
|||||||
|
|
||||||
# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
|
# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
|
||||||
# filter to add. For more information please see Qt Help Project / Custom
|
# filter to add. For more information please see Qt Help Project / Custom
|
||||||
# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
|
# Filters (see:
|
||||||
# filters).
|
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
|
||||||
# This tag requires that the tag GENERATE_QHP is set to YES.
|
# This tag requires that the tag GENERATE_QHP is set to YES.
|
||||||
|
|
||||||
QHP_CUST_FILTER_NAME =
|
QHP_CUST_FILTER_NAME =
|
||||||
|
|
||||||
# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
|
# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
|
||||||
# custom filter to add. For more information please see Qt Help Project / Custom
|
# custom filter to add. For more information please see Qt Help Project / Custom
|
||||||
# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
|
# Filters (see:
|
||||||
# filters).
|
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
|
||||||
# This tag requires that the tag GENERATE_QHP is set to YES.
|
# This tag requires that the tag GENERATE_QHP is set to YES.
|
||||||
|
|
||||||
QHP_CUST_FILTER_ATTRS =
|
QHP_CUST_FILTER_ATTRS =
|
||||||
@ -1401,9 +1439,9 @@ QHP_CUST_FILTER_ATTRS =
|
|||||||
|
|
||||||
QHP_SECT_FILTER_ATTRS =
|
QHP_SECT_FILTER_ATTRS =
|
||||||
|
|
||||||
# The QHG_LOCATION tag can be used to specify the location of Qt's
|
# The QHG_LOCATION tag can be used to specify the location (absolute path
|
||||||
# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
|
# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
|
||||||
# generated .qhp file.
|
# run qhelpgenerator on the generated .qhp file.
|
||||||
# This tag requires that the tag GENERATE_QHP is set to YES.
|
# This tag requires that the tag GENERATE_QHP is set to YES.
|
||||||
|
|
||||||
QHG_LOCATION =
|
QHG_LOCATION =
|
||||||
@ -1484,8 +1522,8 @@ EXT_LINKS_IN_WINDOW = NO
|
|||||||
# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
|
# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
|
||||||
# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
|
# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
|
||||||
# the HTML output. These images will generally look nicer at scaled resolutions.
|
# the HTML output. These images will generally look nicer at scaled resolutions.
|
||||||
# Possible values are: png The default and svg Looks nicer but requires the
|
# Possible values are: png (the default) and svg (looks nicer but requires the
|
||||||
# pdf2svg tool.
|
# pdf2svg or inkscape tool).
|
||||||
# The default value is: png.
|
# The default value is: png.
|
||||||
# This tag requires that the tag GENERATE_HTML is set to YES.
|
# This tag requires that the tag GENERATE_HTML is set to YES.
|
||||||
|
|
||||||
@ -1530,7 +1568,7 @@ USE_MATHJAX = NO
|
|||||||
|
|
||||||
# When MathJax is enabled you can set the default output format to be used for
|
# When MathJax is enabled you can set the default output format to be used for
|
||||||
# the MathJax output. See the MathJax site (see:
|
# the MathJax output. See the MathJax site (see:
|
||||||
# http://docs.mathjax.org/en/latest/output.html) for more details.
|
# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.
|
||||||
# Possible values are: HTML-CSS (which is slower, but has the best
|
# Possible values are: HTML-CSS (which is slower, but has the best
|
||||||
# compatibility), NativeMML (i.e. MathML) and SVG.
|
# compatibility), NativeMML (i.e. MathML) and SVG.
|
||||||
# The default value is: HTML-CSS.
|
# The default value is: HTML-CSS.
|
||||||
@ -1560,7 +1598,8 @@ MATHJAX_EXTENSIONS =
|
|||||||
|
|
||||||
# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
|
# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
|
||||||
# of code that will be used on startup of the MathJax code. See the MathJax site
|
# of code that will be used on startup of the MathJax code. See the MathJax site
|
||||||
# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
|
# (see:
|
||||||
|
# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
|
||||||
# example see the documentation.
|
# example see the documentation.
|
||||||
# This tag requires that the tag USE_MATHJAX is set to YES.
|
# This tag requires that the tag USE_MATHJAX is set to YES.
|
||||||
|
|
||||||
@ -1607,7 +1646,8 @@ SERVER_BASED_SEARCH = NO
|
|||||||
#
|
#
|
||||||
# Doxygen ships with an example indexer (doxyindexer) and search engine
|
# Doxygen ships with an example indexer (doxyindexer) and search engine
|
||||||
# (doxysearch.cgi) which are based on the open source search engine library
|
# (doxysearch.cgi) which are based on the open source search engine library
|
||||||
# Xapian (see: https://xapian.org/).
|
# Xapian (see:
|
||||||
|
# https://xapian.org/).
|
||||||
#
|
#
|
||||||
# See the section "External Indexing and Searching" for details.
|
# See the section "External Indexing and Searching" for details.
|
||||||
# The default value is: NO.
|
# The default value is: NO.
|
||||||
@ -1620,8 +1660,9 @@ EXTERNAL_SEARCH = NO
|
|||||||
#
|
#
|
||||||
# Doxygen ships with an example indexer (doxyindexer) and search engine
|
# Doxygen ships with an example indexer (doxyindexer) and search engine
|
||||||
# (doxysearch.cgi) which are based on the open source search engine library
|
# (doxysearch.cgi) which are based on the open source search engine library
|
||||||
# Xapian (see: https://xapian.org/). See the section "External Indexing and
|
# Xapian (see:
|
||||||
# Searching" for details.
|
# https://xapian.org/). See the section "External Indexing and Searching" for
|
||||||
|
# details.
|
||||||
# This tag requires that the tag SEARCHENGINE is set to YES.
|
# This tag requires that the tag SEARCHENGINE is set to YES.
|
||||||
|
|
||||||
SEARCHENGINE_URL =
|
SEARCHENGINE_URL =
|
||||||
@ -1785,9 +1826,11 @@ LATEX_EXTRA_FILES =
|
|||||||
|
|
||||||
PDF_HYPERLINKS = YES
|
PDF_HYPERLINKS = YES
|
||||||
|
|
||||||
# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
|
# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
|
||||||
# the PDF file directly from the LaTeX files. Set this option to YES, to get a
|
# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
|
||||||
# higher quality PDF documentation.
|
# files. Set this option to YES, to get a higher quality PDF documentation.
|
||||||
|
#
|
||||||
|
# See also section LATEX_CMD_NAME for selecting the engine.
|
||||||
# The default value is: YES.
|
# The default value is: YES.
|
||||||
# This tag requires that the tag GENERATE_LATEX is set to YES.
|
# This tag requires that the tag GENERATE_LATEX is set to YES.
|
||||||
|
|
||||||
@ -2298,10 +2341,32 @@ UML_LOOK = NO
|
|||||||
# but if the number exceeds 15, the total amount of fields shown is limited to
|
# but if the number exceeds 15, the total amount of fields shown is limited to
|
||||||
# 10.
|
# 10.
|
||||||
# Minimum value: 0, maximum value: 100, default value: 10.
|
# Minimum value: 0, maximum value: 100, default value: 10.
|
||||||
# This tag requires that the tag HAVE_DOT is set to YES.
|
# This tag requires that the tag UML_LOOK is set to YES.
|
||||||
|
|
||||||
UML_LIMIT_NUM_FIELDS = 10
|
UML_LIMIT_NUM_FIELDS = 10
|
||||||
|
|
||||||
|
# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
|
||||||
|
# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
|
||||||
|
# tag is set to YES, doxygen will add type and arguments for attributes and
|
||||||
|
# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
|
||||||
|
# will not generate fields with class member information in the UML graphs. The
|
||||||
|
# class diagrams will look similar to the default class diagrams but using UML
|
||||||
|
# notation for the relationships.
|
||||||
|
# Possible values are: NO, YES and NONE.
|
||||||
|
# The default value is: NO.
|
||||||
|
# This tag requires that the tag UML_LOOK is set to YES.
|
||||||
|
|
||||||
|
DOT_UML_DETAILS = NO
|
||||||
|
|
||||||
|
# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
|
||||||
|
# to display on a single line. If the actual line length exceeds this threshold
|
||||||
|
# significantly it will be wrapped across multiple lines. Some heuristics are applied
|
||||||
|
# to avoid ugly line breaks.
|
||||||
|
# Minimum value: 0, maximum value: 1000, default value: 17.
|
||||||
|
# This tag requires that the tag HAVE_DOT is set to YES.
|
||||||
|
|
||||||
|
DOT_WRAP_THRESHOLD = 17
|
||||||
|
|
||||||
# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
|
# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
|
||||||
# collaboration graphs will show the relations between templates and their
|
# collaboration graphs will show the relations between templates and their
|
||||||
# instances.
|
# instances.
|
||||||
@ -2491,9 +2556,11 @@ DOT_MULTI_TARGETS = NO
|
|||||||
|
|
||||||
GENERATE_LEGEND = YES
|
GENERATE_LEGEND = YES
|
||||||
|
|
||||||
# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
|
# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
|
||||||
# files that are used to generate the various graphs.
|
# files that are used to generate the various graphs.
|
||||||
|
#
|
||||||
|
# Note: This setting is not only used for dot files but also for msc and
|
||||||
|
# plantuml temporary files.
|
||||||
# The default value is: YES.
|
# The default value is: YES.
|
||||||
# This tag requires that the tag HAVE_DOT is set to YES.
|
|
||||||
|
|
||||||
DOT_CLEANUP = YES
|
DOT_CLEANUP = YES
|
||||||
|
2
3rdparty/utf8proc/LICENSE.md
vendored
2
3rdparty/utf8proc/LICENSE.md
vendored
@ -7,7 +7,7 @@ whose copyright and license statements are reproduced below, all new
|
|||||||
work on the utf8proc library is licensed under the [MIT "expat"
|
work on the utf8proc library is licensed under the [MIT "expat"
|
||||||
license](http://opensource.org/licenses/MIT):
|
license](http://opensource.org/licenses/MIT):
|
||||||
|
|
||||||
*Copyright © 2014-2019 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
|
*Copyright © 2014-2021 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a
|
Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
copy of this software and associated documentation files (the "Software"),
|
copy of this software and associated documentation files (the "Software"),
|
||||||
|
6
3rdparty/utf8proc/MANIFEST
vendored
6
3rdparty/utf8proc/MANIFEST
vendored
@ -2,8 +2,8 @@ include/
|
|||||||
include/utf8proc.h
|
include/utf8proc.h
|
||||||
lib/
|
lib/
|
||||||
lib/libutf8proc.a
|
lib/libutf8proc.a
|
||||||
lib/libutf8proc.so -> libutf8proc.so.2.4.1
|
lib/libutf8proc.so -> libutf8proc.so.3.0.0
|
||||||
lib/libutf8proc.so.2 -> libutf8proc.so.2.4.1
|
lib/libutf8proc.so.3 -> libutf8proc.so.3.0.0
|
||||||
lib/libutf8proc.so.2.4.1
|
lib/libutf8proc.so.3.0.0
|
||||||
lib/pkgconfig/
|
lib/pkgconfig/
|
||||||
lib/pkgconfig/libutf8proc.pc
|
lib/pkgconfig/libutf8proc.pc
|
||||||
|
27
3rdparty/utf8proc/Makefile
vendored
27
3rdparty/utf8proc/Makefile
vendored
@ -11,7 +11,7 @@ PERL=perl
|
|||||||
CFLAGS ?= -O2
|
CFLAGS ?= -O2
|
||||||
PICFLAG = -fPIC
|
PICFLAG = -fPIC
|
||||||
C99FLAG = -std=c99
|
C99FLAG = -std=c99
|
||||||
WCFLAGS = -Wall -Wextra -pedantic
|
WCFLAGS = -Wsign-conversion -Wall -Wextra -pedantic
|
||||||
UCFLAGS = $(CPPFLAGS) $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS $(UTF8PROC_DEFINES)
|
UCFLAGS = $(CPPFLAGS) $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS $(UTF8PROC_DEFINES)
|
||||||
LDFLAG_SHARED = -shared
|
LDFLAG_SHARED = -shared
|
||||||
SOFLAG = -Wl,-soname
|
SOFLAG = -Wl,-soname
|
||||||
@ -22,9 +22,12 @@ SOFLAG = -Wl,-soname
|
|||||||
# compatibility is broken, even if the API is backward-compatible.
|
# compatibility is broken, even if the API is backward-compatible.
|
||||||
# The API version number is defined in utf8proc.h.
|
# The API version number is defined in utf8proc.h.
|
||||||
# Be sure to also update these ABI versions in MANIFEST and CMakeLists.txt!
|
# Be sure to also update these ABI versions in MANIFEST and CMakeLists.txt!
|
||||||
MAJOR=2
|
MAJOR=3
|
||||||
MINOR=4
|
MINOR=0
|
||||||
PATCH=1
|
PATCH=0
|
||||||
|
|
||||||
|
# api version (also in utf8proc.h and CMakeLists.txt)
|
||||||
|
VERSION=2.9.0
|
||||||
|
|
||||||
OS := $(shell uname)
|
OS := $(shell uname)
|
||||||
ifeq ($(OS),Darwin) # MacOS X
|
ifeq ($(OS),Darwin) # MacOS X
|
||||||
@ -78,7 +81,7 @@ utf8proc.o: utf8proc.h utf8proc.c utf8proc_data.c
|
|||||||
|
|
||||||
libutf8proc.a: utf8proc.o
|
libutf8proc.a: utf8proc.o
|
||||||
rm -f libutf8proc.a
|
rm -f libutf8proc.a
|
||||||
$(AR) rs libutf8proc.a utf8proc.o
|
$(AR) crs libutf8proc.a utf8proc.o
|
||||||
|
|
||||||
libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH): utf8proc.o
|
libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH): utf8proc.o
|
||||||
$(CC) $(LDFLAGS) $(LDFLAG_SHARED) -o $@ $(SOFLAG) -Wl,libutf8proc.so.$(MAJOR) utf8proc.o
|
$(CC) $(LDFLAGS) $(LDFLAG_SHARED) -o $@ $(SOFLAG) -Wl,libutf8proc.so.$(MAJOR) utf8proc.o
|
||||||
@ -168,6 +171,20 @@ test/custom: test/custom.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
|||||||
test/misc: test/misc.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
test/misc: test/misc.c test/tests.o utf8proc.o utf8proc.h test/tests.h
|
||||||
$(CC) $(UCFLAGS) $(LDFLAGS) -DUNICODE_VERSION='"'`$(PERL) -ne "/^UNICODE_VERSION=/ and print $$';" data/Makefile`'"' test/misc.c test/tests.o utf8proc.o -o $@
|
$(CC) $(UCFLAGS) $(LDFLAGS) -DUNICODE_VERSION='"'`$(PERL) -ne "/^UNICODE_VERSION=/ and print $$';" data/Makefile`'"' test/misc.c test/tests.o utf8proc.o -o $@
|
||||||
|
|
||||||
|
# Neither target produces a file named after itself, so mark both as phony;
# otherwise a stray file called "dist" or "distcheck" would silence them.
.PHONY: dist distcheck

# make release tarball from master branch
dist:
	git archive master --prefix=utf8proc-$(VERSION)/ -o utf8proc-$(VERSION).tar.gz

# build tarball, make sure it passes checks, and make sure version numbers are consistent
# (VERSION must match utf8proc.h and CMakeLists.txt; MAJOR.MINOR.PATCH must match
# MANIFEST and the SO_* settings in CMakeLists.txt)
distcheck: dist
	test `grep UTF8PROC_VERSION utf8proc.h | cut -d' ' -f3 | tr '\n' .` = $(VERSION). || exit 1
	test `grep "utf8proc VERSION" CMakeLists.txt |cut -d' ' -f 4` = $(VERSION) || exit 1
	test `grep libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) MANIFEST | wc -l` = 3 || exit 1
	test `grep 'set(SO_' CMakeLists.txt |cut -d' ' -f2 | tr -d ')' | tr '\n' '.'` = $(MAJOR).$(MINOR).$(PATCH). || exit 1
	tar xzf utf8proc-$(VERSION).tar.gz
	# use $(MAKE), not a literal "make", so the sub-make is the same program
	# and inherits command-line flags (-j, -n, variable overrides)
	$(MAKE) -C utf8proc-$(VERSION) check
	rm -rf utf8proc-$(VERSION)
|
||||||
|
|
||||||
check: test/normtest data/NormalizationTest.txt data/Lowercase.txt data/Uppercase.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/iscase test/custom test/charwidth test/misc test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
|
check: test/normtest data/NormalizationTest.txt data/Lowercase.txt data/Uppercase.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/iscase test/custom test/charwidth test/misc test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
|
||||||
$(MAKE) -C bench
|
$(MAKE) -C bench
|
||||||
test/normtest data/NormalizationTest.txt
|
test/normtest data/NormalizationTest.txt
|
||||||
|
29
3rdparty/utf8proc/NEWS.md
vendored
29
3rdparty/utf8proc/NEWS.md
vendored
@ -1,5 +1,29 @@
|
|||||||
# utf8proc release history #
|
# utf8proc release history #
|
||||||
|
|
||||||
|
## Version 2.9.0 ##
|
||||||
|
|
||||||
|
2023-10-20
|
||||||
|
|
||||||
|
- Unicode 15.1 support ([#253]).
|
||||||
|
|
||||||
|
## Version 2.8.0 ##
|
||||||
|
|
||||||
|
2022-10-30
|
||||||
|
|
||||||
|
- Unicode 15 support ([#247]).
|
||||||
|
|
||||||
|
## Version 2.7.0 ##
|
||||||
|
|
||||||
|
2021-12-16
|
||||||
|
|
||||||
|
- Unicode 14 support ([#233]).
|
||||||
|
|
||||||
|
- Support `GNUInstallDirs` in CMake build ([#159]).
|
||||||
|
|
||||||
|
- `cmake` build now installs `pkg-config` file ([#224]).
|
||||||
|
|
||||||
|
- Various build and portability improvements.
|
||||||
|
|
||||||
## Version 2.6.1 ##
|
## Version 2.6.1 ##
|
||||||
|
|
||||||
2020-12-15
|
2020-12-15
|
||||||
@ -409,8 +433,13 @@ Release of version 1.0.1
|
|||||||
[#152]: https://github.com/JuliaStrings/utf8proc/issues/152
|
[#152]: https://github.com/JuliaStrings/utf8proc/issues/152
|
||||||
[#154]: https://github.com/JuliaStrings/utf8proc/issues/154
|
[#154]: https://github.com/JuliaStrings/utf8proc/issues/154
|
||||||
[#156]: https://github.com/JuliaStrings/utf8proc/issues/156
|
[#156]: https://github.com/JuliaStrings/utf8proc/issues/156
|
||||||
|
[#159]: https://github.com/JuliaStrings/utf8proc/issues/159
|
||||||
[#167]: https://github.com/JuliaStrings/utf8proc/issues/167
|
[#167]: https://github.com/JuliaStrings/utf8proc/issues/167
|
||||||
[#173]: https://github.com/JuliaStrings/utf8proc/issues/173
|
[#173]: https://github.com/JuliaStrings/utf8proc/issues/173
|
||||||
[#179]: https://github.com/JuliaStrings/utf8proc/issues/179
|
[#179]: https://github.com/JuliaStrings/utf8proc/issues/179
|
||||||
[#196]: https://github.com/JuliaStrings/utf8proc/issues/196
|
[#196]: https://github.com/JuliaStrings/utf8proc/issues/196
|
||||||
[#205]: https://github.com/JuliaStrings/utf8proc/issues/205
|
[#205]: https://github.com/JuliaStrings/utf8proc/issues/205
|
||||||
|
[#224]: https://github.com/JuliaStrings/utf8proc/issues/224
|
||||||
|
[#233]: https://github.com/JuliaStrings/utf8proc/issues/233
|
||||||
|
[#247]: https://github.com/JuliaStrings/utf8proc/issues/247
|
||||||
|
[#253]: https://github.com/JuliaStrings/utf8proc/issues/253
|
||||||
|
9
3rdparty/utf8proc/README.md
vendored
9
3rdparty/utf8proc/README.md
vendored
@ -1,5 +1,5 @@
|
|||||||
# utf8proc
|
# utf8proc
|
||||||
[](https://travis-ci.org/JuliaStrings/utf8proc)
|
[](https://github.com/JuliaStrings/utf8proc/actions/workflows/build-ci.yml)
|
||||||
[](https://ci.appveyor.com/project/StevenGJohnson/utf8proc)
|
[](https://ci.appveyor.com/project/StevenGJohnson/utf8proc)
|
||||||
|
|
||||||
[utf8proc](http://juliastrings.github.io/utf8proc/) is a small, clean C
|
[utf8proc](http://juliastrings.github.io/utf8proc/) is a small, clean C
|
||||||
@ -38,9 +38,8 @@ For compilation of the C library, run `make`. You can also install the library
|
|||||||
Alternatively, you can compile with `cmake`, e.g. by
|
Alternatively, you can compile with `cmake`, e.g. by
|
||||||
```sh
|
```sh
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cmake -S . -B build
|
||||||
cmake ..
|
cmake --build build
|
||||||
make
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Using other compilers
|
### Using other compilers
|
||||||
@ -60,7 +59,7 @@ The C library is found in this directory after successful compilation
|
|||||||
and is named `libutf8proc.a` (for the static library) and
|
and is named `libutf8proc.a` (for the static library) and
|
||||||
`libutf8proc.so` (for the dynamic library).
|
`libutf8proc.so` (for the dynamic library).
|
||||||
|
|
||||||
The Unicode version supported is 13.0.0.
|
The Unicode version supported is 15.1.0.
|
||||||
|
|
||||||
For Unicode normalizations, the following options are used:
|
For Unicode normalizations, the following options are used:
|
||||||
|
|
||||||
|
2
3rdparty/utf8proc/data/Makefile
vendored
2
3rdparty/utf8proc/data/Makefile
vendored
@ -22,7 +22,7 @@ CharWidths.txt: charwidths.jl EastAsianWidth.txt
|
|||||||
$(JULIA) charwidths.jl > $@
|
$(JULIA) charwidths.jl > $@
|
||||||
|
|
||||||
# Unicode data version (must also update utf8proc_unicode_version function)
|
# Unicode data version (must also update utf8proc_unicode_version function)
|
||||||
UNICODE_VERSION=13.0.0
|
UNICODE_VERSION=15.1.0
|
||||||
|
|
||||||
UnicodeData.txt:
|
UnicodeData.txt:
|
||||||
$(CURL) $(CURLFLAGS) -o $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
|
$(CURL) $(CURLFLAGS) -o $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
|
||||||
|
57
3rdparty/utf8proc/data/data_generator.rb
vendored
Normal file → Executable file
57
3rdparty/utf8proc/data/data_generator.rb
vendored
Normal file → Executable file
@ -67,7 +67,7 @@
|
|||||||
# authorization of the copyright holder.
|
# authorization of the copyright holder.
|
||||||
|
|
||||||
|
|
||||||
$ignorable_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Default_Ignorable_Code_Point.*?# Total code points:/m]
|
$ignorable_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Derived Property: Default_Ignorable_Code_Point.*?# Total code points:/m]
|
||||||
$ignorable = []
|
$ignorable = []
|
||||||
$ignorable_list.each_line do |entry|
|
$ignorable_list.each_line do |entry|
|
||||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
||||||
@ -77,7 +77,7 @@ $ignorable_list.each_line do |entry|
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
$uppercase_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Uppercase.*?# Total code points:/m]
|
$uppercase_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Derived Property: Uppercase.*?# Total code points:/m]
|
||||||
$uppercase = []
|
$uppercase = []
|
||||||
$uppercase_list.each_line do |entry|
|
$uppercase_list.each_line do |entry|
|
||||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
||||||
@ -87,7 +87,7 @@ $uppercase_list.each_line do |entry|
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
$lowercase_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Lowercase.*?# Total code points:/m]
|
$lowercase_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Derived Property: Lowercase.*?# Total code points:/m]
|
||||||
$lowercase = []
|
$lowercase = []
|
||||||
$lowercase_list.each_line do |entry|
|
$lowercase_list.each_line do |entry|
|
||||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
|
||||||
@ -97,7 +97,33 @@ $lowercase_list.each_line do |entry|
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
$grapheme_boundclass_list = File.read("GraphemeBreakProperty.txt")
|
# Build $icb: code point => name of its UTF8PROC_INDIC_CONJUNCT_BREAK_* constant
# (UTF8PROC_INDIC_CONJUNCT_BREAK_NONE for code points that are not listed),
# taken from the three Indic_Conjunct_Break sections of DerivedCoreProperties.txt.
# The file is read once and shared by the three section lookups.
derived_core_properties = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')
$icb_linker_list = derived_core_properties[/# Indic_Conjunct_Break=Linker.*?# Total code points:/m]
$icb_consonant_list = derived_core_properties[/# Indic_Conjunct_Break=Consonant.*?# Total code points:/m]
$icb_extend_list = derived_core_properties[/# Indic_Conjunct_Break=Extend.*?# Total code points:/m]

$icb = Hash.new("UTF8PROC_INDIC_CONJUNCT_BREAK_NONE")

# Sections are applied in this order, so if a code point were listed in more
# than one section the later section's value is the one that is kept.
[
  [$icb_linker_list, "UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER"],
  [$icb_consonant_list, "UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT"],
  [$icb_extend_list, "UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND"]
].each do |section, icb_class|
  # Fail with a readable message instead of a NoMethodError on nil when the
  # section header/footer pattern did not match anything in the data file.
  raise "#{icb_class}: section not found in DerivedCoreProperties.txt" if section.nil?
  section.each_line do |entry|
    if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
      # "XXXX..YYYY" range line: every code point in the inclusive range
      $1.hex.upto($2.hex) { |e2| $icb[e2] = icb_class }
    elsif entry =~ /^[0-9A-F]+/
      # single code point line
      $icb[$&.hex] = icb_class
    end
  end
end
|
||||||
|
|
||||||
|
$grapheme_boundclass_list = File.read("GraphemeBreakProperty.txt", :encoding => 'utf-8')
|
||||||
$grapheme_boundclass = Hash.new("UTF8PROC_BOUNDCLASS_OTHER")
|
$grapheme_boundclass = Hash.new("UTF8PROC_BOUNDCLASS_OTHER")
|
||||||
$grapheme_boundclass_list.each_line do |entry|
|
$grapheme_boundclass_list.each_line do |entry|
|
||||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*([A-Za-z_]+)/
|
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*([A-Za-z_]+)/
|
||||||
@ -107,7 +133,7 @@ $grapheme_boundclass_list.each_line do |entry|
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
$emoji_data_list = File.read("emoji-data.txt")
|
$emoji_data_list = File.read("emoji-data.txt", :encoding => 'utf-8')
|
||||||
$emoji_data_list.each_line do |entry|
|
$emoji_data_list.each_line do |entry|
|
||||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*Extended_Pictographic\W/
|
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*Extended_Pictographic\W/
|
||||||
$1.hex.upto($2.hex) { |e2| $grapheme_boundclass[e2] = "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC" }
|
$1.hex.upto($2.hex) { |e2| $grapheme_boundclass[e2] = "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC" }
|
||||||
@ -120,7 +146,7 @@ $emoji_data_list.each_line do |entry|
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
$charwidth_list = File.read("CharWidths.txt")
|
$charwidth_list = File.read("CharWidths.txt", :encoding => 'utf-8')
|
||||||
$charwidth = Hash.new(0)
|
$charwidth = Hash.new(0)
|
||||||
$charwidth_list.each_line do |entry|
|
$charwidth_list.each_line do |entry|
|
||||||
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*([0-9]+)/
|
if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*([0-9]+)/
|
||||||
@ -130,13 +156,13 @@ $charwidth_list.each_line do |entry|
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
$exclusions = File.read("CompositionExclusions.txt")[/# \(1\) Script Specifics.*?# Total code points:/m]
|
$exclusions = File.read("CompositionExclusions.txt", :encoding => 'utf-8')[/# \(1\) Script Specifics.*?# Total code points:/m]
|
||||||
$exclusions = $exclusions.chomp.split("\n").collect { |e| e.hex }
|
$exclusions = $exclusions.chomp.split("\n").collect { |e| e.hex }
|
||||||
|
|
||||||
$excl_version = File.read("CompositionExclusions.txt")[/# \(2\) Post Composition Version precomposed characters.*?# Total code points:/m]
|
$excl_version = File.read("CompositionExclusions.txt", :encoding => 'utf-8')[/# \(2\) Post Composition Version precomposed characters.*?# Total code points:/m]
|
||||||
$excl_version = $excl_version.chomp.split("\n").collect { |e| e.hex }
|
$excl_version = $excl_version.chomp.split("\n").collect { |e| e.hex }
|
||||||
|
|
||||||
$case_folding_string = File.open("CaseFolding.txt", :encoding => 'utf-8').read
|
$case_folding_string = File.read("CaseFolding.txt", :encoding => 'utf-8')
|
||||||
$case_folding = {}
|
$case_folding = {}
|
||||||
$case_folding_string.chomp.split("\n").each do |line|
|
$case_folding_string.chomp.split("\n").each do |line|
|
||||||
next unless line =~ /([0-9A-F]+); [CF]; ([0-9A-F ]+);/i
|
next unless line =~ /([0-9A-F]+); [CF]; ([0-9A-F ]+);/i
|
||||||
@ -174,13 +200,13 @@ def cpary2c(array)
|
|||||||
return "UINT16_MAX" if array.nil? || array.length == 0
|
return "UINT16_MAX" if array.nil? || array.length == 0
|
||||||
lencode = array.length - 1 #no sequence has len 0, so we encode len 1 as 0, len 2 as 1, ...
|
lencode = array.length - 1 #no sequence has len 0, so we encode len 1 as 0, len 2 as 1, ...
|
||||||
array = cpary2utf16encoded(array)
|
array = cpary2utf16encoded(array)
|
||||||
if lencode >= 7 #we have only 3 bits for the length (which is already cutting it close. might need to change it to 2 bits in future Unicode versions)
|
if lencode >= 3 #we have only 2 bits for the length
|
||||||
array = [lencode] + array
|
array = [lencode] + array
|
||||||
lencode = 7
|
lencode = 3
|
||||||
end
|
end
|
||||||
idx = pushary(array)
|
idx = pushary(array)
|
||||||
raise "Array index out of bound" if idx > 0x1FFF
|
raise "Array index out of bound" if idx > 0x3FFF
|
||||||
return "#{idx | (lencode << 13)}"
|
return "#{idx | (lencode << 14)}"
|
||||||
end
|
end
|
||||||
def singlecpmap(cp)
|
def singlecpmap(cp)
|
||||||
return "UINT16_MAX" if cp == nil
|
return "UINT16_MAX" if cp == nil
|
||||||
@ -249,7 +275,8 @@ class UnicodeChar
|
|||||||
"#{$ignorable.include?(code)}, " <<
|
"#{$ignorable.include?(code)}, " <<
|
||||||
"#{%W[Zl Zp Cc Cf].include?(category) and not [0x200C, 0x200D].include?(category)}, " <<
|
"#{%W[Zl Zp Cc Cf].include?(category) and not [0x200C, 0x200D].include?(category)}, " <<
|
||||||
"#{$charwidth[code]}, 0, " <<
|
"#{$charwidth[code]}, 0, " <<
|
||||||
"#{$grapheme_boundclass[code]}},\n"
|
"#{$grapheme_boundclass[code]}, " <<
|
||||||
|
"#{$icb[code]}},\n"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -415,7 +442,7 @@ end
|
|||||||
$stdout << "};\n\n"
|
$stdout << "};\n\n"
|
||||||
|
|
||||||
$stdout << "static const utf8proc_property_t utf8proc_properties[] = {\n"
|
$stdout << "static const utf8proc_property_t utf8proc_properties[] = {\n"
|
||||||
$stdout << " {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER},\n"
|
$stdout << " {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER, UTF8PROC_INDIC_CONJUNCT_BREAK_NONE},\n"
|
||||||
properties.each { |line|
|
properties.each { |line|
|
||||||
$stdout << line
|
$stdout << line
|
||||||
}
|
}
|
||||||
|
10
3rdparty/utf8proc/libutf8proc.pc.cmakein
vendored
Normal file
10
3rdparty/utf8proc/libutf8proc.pc.cmakein
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
prefix=@CMAKE_INSTALL_PREFIX@
|
||||||
|
exec_prefix=@CMAKE_INSTALL_FULL_BINDIR@
|
||||||
|
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||||
|
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||||
|
|
||||||
|
Name: libutf8proc
|
||||||
|
Description: UTF8 processing
|
||||||
|
Version: @PROJECT_VERSION@
|
||||||
|
Libs: -L${libdir} -lutf8proc
|
||||||
|
Cflags: -I${includedir} -DUTF8PROC_EXPORTS
|
12
3rdparty/utf8proc/test/case.c
vendored
12
3rdparty/utf8proc/test/case.c
vendored
@ -26,27 +26,27 @@ int main(int argc, char **argv)
|
|||||||
++error;
|
++error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sizeof(wint_t) > 2 || c < (1<<16)) {
|
if (sizeof(wint_t) > 2 || (c < (1<<16) && u < (1<<16) && l < (1<<16))) {
|
||||||
wint_t l0 = towlower(c), u0 = towupper(c);
|
wint_t l0 = towlower((wint_t)c), u0 = towupper((wint_t)c);
|
||||||
|
|
||||||
/* OS unicode tables may be out of date. But if they
|
/* OS unicode tables may be out of date. But if they
|
||||||
do have a lower/uppercase mapping, hopefully it
|
do have a lower/uppercase mapping, hopefully it
|
||||||
is correct? */
|
is correct? */
|
||||||
if (l0 != c && l0 != l) {
|
if (l0 != (wint_t)c && l0 != (wint_t)l) {
|
||||||
fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
|
fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
|
||||||
l, c, l0);
|
l, c, l0);
|
||||||
++error;
|
++error;
|
||||||
}
|
}
|
||||||
else if (l0 != l) { /* often true for out-of-date OS unicode */
|
else if (l0 != (wint_t)l) { /* often true for out-of-date OS unicode */
|
||||||
++better;
|
++better;
|
||||||
/* printf("%x != towlower(%x) == %x\n", l, c, l0); */
|
/* printf("%x != towlower(%x) == %x\n", l, c, l0); */
|
||||||
}
|
}
|
||||||
if (u0 != c && u0 != u) {
|
if (u0 != (wint_t)c && u0 != (wint_t)u) {
|
||||||
fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
|
fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
|
||||||
u, c, u0);
|
u, c, u0);
|
||||||
++error;
|
++error;
|
||||||
}
|
}
|
||||||
else if (u0 != u) { /* often true for out-of-date OS unicode */
|
else if (u0 != (wint_t)u) { /* often true for out-of-date OS unicode */
|
||||||
++better;
|
++better;
|
||||||
/* printf("%x != towupper(%x) == %x\n", u, c, u0); */
|
/* printf("%x != towupper(%x) == %x\n", u, c, u0); */
|
||||||
}
|
}
|
||||||
|
54
3rdparty/utf8proc/test/fuzz_main.c
vendored
Normal file
54
3rdparty/utf8proc/test/fuzz_main.c
vendored
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/* Fuzz target entry point, works without libFuzzer */
|
||||||
|
|
||||||
|
int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
|
||||||
|
|
||||||
|
/*
 * Standalone driver for the fuzz target: reads the whole file named by
 * argv[1] into memory and hands its bytes to LLVMFuzzerTestOneInput() once.
 *
 * Diagnostics are written to stderr. The process exit status is always 0,
 * including when no file is given, the file cannot be read, or it is empty.
 */
int main(int argc, char **argv)
{
    FILE *f = NULL;   /* NULL until fopen() succeeds, so cleanup can test it */
    char *buf = NULL; /* NULL until malloc() succeeds; free(NULL) is a no-op */
    long siz_buf;

    if(argc < 2)
    {
        fprintf(stderr, "no input file\n");
        goto err;
    }

    f = fopen(argv[1], "rb");
    if(f == NULL)
    {
        fprintf(stderr, "error opening input file %s\n", argv[1]);
        goto err;
    }

    /* Determine the file size by seeking to the end. */
    if(fseek(f, 0, SEEK_END) != 0)
    {
        fprintf(stderr, "fseek() failed\n");
        goto err;
    }

    siz_buf = ftell(f);
    rewind(f);

    /* Covers both ftell() failure (-1) and an empty file. */
    if(siz_buf < 1) goto err;

    buf = (char*)malloc((size_t)siz_buf);
    if(buf == NULL)
    {
        fprintf(stderr, "malloc() failed\n");
        goto err;
    }

    /* Read the file as a single item so a short read is reported as failure. */
    if(fread(buf, (size_t)siz_buf, 1, f) != 1)
    {
        fprintf(stderr, "fread() failed\n");
        goto err;
    }

    (void)LLVMFuzzerTestOneInput((uint8_t*)buf, (size_t)siz_buf);

err:
    /* Single cleanup path: release the file handle and the buffer. */
    if(f != NULL) fclose(f);
    free(buf);

    return 0;
}
|
84
3rdparty/utf8proc/test/fuzzer.c
vendored
Normal file
84
3rdparty/utf8proc/test/fuzzer.c
vendored
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
#include <utf8proc.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
|
||||||
|
{
|
||||||
|
if(size < 1) return 0;
|
||||||
|
|
||||||
|
/* Avoid timeout with long inputs */
|
||||||
|
if(size > (64 * 1024)) return 0;
|
||||||
|
|
||||||
|
if(data[size-1] != '\0') return 0;
|
||||||
|
|
||||||
|
const uint8_t* ptr = data;
|
||||||
|
utf8proc_int32_t c = 0, c_prev = 0, state = 0;
|
||||||
|
utf8proc_option_t options;
|
||||||
|
utf8proc_ssize_t ret, bytes = 0;
|
||||||
|
size_t len = strlen((const char*)data);
|
||||||
|
|
||||||
|
while(bytes != len)
|
||||||
|
{
|
||||||
|
ret = utf8proc_iterate(ptr, -1, &c);
|
||||||
|
|
||||||
|
if(ret < 0 || ret == 0) break;
|
||||||
|
|
||||||
|
bytes += ret;
|
||||||
|
ptr += ret;
|
||||||
|
|
||||||
|
utf8proc_tolower(c);
|
||||||
|
utf8proc_toupper(c);
|
||||||
|
utf8proc_totitle(c);
|
||||||
|
utf8proc_islower(c);
|
||||||
|
utf8proc_isupper(c);
|
||||||
|
utf8proc_charwidth(c);
|
||||||
|
utf8proc_category(c);
|
||||||
|
utf8proc_category_string(c);
|
||||||
|
utf8proc_codepoint_valid(c);
|
||||||
|
|
||||||
|
utf8proc_grapheme_break(c_prev, c);
|
||||||
|
utf8proc_grapheme_break_stateful(c_prev, c, &state);
|
||||||
|
|
||||||
|
c_prev = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
utf8proc_int32_t *copy = size >= 4 ? NULL : malloc(size);
|
||||||
|
|
||||||
|
if(copy)
|
||||||
|
{
|
||||||
|
size /= 4;
|
||||||
|
|
||||||
|
options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS | UTF8PROC_NLF2PS;
|
||||||
|
memcpy(copy, data, size);
|
||||||
|
utf8proc_normalize_utf32(copy, size, options);
|
||||||
|
|
||||||
|
options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS;
|
||||||
|
memcpy(copy, data, size);
|
||||||
|
utf8proc_normalize_utf32(copy, size, options);
|
||||||
|
|
||||||
|
options = UTF8PROC_STRIPCC | UTF8PROC_NLF2PS;
|
||||||
|
memcpy(copy, data, size);
|
||||||
|
utf8proc_normalize_utf32(copy, size, options);
|
||||||
|
|
||||||
|
options = UTF8PROC_STRIPCC;
|
||||||
|
memcpy(copy, data, size);
|
||||||
|
utf8proc_normalize_utf32(copy, size, options);
|
||||||
|
|
||||||
|
options = UTF8PROC_LUMP;
|
||||||
|
memcpy(copy, data, size);
|
||||||
|
utf8proc_normalize_utf32(copy, size, options);
|
||||||
|
|
||||||
|
options = 0;
|
||||||
|
memcpy(copy, data, size);
|
||||||
|
utf8proc_normalize_utf32(copy, size, options);
|
||||||
|
|
||||||
|
free(copy);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(utf8proc_NFD(data));
|
||||||
|
free(utf8proc_NFC(data));
|
||||||
|
free(utf8proc_NFKD(data));
|
||||||
|
free(utf8proc_NFKC(data));
|
||||||
|
free(utf8proc_NFKC_Casefold(data));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
12
3rdparty/utf8proc/test/graphemetest.c
vendored
12
3rdparty/utf8proc/test/graphemetest.c
vendored
@ -43,7 +43,7 @@ void checkline(const char *_buf, bool verbose) {
|
|||||||
else
|
else
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
|
glen = utf8proc_map(utf8, (utf8proc_ssize_t)j, &g, UTF8PROC_CHARBOUND);
|
||||||
if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
|
if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
|
||||||
/* the test file contains surrogate codepoints, which are only for UTF-16 */
|
/* the test file contains surrogate codepoints, which are only for UTF-16 */
|
||||||
printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
|
printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
|
||||||
@ -66,7 +66,7 @@ void checkline(const char *_buf, bool verbose) {
|
|||||||
utf8proc_bool expectbreak = false;
|
utf8proc_bool expectbreak = false;
|
||||||
do {
|
do {
|
||||||
utf8proc_int32_t codepoint;
|
utf8proc_int32_t codepoint;
|
||||||
i += utf8proc_iterate(src + i, si - i, &codepoint);
|
i += (size_t)utf8proc_iterate(src + i, (utf8proc_ssize_t)(si - i), &codepoint);
|
||||||
check(codepoint >= 0, "invalid UTF-8 data");
|
check(codepoint >= 0, "invalid UTF-8 data");
|
||||||
if (codepoint == 0x002F)
|
if (codepoint == 0x002F)
|
||||||
expectbreak = true;
|
expectbreak = true;
|
||||||
@ -110,6 +110,7 @@ int main(int argc, char **argv)
|
|||||||
utf8proc_uint8_t *g;
|
utf8proc_uint8_t *g;
|
||||||
glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
|
glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
|
||||||
check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
|
check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
|
||||||
|
check(glen != 6, "mishandled u+ffff and u+fffe grapheme breaks");
|
||||||
free(g);
|
free(g);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -118,6 +119,13 @@ int main(int argc, char **argv)
|
|||||||
checkline("/ 1f926 1f3fc 200d 2642 fe0f /", true); /* facepalm + pale skin + zwj + male sign + FE0F */
|
checkline("/ 1f926 1f3fc 200d 2642 fe0f /", true); /* facepalm + pale skin + zwj + male sign + FE0F */
|
||||||
checkline("/ 1f468 1f3fb 200d 1f91d 200d 1f468 1f3fd /", true); /* man face + pale skin + zwj + hand holding + zwj + man face + dark skin */
|
checkline("/ 1f468 1f3fb 200d 1f91d 200d 1f468 1f3fd /", true); /* man face + pale skin + zwj + hand holding + zwj + man face + dark skin */
|
||||||
|
|
||||||
|
/* more GB9c tests */
|
||||||
|
checkline("/ 0915 0300 094d 0300 0924 / 0915 /", true);
|
||||||
|
checkline("/ 0915 0300 094d 0300 094d 0924 / 0915 /", true);
|
||||||
|
checkline("/ 0915 0300 0300 / 0924 / 0915 /", true);
|
||||||
|
checkline("/ 0915 0300 094d 0300 / 0078 /", true);
|
||||||
|
checkline("/ 0300 094d 0300 / 0924 / 0915 /", true);
|
||||||
|
|
||||||
check(utf8proc_grapheme_break(0x03b1, 0x03b2), "failed 03b1 / 03b2 test");
|
check(utf8proc_grapheme_break(0x03b1, 0x03b2), "failed 03b1 / 03b2 test");
|
||||||
check(!utf8proc_grapheme_break(0x03b1, 0x0302), "failed 03b1 0302 test");
|
check(!utf8proc_grapheme_break(0x03b1, 0x0302), "failed 03b1 0302 test");
|
||||||
|
|
||||||
|
12
3rdparty/utf8proc/test/iterate.c
vendored
12
3rdparty/utf8proc/test/iterate.c
vendored
@ -8,7 +8,7 @@ static int error;
|
|||||||
#define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__)
|
#define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__)
|
||||||
#define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__)
|
#define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__)
|
||||||
|
|
||||||
static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line)
|
static void testbytes(utf8proc_uint8_t *buf, utf8proc_ssize_t len, utf8proc_ssize_t retval, int line)
|
||||||
{
|
{
|
||||||
utf8proc_int32_t out[16];
|
utf8proc_int32_t out[16];
|
||||||
utf8proc_ssize_t ret;
|
utf8proc_ssize_t ret;
|
||||||
@ -16,13 +16,13 @@ static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int
|
|||||||
/* Make a copy to ensure that memory is left uninitialized after "len"
|
/* Make a copy to ensure that memory is left uninitialized after "len"
|
||||||
* bytes. This way, Valgrind can detect overreads.
|
* bytes. This way, Valgrind can detect overreads.
|
||||||
*/
|
*/
|
||||||
unsigned char tmp[16];
|
utf8proc_uint8_t tmp[16];
|
||||||
memcpy(tmp, buf, len);
|
memcpy(tmp, buf, (unsigned long int)len);
|
||||||
|
|
||||||
tests++;
|
tests++;
|
||||||
if ((ret = utf8proc_iterate(tmp, len, out)) != retval) {
|
if ((ret = utf8proc_iterate(tmp, len, out)) != retval) {
|
||||||
fprintf(stderr, "Failed (%d):", line);
|
fprintf(stderr, "Failed (%d):", line);
|
||||||
for (int i = 0; i < len ; i++) {
|
for (utf8proc_ssize_t i = 0; i < len ; i++) {
|
||||||
fprintf(stderr, " 0x%02x", tmp[i]);
|
fprintf(stderr, " 0x%02x", tmp[i]);
|
||||||
}
|
}
|
||||||
fprintf(stderr, " -> %zd\n", ret);
|
fprintf(stderr, " -> %zd\n", ret);
|
||||||
@ -32,8 +32,8 @@ static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int
|
|||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
uint32_t byt;
|
utf8proc_int32_t byt;
|
||||||
unsigned char buf[16];
|
utf8proc_uint8_t buf[16];
|
||||||
|
|
||||||
(void) argc; (void) argv; /* unused */
|
(void) argc; (void) argv; /* unused */
|
||||||
|
|
||||||
|
13
3rdparty/utf8proc/test/ossfuzz.sh
vendored
Executable file
13
3rdparty/utf8proc/test/ossfuzz.sh
vendored
Executable file
@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/bash -eu
|
||||||
|
# This script is meant to be run by
|
||||||
|
# https://github.com/google/oss-fuzz/blob/master/projects/utf8proc/Dockerfile
|
||||||
|
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -DUTF8PROC_ENABLE_TESTING=ON -DLIB_FUZZING_ENGINE="$LIB_FUZZING_ENGINE"
|
||||||
|
make -j$(nproc)
|
||||||
|
|
||||||
|
cp $SRC/utf8proc/build/fuzzer $OUT/utf8proc_fuzzer
|
||||||
|
|
||||||
|
find $SRC/utf8proc/test -name "*.txt" | \
|
||||||
|
xargs zip $OUT/utf8proc_fuzzer_seed_corpus.zip
|
8
3rdparty/utf8proc/test/printproperty.c
vendored
8
3rdparty/utf8proc/test/printproperty.c
vendored
@ -8,12 +8,14 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
for (i = 1; i < argc; ++i) {
|
for (i = 1; i < argc; ++i) {
|
||||||
utf8proc_uint8_t cstr[16], *map;
|
utf8proc_uint8_t cstr[16], *map;
|
||||||
unsigned int c;
|
utf8proc_uint32_t x;
|
||||||
|
utf8proc_int32_t c;
|
||||||
if (!strcmp(argv[i], "-V")) {
|
if (!strcmp(argv[i], "-V")) {
|
||||||
printf("utf8proc version %s\n", utf8proc_version());
|
printf("utf8proc version %s\n", utf8proc_version());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
|
check(sscanf(argv[i],"%x", &x) == 1, "invalid hex input %s", argv[i]);
|
||||||
|
c = (utf8proc_int32_t)x;
|
||||||
const utf8proc_property_t *p = utf8proc_get_property(c);
|
const utf8proc_property_t *p = utf8proc_get_property(c);
|
||||||
|
|
||||||
if (utf8proc_codepoint_valid(c))
|
if (utf8proc_codepoint_valid(c))
|
||||||
@ -37,6 +39,7 @@ int main(int argc, char **argv)
|
|||||||
" ignorable = %d\n"
|
" ignorable = %d\n"
|
||||||
" control_boundary = %d\n"
|
" control_boundary = %d\n"
|
||||||
" boundclass = %d\n"
|
" boundclass = %d\n"
|
||||||
|
" indic_conjunct_break = %d\n"
|
||||||
" charwidth = %d\n",
|
" charwidth = %d\n",
|
||||||
argv[i], (char*) cstr,
|
argv[i], (char*) cstr,
|
||||||
utf8proc_category_string(c),
|
utf8proc_category_string(c),
|
||||||
@ -53,6 +56,7 @@ int main(int argc, char **argv)
|
|||||||
p->ignorable,
|
p->ignorable,
|
||||||
p->control_boundary,
|
p->control_boundary,
|
||||||
p->boundclass,
|
p->boundclass,
|
||||||
|
p->indic_conjunct_break,
|
||||||
utf8proc_charwidth(c));
|
utf8proc_charwidth(c));
|
||||||
free(map);
|
free(map);
|
||||||
}
|
}
|
||||||
|
3
3rdparty/utf8proc/test/tests.c
vendored
3
3rdparty/utf8proc/test/tests.c
vendored
@ -29,7 +29,8 @@ size_t skipspaces(const unsigned char *buf, size_t i)
|
|||||||
in dest, returning the number of bytes read from buf */
|
in dest, returning the number of bytes read from buf */
|
||||||
size_t encode(unsigned char *dest, const unsigned char *buf)
|
size_t encode(unsigned char *dest, const unsigned char *buf)
|
||||||
{
|
{
|
||||||
size_t i = 0, j, d = 0;
|
size_t i = 0, j;
|
||||||
|
utf8proc_ssize_t d = 0;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
int c;
|
int c;
|
||||||
i = skipspaces(buf, i);
|
i = skipspaces(buf, i);
|
||||||
|
85
3rdparty/utf8proc/utf8proc.c
vendored
85
3rdparty/utf8proc/utf8proc.c
vendored
@ -1,6 +1,6 @@
|
|||||||
/* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
|
/* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2014-2019 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
* Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
||||||
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
@ -101,7 +101,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
|
UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
|
||||||
return "13.0.0";
|
return "15.1.0";
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
||||||
@ -125,7 +125,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
|||||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
||||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
|
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
|
||||||
) {
|
) {
|
||||||
utf8proc_uint32_t uc;
|
utf8proc_int32_t uc;
|
||||||
const utf8proc_uint8_t *end;
|
const utf8proc_uint8_t *end;
|
||||||
|
|
||||||
*dst = -1;
|
*dst = -1;
|
||||||
@ -137,7 +137,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
// Must be between 0xc2 and 0xf4 inclusive to be valid
|
// Must be between 0xc2 and 0xf4 inclusive to be valid
|
||||||
if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
|
if ((utf8proc_uint32_t)(uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
if (uc < 0xe0) { // 2-byte sequence
|
if (uc < 0xe0) { // 2-byte sequence
|
||||||
// Must have valid continuation character
|
// Must have valid continuation character
|
||||||
if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
|
if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
|
||||||
@ -288,35 +288,54 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
|
|||||||
true; // GB999
|
true; // GB999
|
||||||
}
|
}
|
||||||
|
|
||||||
static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
|
static utf8proc_bool grapheme_break_extended(int lbc, int tbc, int licb, int ticb, utf8proc_int32_t *state)
|
||||||
{
|
{
|
||||||
if (state) {
|
if (state) {
|
||||||
int lbc_override;
|
int state_bc, state_icb; /* boundclass and indic_conjunct_break state */
|
||||||
if (*state == UTF8PROC_BOUNDCLASS_START)
|
if (*state == 0) { /* state initialization */
|
||||||
*state = lbc_override = lbc;
|
state_bc = lbc;
|
||||||
else
|
state_icb = licb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT ? licb : UTF8PROC_INDIC_CONJUNCT_BREAK_NONE;
|
||||||
lbc_override = *state;
|
}
|
||||||
utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
|
else { /* lbc and licb are already encoded in *state */
|
||||||
|
state_bc = *state & 0xff; // 1st byte of state is bound class
|
||||||
|
state_icb = *state >> 8; // 2nd byte of state is indic conjunct break
|
||||||
|
}
|
||||||
|
|
||||||
|
utf8proc_bool break_permitted = grapheme_break_simple(state_bc, tbc) &&
|
||||||
|
!(state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER
|
||||||
|
&& ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT); // GB9c
|
||||||
|
|
||||||
|
// Special support for GB9c. Don't break between two consonants
|
||||||
|
// separated by 1+ linker characters and 0+ extend characters in any order.
|
||||||
|
// After a consonant, we enter LINKER state after at least one linker.
|
||||||
|
if (ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT
|
||||||
|
|| state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT
|
||||||
|
|| state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND)
|
||||||
|
state_icb = ticb;
|
||||||
|
else if (state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER)
|
||||||
|
state_icb = ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND ?
|
||||||
|
UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER : ticb;
|
||||||
|
|
||||||
// Special support for GB 12/13 made possible by GB999. After two RI
|
// Special support for GB 12/13 made possible by GB999. After two RI
|
||||||
// class codepoints we want to force a break. Do this by resetting the
|
// class codepoints we want to force a break. Do this by resetting the
|
||||||
// second RI's bound class to UTF8PROC_BOUNDCLASS_OTHER, to force a break
|
// second RI's bound class to UTF8PROC_BOUNDCLASS_OTHER, to force a break
|
||||||
// after that character according to GB999 (unless of course such a break is
|
// after that character according to GB999 (unless of course such a break is
|
||||||
// forbidden by a different rule such as GB9).
|
// forbidden by a different rule such as GB9).
|
||||||
if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
|
if (state_bc == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
|
||||||
*state = UTF8PROC_BOUNDCLASS_OTHER;
|
state_bc = UTF8PROC_BOUNDCLASS_OTHER;
|
||||||
// Special support for GB11 (emoji extend* zwj / emoji)
|
// Special support for GB11 (emoji extend* zwj / emoji)
|
||||||
else if (*state == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
|
else if (state_bc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
|
||||||
if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
|
if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
|
||||||
*state = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
|
state_bc = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
|
||||||
else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
|
else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
|
||||||
*state = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
|
state_bc = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
|
||||||
else
|
else
|
||||||
*state = tbc;
|
state_bc = tbc;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
*state = tbc;
|
state_bc = tbc;
|
||||||
|
|
||||||
|
*state = state_bc + (state_icb << 8);
|
||||||
return break_permitted;
|
return break_permitted;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -326,8 +345,12 @@ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t
|
|||||||
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
|
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
|
||||||
utf8proc_int32_t c1, utf8proc_int32_t c2, utf8proc_int32_t *state) {
|
utf8proc_int32_t c1, utf8proc_int32_t c2, utf8proc_int32_t *state) {
|
||||||
|
|
||||||
return grapheme_break_extended(utf8proc_get_property(c1)->boundclass,
|
const utf8proc_property_t *p1 = utf8proc_get_property(c1);
|
||||||
utf8proc_get_property(c2)->boundclass,
|
const utf8proc_property_t *p2 = utf8proc_get_property(c2);
|
||||||
|
return grapheme_break_extended(p1->boundclass,
|
||||||
|
p2->boundclass,
|
||||||
|
p1->indic_conjunct_break,
|
||||||
|
p2->indic_conjunct_break,
|
||||||
state);
|
state);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -356,9 +379,9 @@ static utf8proc_int32_t seqindex_decode_index(const utf8proc_uint32_t seqindex)
|
|||||||
|
|
||||||
static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
||||||
utf8proc_ssize_t written = 0;
|
utf8proc_ssize_t written = 0;
|
||||||
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
|
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x3FFF];
|
||||||
int len = seqindex >> 13;
|
int len = seqindex >> 14;
|
||||||
if (len >= 7) {
|
if (len >= 3) {
|
||||||
len = *entry;
|
len = *entry;
|
||||||
entry++;
|
entry++;
|
||||||
}
|
}
|
||||||
@ -376,19 +399,19 @@ static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqinde
|
|||||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
|
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
|
||||||
{
|
{
|
||||||
utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
|
utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
|
||||||
return cl != UINT16_MAX ? seqindex_decode_index(cl) : c;
|
return cl != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cl) : c;
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
|
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
|
||||||
{
|
{
|
||||||
utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
|
utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
|
||||||
return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
|
return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
|
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
|
||||||
{
|
{
|
||||||
utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
|
utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
|
||||||
return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
|
return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c)
|
UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c)
|
||||||
@ -410,7 +433,7 @@ UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
|
UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
|
||||||
return utf8proc_get_property(c)->category;
|
return (utf8proc_category_t) utf8proc_get_property(c)->category;
|
||||||
}
|
}
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
|
UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
|
||||||
@ -420,7 +443,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
|
|||||||
|
|
||||||
#define utf8proc_decompose_lump(replacement_uc) \
|
#define utf8proc_decompose_lump(replacement_uc) \
|
||||||
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
||||||
options & ~UTF8PROC_LUMP, last_boundclass)
|
options & ~(unsigned int)UTF8PROC_LUMP, last_boundclass)
|
||||||
|
|
||||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
||||||
const utf8proc_property_t *property;
|
const utf8proc_property_t *property;
|
||||||
@ -498,8 +521,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
|
|||||||
}
|
}
|
||||||
if (options & UTF8PROC_CHARBOUND) {
|
if (options & UTF8PROC_CHARBOUND) {
|
||||||
utf8proc_bool boundary;
|
utf8proc_bool boundary;
|
||||||
int tbc = property->boundclass;
|
boundary = grapheme_break_extended(0, property->boundclass, 0, property->indic_conjunct_break,
|
||||||
boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
|
last_boundclass);
|
||||||
if (boundary) {
|
if (boundary) {
|
||||||
if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
|
if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
|
||||||
if (bufsize >= 2) dst[1] = uc;
|
if (bufsize >= 2) dst[1] = uc;
|
||||||
@ -735,7 +758,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
|
|||||||
*dstptr = NULL;
|
*dstptr = NULL;
|
||||||
result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
|
result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
|
||||||
if (result < 0) return result;
|
if (result < 0) return result;
|
||||||
buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
|
buffer = (utf8proc_int32_t *) malloc(((utf8proc_size_t)result) * sizeof(utf8proc_int32_t) + 1);
|
||||||
if (!buffer) return UTF8PROC_ERROR_NOMEM;
|
if (!buffer) return UTF8PROC_ERROR_NOMEM;
|
||||||
result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
|
result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
|
||||||
if (result < 0) {
|
if (result < 0) {
|
||||||
|
22
3rdparty/utf8proc/utf8proc.h
vendored
22
3rdparty/utf8proc/utf8proc.h
vendored
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2014-2019 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
* Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
|
||||||
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
@ -71,9 +71,9 @@
|
|||||||
/** The MAJOR version number (increased when backwards API compatibility is broken). */
|
/** The MAJOR version number (increased when backwards API compatibility is broken). */
|
||||||
#define UTF8PROC_VERSION_MAJOR 2
|
#define UTF8PROC_VERSION_MAJOR 2
|
||||||
/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
|
/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
|
||||||
#define UTF8PROC_VERSION_MINOR 6
|
#define UTF8PROC_VERSION_MINOR 9
|
||||||
/** The PATCH version (increased for fixes that do not change the API). */
|
/** The PATCH version (increased for fixes that do not change the API). */
|
||||||
#define UTF8PROC_VERSION_PATCH 1
|
#define UTF8PROC_VERSION_PATCH 0
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -273,7 +273,8 @@ typedef struct utf8proc_property_struct {
|
|||||||
* Boundclass.
|
* Boundclass.
|
||||||
* @see utf8proc_boundclass_t.
|
* @see utf8proc_boundclass_t.
|
||||||
*/
|
*/
|
||||||
unsigned boundclass:8;
|
unsigned boundclass:6;
|
||||||
|
unsigned indic_conjunct_break:2;
|
||||||
} utf8proc_property_t;
|
} utf8proc_property_t;
|
||||||
|
|
||||||
/** Unicode categories. */
|
/** Unicode categories. */
|
||||||
@ -388,6 +389,14 @@ typedef enum {
|
|||||||
UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */
|
UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */
|
||||||
} utf8proc_boundclass_t;
|
} utf8proc_boundclass_t;
|
||||||
|
|
||||||
|
/** Indic_Conjunct_Break property. (TR44) */
|
||||||
|
typedef enum {
|
||||||
|
UTF8PROC_INDIC_CONJUNCT_BREAK_NONE = 0,
|
||||||
|
UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER = 1,
|
||||||
|
UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT = 2,
|
||||||
|
UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND = 3,
|
||||||
|
} utf8proc_indic_conjunct_break_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Function pointer type passed to @ref utf8proc_map_custom and
|
* Function pointer type passed to @ref utf8proc_map_custom and
|
||||||
* @ref utf8proc_decompose_custom, which is used to specify a user-defined
|
* @ref utf8proc_decompose_custom, which is used to specify a user-defined
|
||||||
@ -481,8 +490,9 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
|
|||||||
* - @ref UTF8PROC_STRIPNA - remove unassigned codepoints
|
* - @ref UTF8PROC_STRIPNA - remove unassigned codepoints
|
||||||
* @param last_boundclass
|
* @param last_boundclass
|
||||||
* Pointer to an integer variable containing
|
* Pointer to an integer variable containing
|
||||||
* the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND
|
* the previous codepoint's (boundclass + (indic_conjunct_break << 8)) if the @ref UTF8PROC_CHARBOUND
|
||||||
* option is used. Otherwise, this parameter is ignored.
|
* option is used. If the string is being processed in order, this can be initialized to 0 for
|
||||||
|
* the beginning of the string, and is thereafter updated automatically. If @ref UTF8PROC_CHARBOUND is not used, this parameter is ignored.
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
* In case of success, the number of codepoints written is returned; in case
|
* In case of success, the number of codepoints written is returned; in case
|
||||||
|
24975
3rdparty/utf8proc/utf8proc_data.c
vendored
24975
3rdparty/utf8proc/utf8proc_data.c
vendored
File diff suppressed because it is too large
Load Diff
2
COPYING
2
COPYING
@ -197,7 +197,7 @@ tap-windows.h
|
|||||||
Copyright (C) 2002-2014 OpenVPN Technologies, Inc.
|
Copyright (C) 2002-2014 OpenVPN Technologies, Inc.
|
||||||
|
|
||||||
utf8proc
|
utf8proc
|
||||||
Copyright (c) 2014-2015 Steven G. Johnson, Jiahao Chen, Tony Kelman,
|
Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Tony Kelman,
|
||||||
Jonas Fonseca, and other contributors
|
Jonas Fonseca, and other contributors
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a
|
Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
@ -1747,11 +1747,6 @@ project "utf8proc"
|
|||||||
"verbose=-1",
|
"verbose=-1",
|
||||||
}
|
}
|
||||||
|
|
||||||
configuration { "gmake or ninja" }
|
|
||||||
buildoptions_c {
|
|
||||||
"-Wno-strict-prototypes",
|
|
||||||
}
|
|
||||||
|
|
||||||
configuration { }
|
configuration { }
|
||||||
|
|
||||||
files {
|
files {
|
||||||
|
Loading…
Reference in New Issue
Block a user