3rdparty/utf8proc: Updated to 2.9.0.

2025-07-13 13:35:41 +03:00 · 2023-12-06 07:05:45 +11:00 · 2023-12-06 07:05:45 +11:00 · 466c450cb3
commit 466c450cb3
parent 12590d6ad8
28 changed files with 13449 additions and 12383 deletions
--- a/3rdparty/utf8proc/.github/workflows/ci-fuzz.yml
+++ b/3rdparty/utf8proc/.github/workflows/ci-fuzz.yml
@ -0,0 +1,23 @@
+name: CIFuzz
+on: [pull_request]
+jobs:
+  Fuzzing:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Build Fuzzers
+      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+      with:
+        oss-fuzz-project-name: 'utf8proc'
+        dry-run: false
+    - name: Run Fuzzers
+      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+      with:
+        oss-fuzz-project-name: 'utf8proc'
+        fuzz-seconds: 600
+        dry-run: false
+    - name: Upload Crash
+      uses: actions/upload-artifact@v4  # v1 is retired; same name/path inputs
+      if: failure()  # only publish artifacts when an earlier step failed
+      with:
+        name: artifacts
+        path: ./out/artifacts
--- a/3rdparty/utf8proc/.github/workflows/cmake.yml
+++ b/3rdparty/utf8proc/.github/workflows/cmake.yml
@ -0,0 +1,64 @@
+name: CMake
+
+on:
+  push:
+    branches:
+      - master
+      - 'release-*'
+  pull_request:
+  # run on all pr
+
+jobs:
+  build:
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macOS-latest]
+        shared: ["ON", "OFF"]  # quoted so YAML keeps them as strings, not booleans
+    runs-on: ${{ matrix.os }}
+    name: ${{ matrix.os }} - shared=${{ matrix.shared }}
+    steps:
+    - uses: actions/checkout@v4
+    - name: Build
+      run: |
+        mkdir build
+        cmake -S . -B build -DBUILD_SHARED_LIBS=${{ matrix.shared }} -DUTF8PROC_ENABLE_TESTING=ON
+        cmake --build build
+    - name: Run Test
+      run: ctest --test-dir build -V
+    - name: Upload shared lib
+      if: matrix.shared == 'ON'
+      uses: actions/upload-artifact@v4  # v2 is retired; v4 needs unique names per run
+      with:
+        name: ${{ matrix.os }}  # unique: only the shared=ON leg of each OS uploads
+        path: |
+          build/libutf8proc.*
+          build/Debug/utf8proc.*
+
+  mingw:
+    strategy:
+      matrix:
+        os: [windows-latest]
+        shared: ["ON", "OFF"]  # quoted so YAML keeps them as strings, not booleans
+    runs-on: ${{ matrix.os }}
+    name: mingw64 - shared=${{ matrix.shared }}
+    defaults:
+      run:
+        shell: msys2 {0}  # every run step of this job executes inside MSYS2
+    steps:
+    - uses: actions/checkout@v4
+    - uses: msys2/setup-msys2@v2
+      with:
+        install: gcc make mingw-w64-x86_64-cmake
+    - name: Build
+      run: |
+        mkdir build
+        cmake -S . -B build -DBUILD_SHARED_LIBS=${{ matrix.shared }} -DUTF8PROC_ENABLE_TESTING=ON -G'MSYS Makefiles'
+        cmake --build build
+    - name: Run Test
+      run: ctest --test-dir build -V
+    - name: Upload shared lib
+      if: matrix.shared == 'ON'
+      uses: actions/upload-artifact@v4  # v2 is retired; v4 needs unique names per run
+      with:
+        name: windows-mingw64
+        path: build/libutf8proc.*
--- a/3rdparty/utf8proc/.github/workflows/make.yml
+++ b/3rdparty/utf8proc/.github/workflows/make.yml
@ -0,0 +1,41 @@
+name: Make
+
+on:
+  push:
+    branches:
+      - master
+      - 'release-*'
+  pull_request:
+  # run on all pr
+
+jobs:
+  build:
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macOS-latest]
+    runs-on: ${{ matrix.os }}
+    name: ${{ matrix.os }}
+    steps:
+    - uses: actions/checkout@v4
+    # TODO: update makefile to check MANIFEST
+    # - name: Install dependencies (MacOS)
+    #   if: matrix.os == 'macOS-latest'
+    #   run: brew install ruby findutils
+
+    - name: Check MANIFEST
+      if: matrix.os == 'ubuntu-latest'  # the matrix only defines `os`; `matrix.config.os` never matched
+      run: make manifest && diff MANIFEST.new MANIFEST
+    - name: Run Test
+      run: make check
+    - name: Check utf8proc_data.c
+      run: make data && diff data/utf8proc_data.c.new utf8proc_data.c
+    - name: Clean
+      run: make clean && git status --ignored --porcelain && test -z "$(git status --ignored --porcelain)"
+
+    - name: Make lib
+      run: make
+    - name: Upload shared lib
+      uses: actions/upload-artifact@v4  # v2 is retired; v4 needs unique names per run
+      with:
+        name: make-${{ matrix.os }}  # unique per matrix leg
+        path: libutf8proc.*
--- a/3rdparty/utf8proc/.gitignore
+++ b/3rdparty/utf8proc/.gitignore
@ -0,0 +1,38 @@
+*.tar.gz
+*.exe
+*.dll
+*.do
+*.o
+*.so*
+*.a
+*.dll
+*.dylib
+*.dSYM
+*.out
+*.new
+.vscode
+/data/*.txt
+/data/*.ttf
+/data/*.sfd
+/docs/
+/bench/bench
+/bench/icu
+/bench/unistring
+/test/normtest
+/test/graphemetest
+/test/printproperty
+/test/charwidth
+/test/misc
+/test/valid
+/test/iterate
+/test/case
+/test/iscase
+/test/custom
+/tmp/
+/mingw_static/
+/mingw_shared/
+/msvc_shared/
+/msvc_static/
+/build/
+NEWS-update.jl
+libutf8proc.pc
--- a/3rdparty/utf8proc/.travis.yml
+++ b/3rdparty/utf8proc/.travis.yml
@ -1,22 +0,0 @@
-language: c
-compiler:
-    - gcc
-    - clang
-notifications:
-    email: false
-before_install:
-    - sudo add-apt-repository ppa:staticfloat/julia-deps -y
-    - sudo add-apt-repository ppa:staticfloat/juliareleases -y
-    - sudo apt-get update -qq -y
-    - sudo apt-get install libpcre3-dev julia fontforge -y
-script:
-    - make manifest && diff MANIFEST.new MANIFEST
-    - make check
-    - make data && diff data/utf8proc_data.c.new utf8proc_data.c
-    - make clean && git status --ignored --porcelain && test -z "$(git status --ignored --porcelain)"
-    - (mkdir build_static && cd build_static && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make)
-    - (mkdir build_shared && cd build_shared && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_SHARED_LIBS=ON && make)
-env:
-    # use JuliaLang caching (https://github.com/staticfloat/cache.julialang.org)
-    # so that Travis builds do not depend on anyone's flaky servers but our own
-    - URLCACHE=https://cache.julialang.org/ CFLAGS="-O2 -Werror -Wmissing-prototypes"
--- a/3rdparty/utf8proc/CMakeLists.txt
+++ b/3rdparty/utf8proc/CMakeLists.txt
@ -1,20 +1,24 @@
-cmake_minimum_required (VERSION 2.8.12)
+cmake_minimum_required (VERSION 3.0.0)

 include (utils.cmake)

 disallow_intree_builds()

-project (utf8proc C)
+if (POLICY CMP0048)
+  cmake_policy (SET CMP0048 NEW)
+endif ()
+project (utf8proc VERSION 2.9.0 LANGUAGES C)

 # This is the ABI version number, which may differ from the
-# API version number (defined in utf8proc.h).
+# API version number (defined in utf8proc.h and above).
 # Be sure to also update these in Makefile and MANIFEST!
-set(SO_MAJOR 2)
-set(SO_MINOR 4)
-set(SO_PATCH 1)
+set(SO_MAJOR 3)
+set(SO_MINOR 0)
+set(SO_PATCH 0)

 option(UTF8PROC_INSTALL "Enable installation of utf8proc" On)
 option(UTF8PROC_ENABLE_TESTING "Enable testing of utf8proc" Off)
+option(LIB_FUZZING_ENGINE "Fuzzing engine to link against" Off)

 add_library (utf8proc
  utf8proc.c
@ -50,23 +54,23 @@ set_target_properties (utf8proc PROPERTIES
 )

 if (UTF8PROC_INSTALL)
+  include(GNUInstallDirs)
+  install(FILES utf8proc.h DESTINATION "${CMAKE_INSTALL_FULL_INCLUDEDIR}")
  install(TARGETS utf8proc
-    RUNTIME DESTINATION bin
-    LIBRARY DESTINATION lib
-    ARCHIVE DESTINATION lib)
-
-  install(
-    FILES
-      "${PROJECT_SOURCE_DIR}/utf8proc.h"
-    DESTINATION include)
+    ARCHIVE DESTINATION "${CMAKE_INSTALL_FULL_LIBDIR}"
+    LIBRARY DESTINATION "${CMAKE_INSTALL_FULL_LIBDIR}"
+    RUNTIME DESTINATION "${CMAKE_INSTALL_FULL_BINDIR}"
+  )
+  configure_file(libutf8proc.pc.cmakein libutf8proc.pc @ONLY)
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libutf8proc.pc" DESTINATION "${CMAKE_INSTALL_FULL_LIBDIR}/pkgconfig")
 endif()

 if(UTF8PROC_ENABLE_TESTING)
  enable_testing()
  file(MAKE_DIRECTORY data)
-  set(UNICODE_VERSION 13.0.0)
-  file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt data/NormalizationTest.txt SHOW_PROGRESS)
-  file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTest.txt SHOW_PROGRESS)
+  set(UNICODE_VERSION 15.1.0)
+  file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt ${CMAKE_BINARY_DIR}/data/NormalizationTest.txt SHOW_PROGRESS)
+  file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt ${CMAKE_BINARY_DIR}/data/GraphemeBreakTest.txt SHOW_PROGRESS)
  add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)
  target_link_libraries(case utf8proc)
  add_executable(custom test/tests.h test/tests.c utf8proc.h test/custom.c)
@ -98,4 +102,12 @@ if(UTF8PROC_ENABLE_TESTING)
  target_link_libraries(normtest utf8proc)
  add_test(utf8proc.testgraphemetest graphemetest data/GraphemeBreakTest.txt)
  add_test(utf8proc.testnormtest normtest data/NormalizationTest.txt)
+
+  if(LIB_FUZZING_ENGINE)
+    add_executable(fuzzer utf8proc.h test/fuzzer.c)
+    target_link_libraries(fuzzer ${LIB_FUZZING_ENGINE} utf8proc)
+  else()
+    add_executable(fuzzer utf8proc.h test/fuzz_main.c test/fuzzer.c)
+    target_link_libraries(fuzzer utf8proc)
+  endif()
 endif()
--- a/3rdparty/utf8proc/Doxyfile
+++ b/3rdparty/utf8proc/Doxyfile
@ -1,4 +1,4 @@
-# Doxyfile 1.8.18
+# Doxyfile 1.9.1

 # This file describes the settings to be used by the documentation system
 # doxygen (www.doxygen.org) for a project.
@ -32,7 +32,7 @@ DOXYFILE_ENCODING      = UTF-8
 # title of most generated pages and in a few other places.
 # The default value is: My Project.

-PROJECT_NAME           = "utf8proc"
+PROJECT_NAME           = utf8proc

 # The PROJECT_NUMBER tag can be used to enter a project or revision number. This
 # could be handy for archiving the generated documentation or if some version
@ -217,6 +217,14 @@ QT_AUTOBRIEF           = NO

 MULTILINE_CPP_IS_BRIEF = NO

+# By default Python docstrings are displayed as preformatted text and doxygen's
+# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the
+# doxygen's special commands can be used and the contents of the docstring
+# documentation blocks is shown as doxygen documentation.
+# The default value is: YES.
+
+PYTHON_DOCSTRING       = YES
+
 # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
 # documentation from any documented member that it re-implements.
 # The default value is: YES.
@ -305,7 +313,10 @@ OPTIMIZE_OUTPUT_SLICE  = NO
 # Note: For files without extension you can use no_extension as a placeholder.
 #
 # Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
+# the files are not read by doxygen. When specifying no_extension you should add
+# * to the FILE_PATTERNS.
+#
+# Note see also the list of default file extension mappings.

 EXTENSION_MAPPING      =

@ -439,6 +450,19 @@ TYPEDEF_HIDES_STRUCT   = NO

 LOOKUP_CACHE_SIZE      = 0

+# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use
+# during processing. When set to 0 doxygen will base this on the number of
+# cores available in the system. You can set it explicitly to a value larger
+# than 0 to get more control over the balance between CPU load and processing
+# speed. At this moment only the input processing can be done using multiple
+# threads. Since this is still an experimental feature the default is set to 1,
+# which effectively disables parallel processing. Please report any issues you
+# encounter. Generating dot graphs in parallel is controlled by the
+# DOT_NUM_THREADS setting.
+# Minimum value: 0, maximum value: 32, default value: 1.
+
+NUM_PROC_THREADS       = 1
+
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
@ -502,6 +526,13 @@ EXTRACT_LOCAL_METHODS  = NO

 EXTRACT_ANON_NSPACES   = NO

+# If this flag is set to YES, the name of an unnamed parameter in a declaration
+# will be determined by the corresponding definition. By default unnamed
+# parameters remain unnamed in the output.
+# The default value is: YES.
+
+RESOLVE_UNNAMED_PARAMS = YES
+
 # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
 # undocumented members inside documented classes or files. If set to NO these
 # members will be included in the various overviews, but no documentation
@ -539,11 +570,18 @@ HIDE_IN_BODY_DOCS      = NO

 INTERNAL_DOCS          = NO

-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES, upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# (including Cygwin) ands Mac users are advised to set this option to NO.
+# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
+# able to match the capabilities of the underlying filesystem. In case the
+# filesystem is case sensitive (i.e. it supports files in the same directory
+# whose names only differ in casing), the option must be set to YES to properly
+# deal with such files in case they appear in the input. For filesystems that
+# are not case sensitive the option should be set to NO to properly deal with
+# output files written for symbols that only differ in casing, such as for two
+# classes, one named CLASS and the other named Class, and to also support
+# references to files without having to specify the exact matching casing. On
+# Windows (including Cygwin) and MacOS, users should typically set this option
+# to NO, whereas on Linux or other Unix flavors it should typically be set to
+# YES.
 # The default value is: system dependent.

 CASE_SENSE_NAMES       = NO
@ -782,7 +820,10 @@ WARN_IF_DOC_ERROR      = YES
 WARN_NO_PARAMDOC       = NO

 # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
-# a warning is encountered.
+# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
+# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
+# at the end of the doxygen process doxygen will return with a non-zero status.
+# Possible values are: NO, YES and FAIL_ON_WARNINGS.
 # The default value is: NO.

 WARN_AS_ERROR          = NO
@ -818,8 +859,8 @@ INPUT                  =
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
 # libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
-# possible encodings.
+# documentation (see:
+# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
 # The default value is: UTF-8.

 INPUT_ENCODING         = UTF-8
@ -832,13 +873,15 @@ INPUT_ENCODING         = UTF-8
 # need to set EXTENSION_MAPPING for the extension otherwise the files are not
 # read by doxygen.
 #
+# Note the list of default checked file patterns might differ from the list of
+# default file extension mappings.
+#
 # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
 # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
 # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
 # *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
-# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen
-# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd,
-# *.vhdl, *.ucf, *.qsf and *.ice.
+# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,
+# *.ucf, *.qsf and *.ice.

 FILE_PATTERNS          =

@ -1065,13 +1108,6 @@ VERBATIM_HEADERS       = YES

 ALPHABETICAL_INDEX     = YES

-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX    = 5
-
 # In case all classes in a project start with a common prefix, all classes will
 # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
 # can be used to specify a prefix (or a list of prefixes) that should be ignored
@ -1242,10 +1278,11 @@ HTML_INDEX_NUM_ENTRIES = 100

 # If the GENERATE_DOCSET tag is set to YES, additional index files will be
 # generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: https://developer.apple.com/xcode/), introduced with OSX
-# 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
+# environment (see:
+# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
+# create a documentation set, doxygen will generate a Makefile in the HTML
+# output directory. Running make will produce the docset in that directory and
+# running make install will install the docset in
 # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
 # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
 # genXcode/_index.html for more information.
@ -1287,8 +1324,8 @@ DOCSET_PUBLISHER_NAME  = Publisher
 # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
 # additional HTML index files: index.hhp, index.hhc, and index.hhk. The
 # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
+# (see:
+# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
 #
 # The HTML Help Workshop contains a compiler that can convert all HTML output
 # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
@ -1318,7 +1355,7 @@ CHM_FILE               =
 HHC_LOCATION           =

 # The GENERATE_CHI flag controls if a separate .chi index file is generated
-# (YES) or that it should be included in the master .chm file (NO).
+# (YES) or that it should be included in the main .chm file (NO).
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTMLHELP is set to YES.

@ -1363,7 +1400,8 @@ QCH_FILE               =

 # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
 # Project output. For more information please see Qt Help Project / Namespace
-# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
+# (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
 # The default value is: org.doxygen.Project.
 # This tag requires that the tag GENERATE_QHP is set to YES.

@ -1371,8 +1409,8 @@ QHP_NAMESPACE          = org.doxygen.Project

 # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
 # Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
-# folders).
+# Folders (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
 # The default value is: doc.
 # This tag requires that the tag GENERATE_QHP is set to YES.

@ -1380,16 +1418,16 @@ QHP_VIRTUAL_FOLDER     = doc

 # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
 # filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHP_CUST_FILTER_NAME   =

 # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
 # custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHP_CUST_FILTER_ATTRS  =
@ -1401,9 +1439,9 @@ QHP_CUST_FILTER_ATTRS  =

 QHP_SECT_FILTER_ATTRS  =

-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
+# The QHG_LOCATION tag can be used to specify the location (absolute path
+# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
+# run qhelpgenerator on the generated .qhp file.
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHG_LOCATION           =
@ -1484,8 +1522,8 @@ EXT_LINKS_IN_WINDOW    = NO
 # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
 # https://inkscape.org) to generate formulas as SVG images instead of PNGs for
 # the HTML output. These images will generally look nicer at scaled resolutions.
-# Possible values are: png The default and svg Looks nicer but requires the
-# pdf2svg tool.
+# Possible values are: png (the default) and svg (looks nicer but requires the
+# pdf2svg or inkscape tool).
 # The default value is: png.
 # This tag requires that the tag GENERATE_HTML is set to YES.

@ -1530,7 +1568,7 @@ USE_MATHJAX            = NO

 # When MathJax is enabled you can set the default output format to be used for
 # the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.
 # Possible values are: HTML-CSS (which is slower, but has the best
 # compatibility), NativeMML (i.e. MathML) and SVG.
 # The default value is: HTML-CSS.
@ -1560,7 +1598,8 @@ MATHJAX_EXTENSIONS     =

 # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
 # of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
 # example see the documentation.
 # This tag requires that the tag USE_MATHJAX is set to YES.

@ -1607,7 +1646,8 @@ SERVER_BASED_SEARCH    = NO
 #
 # Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/).
+# Xapian (see:
+# https://xapian.org/).
 #
 # See the section "External Indexing and Searching" for details.
 # The default value is: NO.
@ -1620,8 +1660,9 @@ EXTERNAL_SEARCH        = NO
 #
 # Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/). See the section "External Indexing and
-# Searching" for details.
+# Xapian (see:
+# https://xapian.org/). See the section "External Indexing and Searching" for
+# details.
 # This tag requires that the tag SEARCHENGINE is set to YES.

 SEARCHENGINE_URL       =
@ -1785,9 +1826,11 @@ LATEX_EXTRA_FILES      =

 PDF_HYPERLINKS         = YES

-# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
-# the PDF file directly from the LaTeX files. Set this option to YES, to get a
-# higher quality PDF documentation.
+# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
+# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
+# files. Set this option to YES, to get a higher quality PDF documentation.
+#
+# See also section LATEX_CMD_NAME for selecting the engine.
 # The default value is: YES.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

@ -2298,10 +2341,32 @@ UML_LOOK               = NO
 # but if the number exceeds 15, the total amount of fields shown is limited to
 # 10.
 # Minimum value: 0, maximum value: 100, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
+# This tag requires that the tag UML_LOOK is set to YES.

 UML_LIMIT_NUM_FIELDS   = 10

+# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
+# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
+# tag is set to YES, doxygen will add type and arguments for attributes and
+# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
+# will not generate fields with class member information in the UML graphs. The
+# class diagrams will look similar to the default class diagrams but using UML
+# notation for the relationships.
+# Possible values are: NO, YES and NONE.
+# The default value is: NO.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+DOT_UML_DETAILS        = NO
+
+# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
+# to display on a single line. If the actual line length exceeds this threshold
+# significantly it will be wrapped across multiple lines. Some heuristics are applied
+# to avoid ugly line breaks.
+# Minimum value: 0, maximum value: 1000, default value: 17.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_WRAP_THRESHOLD     = 17
+
 # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
 # collaboration graphs will show the relations between templates and their
 # instances.
@ -2491,9 +2556,11 @@ DOT_MULTI_TARGETS      = NO

 GENERATE_LEGEND        = YES

-# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
 # files that are used to generate the various graphs.
+#
+# Note: This setting is not only used for dot files but also for msc and
+# plantuml temporary files.
 # The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.

 DOT_CLEANUP            = YES
--- a/3rdparty/utf8proc/LICENSE.md
+++ b/3rdparty/utf8proc/LICENSE.md
@ -7,7 +7,7 @@ whose copyright and license statements are reproduced below, all new
 work on the utf8proc library is licensed under the [MIT "expat"
 license](http://opensource.org/licenses/MIT):

-*Copyright &copy; 2014-2019 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
+*Copyright &copy; 2014-2021 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*

 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the "Software"),
--- a/3rdparty/utf8proc/MANIFEST
+++ b/3rdparty/utf8proc/MANIFEST
@ -2,8 +2,8 @@ include/
 include/utf8proc.h
 lib/
 lib/libutf8proc.a
-lib/libutf8proc.so -> libutf8proc.so.2.4.1
-lib/libutf8proc.so.2 -> libutf8proc.so.2.4.1
-lib/libutf8proc.so.2.4.1
+lib/libutf8proc.so -> libutf8proc.so.3.0.0
+lib/libutf8proc.so.3 -> libutf8proc.so.3.0.0
+lib/libutf8proc.so.3.0.0
 lib/pkgconfig/
 lib/pkgconfig/libutf8proc.pc
--- a/3rdparty/utf8proc/Makefile
+++ b/3rdparty/utf8proc/Makefile
@ -11,7 +11,7 @@ PERL=perl
 CFLAGS ?= -O2
 PICFLAG = -fPIC
 C99FLAG = -std=c99
-WCFLAGS = -Wall -Wextra -pedantic
+WCFLAGS = -Wsign-conversion -Wall -Wextra -pedantic
 UCFLAGS = $(CPPFLAGS) $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS $(UTF8PROC_DEFINES)
 LDFLAG_SHARED = -shared
 SOFLAG = -Wl,-soname
@ -22,9 +22,12 @@ SOFLAG = -Wl,-soname
 # compatibility is broken, even if the API is backward-compatible.
 # The API version number is defined in utf8proc.h.
 # Be sure to also update these ABI versions in MANIFEST and CMakeLists.txt!
-MAJOR=2
-MINOR=4
-PATCH=1
+MAJOR=3
+MINOR=0
+PATCH=0
+
+# api version (also in utf8proc.h and CMakeLists.txt)
+VERSION=2.9.0

 OS := $(shell uname)
 ifeq ($(OS),Darwin) # MacOS X
@ -78,7 +81,7 @@ utf8proc.o: utf8proc.h utf8proc.c utf8proc_data.c

 libutf8proc.a: utf8proc.o
 	rm -f libutf8proc.a
-	$(AR) rs libutf8proc.a utf8proc.o
+	$(AR) crs libutf8proc.a utf8proc.o

 libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH): utf8proc.o
 	$(CC) $(LDFLAGS) $(LDFLAG_SHARED) -o $@ $(SOFLAG) -Wl,libutf8proc.so.$(MAJOR) utf8proc.o
@ -168,6 +171,20 @@ test/custom: test/custom.c test/tests.o utf8proc.o utf8proc.h test/tests.h
 test/misc: test/misc.c test/tests.o utf8proc.o utf8proc.h test/tests.h
 	$(CC) $(UCFLAGS) $(LDFLAGS) -DUNICODE_VERSION='"'`$(PERL) -ne "/^UNICODE_VERSION=/ and print $$';" data/Makefile`'"' test/misc.c test/tests.o utf8proc.o -o $@

+# make release tarball from master branch
+dist:
+	git archive master --prefix=utf8proc-$(VERSION)/ -o utf8proc-$(VERSION).tar.gz
+
+# build tarball, make sure it passes checks, and make sure version numbers are consistent
+distcheck: dist
+	test `grep UTF8PROC_VERSION utf8proc.h | cut -d' ' -f3 | tr '\n' .` = $(VERSION). || exit 1
+	test `grep "utf8proc VERSION" CMakeLists.txt |cut -d' ' -f 4` = $(VERSION) || exit 1
+	test `grep  libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) MANIFEST | wc -l` = 3 || exit 1
+	test `grep 'set(SO_' CMakeLists.txt |cut -d' ' -f2 | tr -d ')' | tr '\n' '.'` = $(MAJOR).$(MINOR).$(PATCH). || exit 1
+	tar xzf utf8proc-$(VERSION).tar.gz
+	make -C utf8proc-$(VERSION) check
+	rm -rf utf8proc-$(VERSION)
+
 check: test/normtest data/NormalizationTest.txt data/Lowercase.txt data/Uppercase.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/iscase test/custom test/charwidth test/misc test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
 	$(MAKE) -C bench
 	test/normtest data/NormalizationTest.txt
--- a/3rdparty/utf8proc/NEWS.md
+++ b/3rdparty/utf8proc/NEWS.md
@ -1,5 +1,29 @@
 # utf8proc release history #

+## Version 2.9.0 ##
+
+2023-10-20
+
+ - Unicode 15.1 support ([#253]).
+
+## Version 2.8.0 ##
+
+2022-10-30
+
+ - Unicode 15 support ([#247]).
+
+## Version 2.7.0 ##
+
+2021-12-16
+
+ - Unicode 14 support ([#233]).
+
+ - Support `GNUInstallDirs` in CMake build ([#159]).
+
+ - `cmake` build now installs `pkg-config` file ([#224]).
+
+ - Various build and portability improvements.
+
 ## Version 2.6.1 ##

 2020-12-15
@ -409,8 +433,13 @@ Release of version 1.0.1
 [#152]: https://github.com/JuliaStrings/utf8proc/issues/152
 [#154]: https://github.com/JuliaStrings/utf8proc/issues/154
 [#156]: https://github.com/JuliaStrings/utf8proc/issues/156
+[#159]: https://github.com/JuliaStrings/utf8proc/issues/159
 [#167]: https://github.com/JuliaStrings/utf8proc/issues/167
 [#173]: https://github.com/JuliaStrings/utf8proc/issues/173
 [#179]: https://github.com/JuliaStrings/utf8proc/issues/179
 [#196]: https://github.com/JuliaStrings/utf8proc/issues/196
 [#205]: https://github.com/JuliaStrings/utf8proc/issues/205
+[#224]: https://github.com/JuliaStrings/utf8proc/issues/224
+[#233]: https://github.com/JuliaStrings/utf8proc/issues/233
+[#247]: https://github.com/JuliaStrings/utf8proc/issues/247
+[#253]: https://github.com/JuliaStrings/utf8proc/issues/253
--- a/3rdparty/utf8proc/README.md
+++ b/3rdparty/utf8proc/README.md
@ -1,5 +1,5 @@
 # utf8proc
-[![Travis CI Status](https://travis-ci.org/JuliaStrings/utf8proc.png)](https://travis-ci.org/JuliaStrings/utf8proc)
+[![CI](https://github.com/JuliaStrings/utf8proc/actions/workflows/make.yml/badge.svg)](https://github.com/JuliaStrings/utf8proc/actions/workflows/make.yml)
 [![AppVeyor status](https://ci.appveyor.com/api/projects/status/ivaa0v6ikxrmm5r6?svg=true)](https://ci.appveyor.com/project/StevenGJohnson/utf8proc)

 [utf8proc](http://juliastrings.github.io/utf8proc/) is a small, clean C
@ -38,9 +38,8 @@ For compilation of the C library, run `make`.  You can also install the library
 Alternatively, you can compile with `cmake`, e.g. by
 ```sh
 mkdir build
-cd build
-cmake ..
-make
+cmake -S . -B build
+cmake --build build
 ```

 ### Using other compilers
@ -60,7 +59,7 @@ The C library is found in this directory after successful compilation
 and is named `libutf8proc.a` (for the static library) and
 `libutf8proc.so` (for the dynamic library).

-The Unicode version supported is 13.0.0.
+The Unicode version supported is 15.1.0.

 For Unicode normalizations, the following options are used:

--- a/3rdparty/utf8proc/data/Makefile
+++ b/3rdparty/utf8proc/data/Makefile
@ -22,7 +22,7 @@ CharWidths.txt: charwidths.jl EastAsianWidth.txt
 	$(JULIA) charwidths.jl > $@

 # Unicode data version (must also update utf8proc_unicode_version function)
-UNICODE_VERSION=13.0.0
+UNICODE_VERSION=15.1.0

 UnicodeData.txt:
 	$(CURL) $(CURLFLAGS) -o $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
--- a/3rdparty/utf8proc/data/data_generator.rb
+++ b/3rdparty/utf8proc/data/data_generator.rb
@ -67,7 +67,7 @@
 #  authorization of the copyright holder.


-$ignorable_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Default_Ignorable_Code_Point.*?# Total code points:/m]
+$ignorable_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Derived Property: Default_Ignorable_Code_Point.*?# Total code points:/m]
 $ignorable = []
 $ignorable_list.each_line do |entry|
  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
@ -77,7 +77,7 @@ $ignorable_list.each_line do |entry|
  end
 end

-$uppercase_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Uppercase.*?# Total code points:/m]
+$uppercase_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Derived Property: Uppercase.*?# Total code points:/m]
 $uppercase = []
 $uppercase_list.each_line do |entry|
  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
@ -87,7 +87,7 @@ $uppercase_list.each_line do |entry|
  end
 end

-$lowercase_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Lowercase.*?# Total code points:/m]
+$lowercase_list = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Derived Property: Lowercase.*?# Total code points:/m]
 $lowercase = []
 $lowercase_list.each_line do |entry|
  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
@ -97,7 +97,33 @@ $lowercase_list.each_line do |entry|
  end
 end

-$grapheme_boundclass_list = File.read("GraphemeBreakProperty.txt")
+# Indic_Conjunct_Break (InCB) class of every codepoint that is listed in
+# DerivedCoreProperties.txt, keyed by codepoint; a codepoint that is not
+# listed there maps to UTF8PROC_INDIC_CONJUNCT_BREAK_NONE.
+$icb = Hash.new("UTF8PROC_INDIC_CONJUNCT_BREAK_NONE")
+
+# The Linker, Consonant and Extend sections are applied in that order, so if
+# a codepoint were listed in more than one section the last one would win.
+$icb_linker_list, $icb_consonant_list, $icb_extend_list =
+  %w[Linker Consonant Extend].map do |icb_value|
+    # text from the section header up to its "# Total code points:" trailer
+    section = File.read("DerivedCoreProperties.txt", :encoding => 'utf-8')[/# Indic_Conjunct_Break=#{icb_value}.*?# Total code points:/m]
+    constant = "UTF8PROC_INDIC_CONJUNCT_BREAK_#{icb_value.upcase}"
+
+    section.each_line do |entry|
+      case entry
+      when /^([0-9A-F]+)\.\.([0-9A-F]+)/ # codepoint range "XXXX..YYYY"
+        $1.hex.upto($2.hex) { |cp| $icb[cp] = constant }
+      when /^[0-9A-F]+/ # single codepoint
+        $icb[$&.hex] = constant
+      end
+    end
+
+    # the raw section text stays available in the matching $icb_*_list global
+    section
+  end
+
+$grapheme_boundclass_list = File.read("GraphemeBreakProperty.txt", :encoding => 'utf-8')
 $grapheme_boundclass = Hash.new("UTF8PROC_BOUNDCLASS_OTHER")
 $grapheme_boundclass_list.each_line do |entry|
  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*([A-Za-z_]+)/
@ -107,7 +133,7 @@ $grapheme_boundclass_list.each_line do |entry|
  end
 end

-$emoji_data_list = File.read("emoji-data.txt")
+$emoji_data_list = File.read("emoji-data.txt", :encoding => 'utf-8')
 $emoji_data_list.each_line do |entry|
  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*Extended_Pictographic\W/
    $1.hex.upto($2.hex) { |e2| $grapheme_boundclass[e2] = "UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC" }
@ -120,7 +146,7 @@ $emoji_data_list.each_line do |entry|
  end
 end

-$charwidth_list = File.read("CharWidths.txt")
+$charwidth_list = File.read("CharWidths.txt", :encoding => 'utf-8')
 $charwidth = Hash.new(0)
 $charwidth_list.each_line do |entry|
  if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*([0-9]+)/
@ -130,13 +156,13 @@ $charwidth_list.each_line do |entry|
  end
 end

-$exclusions = File.read("CompositionExclusions.txt")[/# \(1\) Script Specifics.*?# Total code points:/m]
+$exclusions = File.read("CompositionExclusions.txt", :encoding => 'utf-8')[/# \(1\) Script Specifics.*?# Total code points:/m]
 $exclusions = $exclusions.chomp.split("\n").collect { |e| e.hex }

-$excl_version = File.read("CompositionExclusions.txt")[/# \(2\) Post Composition Version precomposed characters.*?# Total code points:/m]
+$excl_version = File.read("CompositionExclusions.txt", :encoding => 'utf-8')[/# \(2\) Post Composition Version precomposed characters.*?# Total code points:/m]
 $excl_version = $excl_version.chomp.split("\n").collect { |e| e.hex }

-$case_folding_string = File.open("CaseFolding.txt", :encoding => 'utf-8').read
+$case_folding_string = File.read("CaseFolding.txt", :encoding => 'utf-8')
 $case_folding = {}
 $case_folding_string.chomp.split("\n").each do |line|
  next unless line =~ /([0-9A-F]+); [CF]; ([0-9A-F ]+);/i
@ -174,13 +200,13 @@ def cpary2c(array)
  return "UINT16_MAX" if array.nil? || array.length == 0
  lencode = array.length - 1 #no sequence has len 0, so we encode len 1 as 0, len 2 as 1, ...
  array = cpary2utf16encoded(array)
-  if lencode >= 7 #we have only 3 bits for the length (which is already cutting it close. might need to change it to 2 bits in future Unicode versions)
+  if lencode >= 3 #we have only 2 bits for the length
    array = [lencode] + array
-    lencode = 7
+    lencode = 3
  end
  idx = pushary(array)
-  raise "Array index out of bound" if idx > 0x1FFF
-  return "#{idx | (lencode << 13)}"
+  raise "Array index out of bound" if idx > 0x3FFF
+  return "#{idx | (lencode << 14)}"
 end
 def singlecpmap(cp)
  return "UINT16_MAX" if cp == nil
@ -249,7 +275,8 @@ class UnicodeChar
    "#{$ignorable.include?(code)}, " <<
    "#{%W[Zl Zp Cc Cf].include?(category) and not [0x200C, 0x200D].include?(category)}, " <<
    "#{$charwidth[code]}, 0, " <<
-    "#{$grapheme_boundclass[code]}},\n"
+    "#{$grapheme_boundclass[code]}, " <<
+    "#{$icb[code]}},\n"
  end
 end

@ -415,7 +442,7 @@ end
 $stdout << "};\n\n"

 $stdout << "static const utf8proc_property_t utf8proc_properties[] = {\n"
-$stdout << "  {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX,  false,false,false,false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER},\n"
+$stdout << "  {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX,  false,false,false,false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER, UTF8PROC_INDIC_CONJUNCT_BREAK_NONE},\n"
 properties.each { |line|
  $stdout << line
 }
--- a/3rdparty/utf8proc/libutf8proc.pc.cmakein
+++ b/3rdparty/utf8proc/libutf8proc.pc.cmakein
@ -0,0 +1,10 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=@CMAKE_INSTALL_FULL_BINDIR@
+libdir=@CMAKE_INSTALL_FULL_LIBDIR@
+includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
+
+Name: libutf8proc
+Description: UTF8 processing
+Version: @PROJECT_VERSION@
+Libs: -L${libdir} -lutf8proc
+Cflags: -I${includedir} -DUTF8PROC_EXPORTS
--- a/3rdparty/utf8proc/test/case.c
+++ b/3rdparty/utf8proc/test/case.c
@ -26,27 +26,27 @@ int main(int argc, char **argv)
               ++error;
          }

-          if (sizeof(wint_t) > 2 || c < (1<<16)) {
-               wint_t l0 = towlower(c), u0 = towupper(c);
+          if (sizeof(wint_t) > 2 || (c < (1<<16) && u < (1<<16) && l < (1<<16))) {
+               wint_t l0 = towlower((wint_t)c), u0 = towupper((wint_t)c);

               /* OS unicode tables may be out of date.  But if they
                  do have a lower/uppercase mapping, hopefully it
                  is correct? */
-               if (l0 != c && l0 != l) {
+               if (l0 != (wint_t)c && l0 != (wint_t)l) {
                    fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
                            l, c, l0);
                    ++error;
               }
-               else if (l0 != l) { /* often true for out-of-date OS unicode */
+               else if (l0 != (wint_t)l) { /* often true for out-of-date OS unicode */
                    ++better;
                    /* printf("%x != towlower(%x) == %x\n", l, c, l0); */
               }
-               if (u0 != c && u0 != u) {
+               if (u0 != (wint_t)c && u0 != (wint_t)u) {
                    fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
                            u, c, u0);
                    ++error;
               }
-               else if (u0 != u) { /* often true for out-of-date OS unicode */
+               else if (u0 != (wint_t)u) { /* often true for out-of-date OS unicode */
                    ++better;
                    /* printf("%x != towupper(%x) == %x\n", u, c, u0); */
               }
--- a/3rdparty/utf8proc/test/fuzz_main.c
+++ b/3rdparty/utf8proc/test/fuzz_main.c
@ -0,0 +1,54 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+/* Fuzz target entry point, works without libFuzzer */
+
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+
+int main(int argc, char **argv)
+{
+    /* Replays one input file through the fuzz target; always exits with 0. */
+    FILE *f = NULL;
+    char *buf = NULL;
+    long siz_buf;
+
+    if(argc < 2)
+    {
+        fprintf(stderr, "no input file\n");
+        goto err;
+    }
+
+    f = fopen(argv[1], "rb");
+    if(f == NULL)
+    {
+        fprintf(stderr, "error opening input file %s\n", argv[1]);
+        goto err;
+    }
+
+    /* determine the file size; bail out quietly if it is empty or unseekable */
+    if(fseek(f, 0, SEEK_END) != 0) goto err;
+    siz_buf = ftell(f);
+    rewind(f);
+    if(siz_buf < 1) goto err;
+
+    buf = (char*)malloc((size_t)siz_buf);
+    if(buf == NULL)
+    {
+        fprintf(stderr, "malloc() failed\n");
+        goto err;
+    }
+
+    if(fread(buf, (size_t)siz_buf, 1, f) != 1)
+    {
+        fprintf(stderr, "fread() failed\n");
+        goto err;
+    }
+
+    (void)LLVMFuzzerTestOneInput((uint8_t*)buf, (size_t)siz_buf);
+
+err:
+    if(f != NULL) fclose(f); /* f stays NULL when we bail out before fopen() */
+    free(buf);
+    return 0;
+}
--- a/3rdparty/utf8proc/test/fuzzer.c
+++ b/3rdparty/utf8proc/test/fuzzer.c
@ -0,0 +1,84 @@
+#include <utf8proc.h>
+#include <string.h>
+
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
+{
+    if(size < 1) return 0;
+
+    /* Avoid timeout with long inputs */
+    if(size > (64 * 1024)) return 0;
+
+    /* data is passed to NUL-terminated string APIs below (strlen, NFD, ...) */
+    if(data[size-1] != '\0') return 0;
+
+    const uint8_t* ptr = data;
+    utf8proc_int32_t c = 0, c_prev = 0, state = 0;
+    utf8proc_option_t options;
+    utf8proc_ssize_t ret, bytes = 0;
+    size_t len = strlen((const char*)data);
+
+    while((size_t)bytes != len)
+    {
+        ret = utf8proc_iterate(ptr, -1, &c);
+        if(ret < 0 || ret == 0) break; /* decode error, or nothing consumed */
+
+        bytes += ret;
+        ptr += ret;
+
+        utf8proc_tolower(c);
+        utf8proc_toupper(c);
+        utf8proc_totitle(c);
+        utf8proc_islower(c);
+        utf8proc_isupper(c);
+        utf8proc_charwidth(c);
+        utf8proc_category(c);
+        utf8proc_category_string(c);
+        utf8proc_codepoint_valid(c);
+
+        utf8proc_grapheme_break(c_prev, c);
+        utf8proc_grapheme_break_stateful(c_prev, c, &state);
+
+        c_prev = c;
+    }
+
+    /* The UTF-32 pass needs at least one whole 4-byte codepoint of input */
+    utf8proc_int32_t *copy = size >= 4 ? malloc(size) : NULL;
+    if(copy)
+    {
+        size /= 4; /* from here on: number of codepoints, not bytes */
+        const size_t nbytes = size * sizeof(utf8proc_int32_t);
+
+        options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS | UTF8PROC_NLF2PS;
+        memcpy(copy, data, nbytes);
+        utf8proc_normalize_utf32(copy, size, options);
+
+        options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS;
+        memcpy(copy, data, nbytes);
+        utf8proc_normalize_utf32(copy, size, options);
+
+        options = UTF8PROC_STRIPCC | UTF8PROC_NLF2PS;
+        memcpy(copy, data, nbytes);
+        utf8proc_normalize_utf32(copy, size, options);
+
+        options = UTF8PROC_STRIPCC;
+        memcpy(copy, data, nbytes);
+        utf8proc_normalize_utf32(copy, size, options);
+
+        options = UTF8PROC_LUMP;
+        memcpy(copy, data, nbytes);
+        utf8proc_normalize_utf32(copy, size, options);
+
+        options = 0;
+        memcpy(copy, data, nbytes);
+        utf8proc_normalize_utf32(copy, size, options);
+        free(copy);
+    }
+
+    free(utf8proc_NFD(data));
+    free(utf8proc_NFC(data));
+    free(utf8proc_NFKD(data));
+    free(utf8proc_NFKC(data));
+    free(utf8proc_NFKC_Casefold(data));
+
+    return 0;
+}
--- a/3rdparty/utf8proc/test/graphemetest.c
+++ b/3rdparty/utf8proc/test/graphemetest.c
@ -43,7 +43,7 @@ void checkline(const char *_buf, bool verbose) {
            else
                i++;
        }
-        glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
+        glen = utf8proc_map(utf8, (utf8proc_ssize_t)j, &g, UTF8PROC_CHARBOUND);
        if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
            /* the test file contains surrogate codepoints, which are only for UTF-16 */
            printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
@ -66,7 +66,7 @@ void checkline(const char *_buf, bool verbose) {
        utf8proc_bool expectbreak = false;
        do {
            utf8proc_int32_t codepoint;
-            i += utf8proc_iterate(src + i, si - i, &codepoint);
+            i += (size_t)utf8proc_iterate(src + i, (utf8proc_ssize_t)(si - i), &codepoint);
            check(codepoint >= 0, "invalid UTF-8 data");
            if (codepoint == 0x002F)
                expectbreak = true;
@ -110,6 +110,7 @@ int main(int argc, char **argv)
        utf8proc_uint8_t *g;
        glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
        check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
+        check(glen != 6, "mishandled u+ffff and u+fffe grapheme breaks");
        free(g);
    };

@ -118,6 +119,13 @@ int main(int argc, char **argv)
    checkline("/ 1f926 1f3fc 200d 2642 fe0f /", true); /* facepalm + pale skin + zwj + male sign + FE0F */
    checkline("/ 1f468 1f3fb 200d 1f91d 200d 1f468 1f3fd /", true); /* man face + pale skin + zwj + hand holding + zwj + man face + dark skin */

+    /* more GB9c tests */
+    checkline("/ 0915 0300 094d 0300 0924 / 0915 /", true);
+    checkline("/ 0915 0300 094d 0300 094d 0924 / 0915 /", true);
+    checkline("/ 0915 0300 0300 / 0924 / 0915 /", true);
+    checkline("/ 0915 0300 094d 0300 / 0078 /", true);
+    checkline("/ 0300 094d 0300 / 0924 / 0915 /", true);
+
    check(utf8proc_grapheme_break(0x03b1, 0x03b2), "failed 03b1 / 03b2 test");
    check(!utf8proc_grapheme_break(0x03b1, 0x0302), "failed 03b1 0302 test");

--- a/3rdparty/utf8proc/test/iterate.c
+++ b/3rdparty/utf8proc/test/iterate.c
@ -8,7 +8,7 @@ static  int     error;
 #define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__)
 #define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__)

-static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line)
+static void testbytes(utf8proc_uint8_t *buf, utf8proc_ssize_t len, utf8proc_ssize_t retval, int line)
 {
    utf8proc_int32_t out[16];
    utf8proc_ssize_t ret;
@ -16,13 +16,13 @@ static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int
    /* Make a copy to ensure that memory is left uninitialized after "len"
     * bytes. This way, Valgrind can detect overreads.
     */
-    unsigned char tmp[16];
-    memcpy(tmp, buf, len);
+    utf8proc_uint8_t tmp[16];
+    memcpy(tmp, buf, (unsigned long int)len);

    tests++;
    if ((ret = utf8proc_iterate(tmp, len, out)) != retval) {
        fprintf(stderr, "Failed (%d):", line);
-        for (int i = 0; i < len ; i++) {
+        for (utf8proc_ssize_t i = 0; i < len ; i++) {
            fprintf(stderr, " 0x%02x", tmp[i]);
        }
        fprintf(stderr, " -> %zd\n", ret);
@ -32,8 +32,8 @@ static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int

 int main(int argc, char **argv)
 {
-    uint32_t byt;
-    unsigned char buf[16];
+    utf8proc_int32_t byt;
+    utf8proc_uint8_t buf[16];

    (void) argc; (void) argv; /* unused */

--- a/3rdparty/utf8proc/test/ossfuzz.sh
+++ b/3rdparty/utf8proc/test/ossfuzz.sh
@ -0,0 +1,13 @@
+#!/bin/bash -eu
+# This script is meant to be run by
+# https://github.com/google/oss-fuzz/blob/master/projects/utf8proc/Dockerfile
+# It builds the fuzzer with CMake and copies it plus a seed corpus into $OUT.
+mkdir build
+cd build
+cmake .. -DUTF8PROC_ENABLE_TESTING=ON -DLIB_FUZZING_ENGINE="$LIB_FUZZING_ENGINE"
+make -j"$(nproc)"
+
+cp "$SRC/utf8proc/build/fuzzer" "$OUT/utf8proc_fuzzer"
+
+find "$SRC/utf8proc/test" -name "*.txt" -print0 | \
+     xargs -0 zip "$OUT/utf8proc_fuzzer_seed_corpus.zip"
--- a/3rdparty/utf8proc/test/printproperty.c
+++ b/3rdparty/utf8proc/test/printproperty.c
@ -8,12 +8,14 @@ int main(int argc, char **argv)

    for (i = 1; i < argc; ++i) {
        utf8proc_uint8_t cstr[16], *map;
-        unsigned int c;
+        utf8proc_uint32_t x;
+        utf8proc_int32_t c;
        if (!strcmp(argv[i], "-V")) {
            printf("utf8proc version %s\n", utf8proc_version());
            continue;
        }
-        check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
+        check(sscanf(argv[i],"%x", &x) == 1, "invalid hex input %s", argv[i]);
+        c = (utf8proc_int32_t)x;
        const utf8proc_property_t *p = utf8proc_get_property(c);

        if (utf8proc_codepoint_valid(c))
@ -37,6 +39,7 @@ int main(int argc, char **argv)
            "  ignorable = %d\n"
            "  control_boundary = %d\n"
            "  boundclass = %d\n"
+            "  indic_conjunct_break = %d\n"
            "  charwidth = %d\n",
        argv[i], (char*) cstr,
        utf8proc_category_string(c),
@ -53,6 +56,7 @@ int main(int argc, char **argv)
        p->ignorable,
        p->control_boundary,
        p->boundclass,
+        p->indic_conjunct_break,
        utf8proc_charwidth(c));
        free(map);
    }
--- a/3rdparty/utf8proc/test/tests.c
+++ b/3rdparty/utf8proc/test/tests.c
@ -29,7 +29,8 @@ size_t skipspaces(const unsigned char *buf, size_t i)
   in dest, returning the number of bytes read from buf */
 size_t encode(unsigned char *dest, const unsigned char *buf)
 {
-     size_t i = 0, j, d = 0;
+     size_t i = 0, j;
+     utf8proc_ssize_t d = 0;
     for (;;) {
          int c;
          i = skipspaces(buf, i);
--- a/3rdparty/utf8proc/utf8proc.c
+++ b/3rdparty/utf8proc/utf8proc.c
@ -1,6 +1,6 @@
 /* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
 /*
- *  Copyright (c) 2014-2019 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
+ *  Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
 *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
 *
 *  Permission is hereby granted, free of charge, to any person obtaining a
@ -101,7 +101,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
 }

 UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
-  return "13.0.0";
+  return "15.1.0";
 }

 UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
@ -125,7 +125,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
  const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
 ) {
-  utf8proc_uint32_t uc;
+  utf8proc_int32_t uc;
  const utf8proc_uint8_t *end;

  *dst = -1;
@ -137,7 +137,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
    return 1;
  }
  // Must be between 0xc2 and 0xf4 inclusive to be valid
-  if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
+  if ((utf8proc_uint32_t)(uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
  if (uc < 0xe0) {         // 2-byte sequence
     // Must have valid continuation character
     if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
@ -288,35 +288,54 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
    true; // GB999
 }

-static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
+static utf8proc_bool grapheme_break_extended(int lbc, int tbc, int licb, int ticb, utf8proc_int32_t *state)
 {
  if (state) {
-    int lbc_override;
-    if (*state == UTF8PROC_BOUNDCLASS_START)
-      *state = lbc_override = lbc;
-    else
-      lbc_override = *state;
-    utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
+    int state_bc, state_icb; /* boundclass and indic_conjunct_break state */
+    if (*state == 0) { /* state initialization */
+      state_bc = lbc;
+      state_icb = licb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT ? licb : UTF8PROC_INDIC_CONJUNCT_BREAK_NONE;
+    }
+    else { /* lbc and licb are already encoded in *state */
+      state_bc = *state & 0xff;  // 1st byte of state is bound class
+      state_icb = *state >> 8;   // 2nd byte of state is indic conjunct break
+    }
+
+    utf8proc_bool break_permitted = grapheme_break_simple(state_bc, tbc) &&
+       !(state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER
+        && ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT); // GB9c
+
+    // Special support for GB9c.  Don't break between two consonants
+    // separated 1+ linker characters and 0+ extend characters in any order.
+    // After a consonant, we enter LINKER state after at least one linker.
+    if (ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT
+        || state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT
+        || state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND)
+      state_icb = ticb;
+    else if (state_icb == UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER)
+      state_icb = ticb == UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND ?
+                  UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER : ticb;

    // Special support for GB 12/13 made possible by GB999. After two RI
    // class codepoints we want to force a break. Do this by resetting the
    // second RI's bound class to UTF8PROC_BOUNDCLASS_OTHER, to force a break
    // after that character according to GB999 (unless of course such a break is
    // forbidden by a different rule such as GB9).
-    if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
-      *state = UTF8PROC_BOUNDCLASS_OTHER;
+    if (state_bc == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
+      state_bc = UTF8PROC_BOUNDCLASS_OTHER;
    // Special support for GB11 (emoji extend* zwj / emoji)
-    else if (*state == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
+    else if (state_bc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
      if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
-        *state = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
+        state_bc = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
      else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
-        *state = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
+        state_bc = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
      else
-        *state = tbc;
+        state_bc = tbc;
    }
    else
-      *state = tbc;
+      state_bc = tbc;

+    *state = state_bc + (state_icb << 8);
    return break_permitted;
  }
  else
@ -326,8 +345,12 @@ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t
 UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
    utf8proc_int32_t c1, utf8proc_int32_t c2, utf8proc_int32_t *state) {

-  return grapheme_break_extended(utf8proc_get_property(c1)->boundclass,
-                                 utf8proc_get_property(c2)->boundclass,
+  const utf8proc_property_t *p1 = utf8proc_get_property(c1);
+  const utf8proc_property_t *p2 = utf8proc_get_property(c2);
+  return grapheme_break_extended(p1->boundclass,
+                                 p2->boundclass,
+                                 p1->indic_conjunct_break,
+                                 p2->indic_conjunct_break,
                                 state);
 }

@ -356,9 +379,9 @@ static utf8proc_int32_t seqindex_decode_index(const utf8proc_uint32_t seqindex)

 static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
  utf8proc_ssize_t written = 0;
-  const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
-  int len = seqindex >> 13;
-  if (len >= 7) {
+  const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x3FFF];
+  int len = seqindex >> 14;
+  if (len >= 3) {
    len = *entry;
    entry++;
  }
@ -376,19 +399,19 @@ static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqinde
 UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
 {
  utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
-  return cl != UINT16_MAX ? seqindex_decode_index(cl) : c;
+  return cl != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cl) : c;
 }

 UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
 {
  utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
-  return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
+  return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
 }

 UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
 {
  utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
-  return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
+  return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
 }

 UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c)
@ -410,7 +433,7 @@ UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
 }

 UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
-  return utf8proc_get_property(c)->category;
+  return (utf8proc_category_t) utf8proc_get_property(c)->category;
 }

 UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
@ -420,7 +443,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {

 #define utf8proc_decompose_lump(replacement_uc) \
  return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
-  options & ~UTF8PROC_LUMP, last_boundclass)
+  options & ~(unsigned int)UTF8PROC_LUMP, last_boundclass)

 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
  const utf8proc_property_t *property;
@ -498,8 +521,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
  }
  if (options & UTF8PROC_CHARBOUND) {
    utf8proc_bool boundary;
-    int tbc = property->boundclass;
-    boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
+    boundary = grapheme_break_extended(0, property->boundclass, 0, property->indic_conjunct_break,
+                                       last_boundclass);
    if (boundary) {
      if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
      if (bufsize >= 2) dst[1] = uc;
@ -735,7 +758,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
  *dstptr = NULL;
  result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
  if (result < 0) return result;
-  buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
+  buffer = (utf8proc_int32_t *) malloc(((utf8proc_size_t)result) * sizeof(utf8proc_int32_t) + 1);
  if (!buffer) return UTF8PROC_ERROR_NOMEM;
  result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
  if (result < 0) {
--- a/3rdparty/utf8proc/utf8proc.h
+++ b/3rdparty/utf8proc/utf8proc.h
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2019 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
+ * Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
 * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -71,9 +71,9 @@
 /** The MAJOR version number (increased when backwards API compatibility is broken). */
 #define UTF8PROC_VERSION_MAJOR 2
 /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
-#define UTF8PROC_VERSION_MINOR 6
+#define UTF8PROC_VERSION_MINOR 9
 /** The PATCH version (increased for fixes that do not change the API). */
-#define UTF8PROC_VERSION_PATCH 1
+#define UTF8PROC_VERSION_PATCH 0
 /** @} */

 #include <stdlib.h>
@ -273,7 +273,8 @@ typedef struct utf8proc_property_struct {
   * Boundclass.
   * @see utf8proc_boundclass_t.
   */
-  unsigned boundclass:8;
+  unsigned boundclass:6;
+  unsigned indic_conjunct_break:2;
 } utf8proc_property_t;

 /** Unicode categories. */
@ -388,6 +389,14 @@ typedef enum {
  UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */
 } utf8proc_boundclass_t;

+/** Indic_Conjunct_Break property. (TR44) */
+typedef enum {
+  UTF8PROC_INDIC_CONJUNCT_BREAK_NONE = 0,
+  UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER = 1,
+  UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT = 2,
+  UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND = 3,
+} utf8proc_indic_conjunct_break_t;
+
 /**
 * Function pointer type passed to @ref utf8proc_map_custom and
 * @ref utf8proc_decompose_custom, which is used to specify a user-defined
@ -481,8 +490,9 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
 * - @ref UTF8PROC_STRIPNA   - remove unassigned codepoints
 * @param last_boundclass
 * Pointer to an integer variable containing
- * the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND
- * option is used.  Otherwise, this parameter is ignored.
+ * the previous codepoint's (boundclass + (indic_conjunct_break << 8)) if the @ref UTF8PROC_CHARBOUND
+ * option is used.  If the string is being processed in order, this can be initialized to 0 for
+ * the beginning of the string, and is thereafter updated automatically.  Otherwise, this parameter is ignored.
 *
 * @return
 * In case of success, the number of codepoints written is returned; in case
--- a/3rdparty/utf8proc/utf8proc_data.c
+++ b/3rdparty/utf8proc/utf8proc_data.c
--- a/2
+++ b/2
@ -197,7 +197,7 @@ tap-windows.h
 Copyright (C) 2002-2014 OpenVPN Technologies, Inc.

 utf8proc
-Copyright (c) 2014-2015 Steven G. Johnson, Jiahao Chen, Tony Kelman,
+Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Tony Kelman,
 Jonas Fonseca, and other contributors

 Permission is hereby granted, free of charge, to any person obtaining a
--- a/scripts/src/3rdparty.lua
+++ b/scripts/src/3rdparty.lua
@ -1747,11 +1747,6 @@ project "utf8proc"
 			"verbose=-1",
 		}

-	configuration { "gmake or ninja" }
-		buildoptions_c {
-			"-Wno-strict-prototypes",
-		}
-
 	configuration { }

 	files {