% \iffalse meta-comment
%
% Copyright (C) 2026 Alan J. Cain
%
% This file may be distributed and/or modified under the conditions of the LaTeX Project Public License, either version
% 1.3c of this license or (at your option) any later version. The latest version of this license is in:
%
% http://www.latex-project.org/lppl.txt
%
% and version 1.3c or later is part of all distributions of LaTeX version 2008-05-04 or later.
%
% \fi
%
% \iffalse
%<*driver>
\PassOptionsToPackage{inline}{enumitem}
\documentclass{l3doc}

\usepackage{polyglossia}
\setmainlanguage[variant=british]{english}


\makeatletter
\ExplSyntaxOn

\cs_gset:Npn \l@subsection { \@dottedtocline{2}{2.5em}{2.8em} }  % #2 = 1.5em
\cs_gset:Npn \l@subsubsection { \@dottedtocline{3}{5.3em}{3.5em} }  % #2 = 1.5em
\cs_gset:Npn \l@paragraph { \@dottedtocline{4}{8.8em}{3.2em} }  % #2 = 1.5em

\ExplSyntaxOff
\makeatother


\usepackage{xcolor}

\definecolor{linkcolor}{rgb}{0.0,0.4,0.7}
\colorlet{citecolor}{linkcolor}
\colorlet{urlcolor}{linkcolor}

\hypersetup{
  linkcolor=linkcolor,%
  citecolor=citecolor,%
  urlcolor=urlcolor,%
}
\usepackage{xurl}
\renewcommand*\UrlBigBreaks{}

\newcommand*\fullref[2]{%
  \hyperref[#2]{#1\penalty 200\ \ref*{#2}}%
}
\newcommand*\fullpageref[1]{%
  \hyperref[#1]{page\penalty 200\ \pageref*{#1}}%
}


\setcounter{tocdepth}{7}
\numberwithin{figure}{section}



\usepackage{lua-list-hyphen}

\usepackage{lipsum}
\usepackage{tikz}

\newcommand*\key[1]{\texttt{#1}}
\newcommand*\val[1]{\texttt{#1}}
\newcommand*\keyvalue[2]{\texttt{#1=#2}}

\newlist{vallist}{description}{1}
\setlist[vallist]{
  leftmargin=3em,
  style=unboxed,
  labelsep=1em,
  font=\descriptionitemcolon,
  nosep,
}
\newcommand*{\descriptionitemcolon}[1]{\kern 1em #1:}

\NewDocumentCommand{\default}{ m }{(\textit{Default:}\nobreakspace #1)}


\newcommand*\luafunc[1]{\texttt{#1}}
\newcommand*\luavar[1]{\texttt{#1}}



\newcommand*\prefixedurl[1]{\textsc{url}:~\url{#1}}



\begin{document}

\DocInput{lua-list-hyphen.dtx}

\PrintIndex

\end{document}
%</driver>
% \fi
%
%
%
% \GetFileInfo{lua-list-hyphen.sty}
%
%
%
% \title{^^A
% \pkg{lua-list-hyphen} ^^A
%   --- Per-language listing of hyphenated words for Lua\LaTeX^^A
%   \footnote{This document describes \fileversion, last revised \filedate.}^^A
% }
%
% \author{^^A
%  Alan J. Cain\footnote{\texttt{a.j.cain (AT) gmail.com}}^^A
% }
%
% \date{Released \filedate}
%
% \maketitle
%
%
%
% \begin{abstract}
%   This Lua\LaTeX\ package writes each word that has been hyphenated across lines to a file, using a different file for
%   each language, for subsequent external checking.
% \end{abstract}
%
%
%
% \tableofcontents
%
%
%
% \begin{documentation}
%
%
%
% \section{Introduction}
%
% \TeX's algorithm for finding points where a word can be hyphenated is good, but not perfect.\footnote{For a
% description of the algorithm and its limitations, see Knuth's account in Appendix~H of \textit{The \TeX book}
% (Addison-Wesley, 2021. ISBN:~\texttt{978-0-201-13447-6})} The present author writes in British English, where the
% valid division points can depend on both the pronunciation of a word and its internal structure (and hence its
% etymology). Currently, \TeX's pattern-based approach produces \textit{bio-lo-gic}, \textit{bio-logy},
% \textit{bio-lo-gist}, rather than the standard \textit{bio-logic}, \textit{biol-ogy},
% \textit{biolo-gist}.\footnote{See the \textit{New Oxford Spelling Dictionary}, which is the authority for word
% divisions in British English (Oxford University Press, 2005. ISBN:~\texttt{978-0-19-860881-3}).} To deal with such
% cases, at least a substantially larger number of patterns would be required than are available at present. There are
% also various words where the valid division points in British English cannot be deduced from their spelling alone: for
% instance, the verbs \textit{at-trib-ute}, \textit{pre-sent}, \textit{pro-duce}, \textit{re-cord} have different
% division points from the orthographically identical nouns \textit{at-tri-bute}, \textit{pres-ent}, \textit{prod-uce},
% \textit{rec-ord}. For another example, compare \textit{cur-ric-ulum vitae} and \textit{school cur-ricu-lum}.
%
% Easy checking of the chosen hyphenations is desirable. With Lua\TeX, it is possible to extract the hyphenated words.
% The Lua\LaTeX\ package \pkg{lua-check-hyphen} offers this facility. It checks hyphenated words against a whitelist,
% visually flags unknown hyphenations, and writes unknown hyphenations to a file. But it was first written in 2012, when
% Lua\TeX\ was at an earlier stage of development, and so it has certain problems, such as with words containing
% ligatures. It also lacks multi-language support.
%
% This Lua\LaTeX\ package, \pkg{lua-list-hyphen}, uses some ideas from \pkg{lua-check-hyphen} but was written from
% scratch to work with a modern Lua\TeX. It simply writes hyphenated words from each language to a separate file, so
% that they can be checked (manually or by an external program).
%
% [The author has written a simple Python application \texttt{hyphenassist}\footnote{\textsc{url}:
% \url{https://codeberg.org/ajcain/hyphenassist}.} that checks the listed hyphenations against a dictionary of valid
% divisions and allows the user to quickly choose to add entries to the division dictionary, add hyphenation exceptions,
% or ignore particular hyphenations. He has used this program in conjunction with code incorporated into this package to
% check hyphenations in his own books.\footnote{In particular, \textit{Form \& Number: A History of Mathematical
% Beauty}. \textsc{url}: \url{https://archive.org/details/cain_formandnumber_ebook_large}.}]
%
%
%
% \paragraph*{Licence.} \noindent\pkg{lua-list-hyphen} is released under the \LaTeX\ Project Public Licence v1.3c or
% later.\footnote{\textsc{url}: \url{https://www.latex-project.org/lppl.txt}}
%
%
%
% \paragraph*{Acknowledgements.} The author thanks Keno Wehr for corrections and comments on the documentation.
%
%
%
% \paragraph*{Feature requests and bug reports}
%
% The development code and issue tracker are hosted at Codeberg.\footnote{\textsc{url}:
% \url{https://codeberg.org/ajcain/lua-list-hyphen}}
%
%
%
% \section{Requirements}
%
% \pkg{lua-list-hyphen} requires
% \begin{enumerate}[label={(\arabic*)}]
%   \item Lua\LaTeX,
%   \item a recent \LaTeX\ kernel with \pkg{expl3} support (any kernel version since 2022-06-01 should suffice).
% \end{enumerate}
% It does not depend on any other packages, but will interface with \pkg{babel} or \pkg{polyglossia} (if one of them is
% loaded) to determine language names.
%
%
%
% \section{Installation}
%
% To install \pkg{lua-list-hyphen} manually, run \texttt{luatex lua-list-hyphen.ins} and copy
% \texttt{lua-list-hyphen.sty} and \texttt{lua-list-hyphen.lua} to somewhere Lua\LaTeX\ can find them.
%
%
%
% \section{Getting started}
%
% Simply load the package; the hyphenated words are by default written to the file
% \cs{jobname}\file{-}\meta{lang-id}\file{.hyph}, without being sorted or having duplicates removed. The \meta{lang-id}
% is either a Lua\TeX\ numerical language~ID, or a \pkg{babel} or \pkg{polyglossia} name of the language, if one of
% these packages is in use. The prefix \cs{jobname}\file{-} and the extension \file{.hyph} can be customized; see
% \fullref{Section}{sec:options}.
%
%
%
% \section{Package options}
% \label{sec:options}
%
% \DescribeOption{verbose} The boolean option \key{verbose} controls how much information is written to the file about
% each hyphenated word. When \val{true}, for each hyphenated word, both the undivided original and the divided word are
% written out, as well as the page number on which the hyphenated word appears (or, more precisely, begins) and the
% undivided word in context (as specified by the \key{context} keys; see below). When \val{false}, only the hyphenated
% word is written. \default{\val{false}}
%
% \DescribeOption{context}
% \DescribeOption{context-before}
% \DescribeOption{context-after}
% Integer options controlling how many words before (\key{context-before}) and after (\key{context-after}) the
% hyphenated word are written as context when \keyvalue{verbose}{true}. The key \key{context} is simply a shortcut for
% setting \key{context-before} and \key{context-after} to the same value. \default{\val{2}}
%
% \medskip
% \DescribeOption{unique}
% The option \key{unique} controls removal of duplicates from the list of hyphenated words written out. It can be
% set to one of the following three values:
% \begin{vallist}
%   \item[\val{none}] Duplicate hyphenations are not removed.
%   \item[\val{case}] Hyphenations that are duplicate (case-sensitively) are removed. In this case, the hyphenations
%     \texttt{geo-metry} and \texttt{Geo-metry} are considered to be distinct.
%   \item[\val{nocase}] Hyphenations that are duplicate (case-insensitively) are removed. In this case, the hyphenations
%     \texttt{geo-metry} and \texttt{Geo-metry} are considered to be duplicates. The case of each listed hyphenation
%     will be that of the first appearance of that hyphenation.
% \end{vallist}
% Note that removal of duplicates is unaffected by the page number or context that is written out when
% \keyvalue{verbose}{true}. \default{\val{none}}
%
% \DescribeOption{sort}
% The option \key{sort} controls sorting of the list of hyphenated words. It can be
% set to one of the following three values:
% \begin{vallist}
%   \item[\val{none}] Hyphenations appear in the same order as they occur in the document, or, if duplicates are removed,
%     in the order of first appearance in the document.
%     \item[\val{case}] Hyphenations are sorted case-sensitively. In this case, \texttt{Geo-metry} precedes
%     \texttt{geo-meter}.
%   \item[\val{nocase}] Hyphenations are sorted case-insensitively. In this case, \texttt{geo-meter} precedes
%     \texttt{Geo-metry}.
% \end{vallist}
% \default{\val{none}}
%
% \DescribeOption{include-non-output}
% Boolean option determining whether hyphenated words that are never written to the page are listed. (For instance,
% a hyphenated word might occur in text that a package temporarily typesets into a box, measures, and then discards.)
% \default{\val{false}}
%
% \medskip
% The two options \key{prefix} and \key{extension} specify the files to which hyphenations are written. Between the
% prefix and the extension is either a Lua\TeX\ numerical language~ID, or a \pkg{babel} or \pkg{polyglossia}
% name of the language, if one of these packages is in use.
%
% \DescribeOption{prefix}
% The \key{prefix} is the part of the file name to which the list of hyphenated words is written, before the
% language~ID.
% \default{\cs{jobname}\file{-} (note the hyphen).}
%
% \DescribeOption{extension}
% The extension of the file (including the \file{.}) to which the list of hyphenated words for each language is written.
% \default{\file{.hyph}}
%
% \medskip
% \DescribeOption{debug}
% The boolean option \key{debug} controls whether debugging information is written to the terminal.
% \default{\val{false}}
%
%
%
% \section{Output format}
%
% Each output file begins with a header (each line of which begins with a ‘comment’ symbol \texttt{\%}) that includes
% information about the language and the package options that were used. Each line of the remainder of the file
% describes one hyphenation.
%
% When \keyvalue{verbose}{false}, the line contains only the hyphenated word.
%
% ^^A Use \symbol{34} in this paragraph since " is an active character.
% When \keyvalue{verbose}{true}, the line contains the original undivided word, the hyphenated word, the page number
% where the hyphenated word appears (or, to be precise, begins), and the context in which the hyphenated word appears.
% Each part of the output is padded so that the various lines align. The undivided and hyphenated words are separated by the
% ASCII ‘arrow’ \texttt{->}; the page number is prefixed by \texttt{p.}; and the context is surrounded by (straight)
% quotation marks \texttt{\symbol{34}}\kern .5em\texttt{\symbol{34}}. If the hyphenation was never written to the page,
% \texttt{p.}\meta{page} is replaced by \texttt{<none>}. (This can only happen with
% \keyvalue{include-non-output}{true}.)
%
%
%
% \section{Usage notes}
%
% \subsection{Languages}
%
% To determine the language of a word, \pkg{lua-list-hyphen} looks at what language is applied at the first possible
% hyphenation point, first considering the part of the word before it, then the part after it. In the (presumably rare)
% case of a ‘mixed-language’ word like ‘near-Zugzwang’ being specified (using, for example, \pkg{babel}) with
% \texttt{near-\cs{foreignlanguage}\{german\}\{Zugzwang\}}, it would be assigned to the language in which \hbox{‘near-’}
% is set.
%
% Duplicates are removed within each language. If the same hyphenation occurs in two different languages, it will appear
% in both files, regardless of the value of \key{unique}.
%
%
%
% \subsection{Limitations}
%
% \pkg{lua-list-hyphen} uses Lua\TeX's built-in Unicode functions for pattern matching and converting between upper and
% lower case, which are based on the \texttt{slnunicode} library. This library has not been updated for some time and is
% based on an out-of-date version of the Unicode standard. Thus there may be problems with languages added to Unicode
% more recently. Hyphenated words from such languages should still be listed, but may contain extraneous characters
% (such as adjacent punctuation) and may not be sorted correctly. Users may prefer to leave sorting and removal of
% duplicates to an external program that adheres to the current Unicode standard.
%
%
%
% \end{documentation}
%
%
%
% \clearpage
% \begin{implementation}
%
%
%
% \section{Implementation (\LaTeX\ package)}
%
%    \begin{macrocode}
%<*package>
%<@@=lualisthyphen>
%    \end{macrocode}
%
%
%
% \subsection{Initial set-up}
%
% Package identification/version information. The kernel date requested is 2022-06-01, the first release providing
% \cs{ProcessKeyOptions}, which is used below to process the package options. (The \texttt{enddocument/\ldots} hooks
% used below are also available from that release.)
%    \begin{macrocode}
\NeedsTeXFormat{LaTeX2e}[2022-06-01]
\ProvidesExplPackage{lua-list-hyphen}{2026-05-02}{0.3.10}
  {Listing hyphenated words for LuaLaTeX}
%    \end{macrocode}
% Check that Lua\TeX\ is in use.
%    \begin{macrocode}
\sys_if_engine_luatex:F
  {
    \msg_new:nnn{ lua-list-hyphen }{ lualatex_required }
      { LuaLaTeX~required.~Package~loading~will~abort. }
    \msg_critical:nn{ lua-list-hyphen }{ lualatex_required }
  }
%    \end{macrocode}
%
%
%
% \subsection{Options}
%
% \begin{macro}{\l_@@_verbose_bool}
%   Boolean option to indicate whether lists of hyphenations should be written verbosely.
%    \begin{macrocode}
\keys_define:nn { lua-list-hyphen }{
  verbose .bool_set:N = \l_@@_verbose_bool,
}
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}{\l_@@_context_before_int,\l_@@_context_after_int}
%   Integer options to determine the number of words before and after a hyphenation shown as context in verbose output.
%    \begin{macrocode}
\keys_define:nn { lua-list-hyphen }{
  context-before .int_set:N = \l_@@_context_before_int,
  context-before .initial:n = { 2 },
  context-after .int_set:N = \l_@@_context_after_int,
  context-after .initial:n = { 2 },
  context .code:n = {
    \keys_set:nn{ lua-list-hyphen }{
      context-before=#1,
      context-after=#1,
    }
  },

}
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}{\l_@@_unique_int}
%   Choice option to indicate whether lists of hyphenations should have duplicates removed, case-sensitively or
%   case-insensitively.
%    \begin{macrocode}
\int_new:N\l_@@_unique_int
\keys_define:nn { lua-list-hyphen }{
  unique .choices:nn = { none, case, nocase }{
    \int_set:Nn\l_@@_unique_int{ \l_keys_choice_int - 1 }
  },
}
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}{\l_@@_sort_int}
%   Choice option to indicate whether lists of hyphenations should be sorted, case-sensitively or case-insensitively.
%    \begin{macrocode}
\int_new:N\l_@@_sort_int
\keys_define:nn { lua-list-hyphen }{
  sort .choices:nn = { none, case, nocase }{
    \int_set:Nn\l_@@_sort_int{ \l_keys_choice_int - 1 }
  },
}
%    \end{macrocode}
% \end{macro}
%
%
%
%
% \begin{macro}{\l_@@_include_non_output_bool}
%   Boolean option to indicate whether lists of hyphenations should include those that are never output to the page.
%    \begin{macrocode}
\keys_define:nn { lua-list-hyphen }{
  include-non-output .bool_set:N = \l_@@_include_non_output_bool,
}
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}{\l_@@_file_prefix_str}
%   String option for the prefix of files to which hyphenations are written.
%    \begin{macrocode}
\keys_define:nn { lua-list-hyphen }{
  prefix .str_set:N = \l_@@_file_prefix_str,
  prefix .initial:e = { \c_sys_jobname_str- },
}
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}{\l_@@_file_extension_str}
%   String option for the extension of files to which hyphenations are written.
%    \begin{macrocode}
\keys_define:nn { lua-list-hyphen }{
  extension .str_set:N = \l_@@_file_extension_str,
  extension .initial:n = { .hyph },
}
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}{
%   \l_@@_debug_int
% }
%   Boolean option to specify whether debug information is written to the terminal. Not intended for end users. The
%   setting is stored as an integer (\texttt{1} for true, \texttt{0} for false) so that the Lua backend can read it as
%   a count register. The key is defined as a choice between \val{true} and \val{false}, so that
%   \keyvalue{debug}{false} really does disable debugging; giving the key without a value is equivalent to
%   \keyvalue{debug}{true}.
%    \begin{macrocode}
\int_new:N\l_@@_debug_int
\keys_define:nn { lua-list-hyphen }{
  debug .choice:,
  debug / true .code:n = { \int_set_eq:NN\l_@@_debug_int\c_one_int },
  debug / false .code:n = { \int_zero:N\l_@@_debug_int },
  debug .default:n = { true },
}
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{Processing package options}
%
% Process package options.
%    \begin{macrocode}
\ProcessKeyOptions [ lua-list-hyphen ]
%    \end{macrocode}
%
%
%
% Convert boolean options to integers (which can be accessed from Lua).
%    \begin{macrocode}
\int_new:N\l_@@_verbose_int
\bool_if:NT\l_@@_verbose_bool
  { \int_set_eq:NN\l_@@_verbose_int\c_one_int }
\int_new:N\l_@@_include_non_output_int
\bool_if:NT\l_@@_include_non_output_bool
  { \int_set_eq:NN\l_@@_include_non_output_int\c_one_int }
%    \end{macrocode}
%
%
%
% \subsection{Lua backend}
%
% Load the Lua backend.
%    \begin{macrocode}
\lua_now:n{
  lualisthyphen = require('lua-list-hyphen')
}
%    \end{macrocode}
%
%
%
% \subsection{Saving \pkg{babel} language names}
%
% At \texttt{enddocument/afterlastpage}, if possible save \pkg{babel}'s language names. (\pkg{polyglossia}'s names can
% be found directly from Lua.)
%    \begin{macrocode}
\hook_gput_code:nnn{ enddocument/afterlastpage }{ lua-list-hyphen } {
  \@@_babel_save_language_names:
}
%    \end{macrocode}
%
% \begin{macro}{\@@_babel_save_language_names:}
%   If \pkg{babel} is in use, get language names from \cs{bbl@languages}.
%    \begin{macrocode}
\cs_new:Npn \@@_babel_save_language_names:
  {
    \cs_if_exist:NT\bbl@languages
      {
%    \end{macrocode}
%   Iterate through \cs{bbl@languages} to get language names. Items stored in this macro are quadruples prefixed with
%   \cs{bbl@elt}, so locally redefine this latter macro to an auxiliary function that passes language ID/name pairs to
%   the Lua backend.
%    \begin{macrocode}
        \group_begin:
        \cs_set_eq:NN
          \bbl@elt
          \@@_babel_save_language_names_elt:nnnn
        \bbl@languages
        \group_end:
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_babel_save_language_names_elt:nnnn}
%   Auxiliary function that takes a quadruple stored in \cs{bbl@languages} and passes language ID/name pairs to the Lua
%   backend.
%    \begin{macrocode}
\cs_new:Npn \@@_babel_save_language_names_elt:nnnn #1#2#3#4
  {
    \lua_now:n{
      lualisthyphen.babel_save_language_name(#2,'#1')
    }
  }
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{Processing and writing hyphenation lists}
%
% At \texttt{enddocument/info}, process and output the hyphenations that have been found.
%    \begin{macrocode}
\hook_gput_code:nnn{ enddocument/info }{ lua-list-hyphen } {
  \@@_process_write_hyphenation_lists:ee
    {\str_use:N\l_@@_file_prefix_str}
    {\str_use:N\l_@@_file_extension_str}
}
%    \end{macrocode}
%
%
%
% \begin{macro}{\@@_process_write_hyphenation_lists:nn}
%   Sort the list of hyphenations into separate lists for each language, sort and deduplicate them as required, and
%   write them to files with prefix given in the first parameter and suffix in the second.
%    \begin{macrocode}
\cs_new:Npn \@@_process_write_hyphenation_lists:nn #1#2
  {
    \lua_now:e{
      lualisthyphen.process_write_hyphenation_lists(
        '\luaescapestring{#1}',
        '\luaescapestring{#2}'
      )
    }
  }
\cs_generate_variant:Nn
  \@@_process_write_hyphenation_lists:nn
  { ee }
%    \end{macrocode}
% \end{macro}
%
%
%
%    \begin{macrocode}
%</package>
%    \end{macrocode}
%
%
%
% \section{Implementation (Lua backend)}
%
%    \begin{macrocode}
%<*lua>
%    \end{macrocode}
%
%
%
% \subsection{Debugging function}
%
% \begin{macro}[int]{debug}
%   Debugging function. Defined according to the package option \key{debug} to either do nothing or write debugging
%   information.
%    \begin{macrocode}
-- Debug logger, chosen once when this file is loaded. Note that this local
-- shadows Lua's standard `debug` library for the rest of the file.
local debug

if tex.count['l__lualisthyphen_debug_int'] ~= 0 then
  -- Debugging requested: print the message with a package prefix.
  debug = function(s)
    print('lua-list-hyphen DEBUG: ' .. s)
  end
else
  -- Debugging not requested: accept the message and do nothing.
  debug = function(s)
  end
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{Table key constants}
%
% Keys for tables containing hyphenatable/hyphenated word data.
%    \begin{macrocode}
-- String keys used to index the tables that hold word/segment data. KEY_TYPE
-- is the key under which a segment's SEGMENT_* type is stored; the remaining
-- keys are presumably used for the fields their names suggest (confirm
-- against the code that fills and reads these tables).
local KEY_TYPE = 'type'
local KEY_WORD = 'word'
local KEY_LANG = 'lang'
local KEY_DIVISION = 'division'
local KEY_INDEX = 'index'
local KEY_CONTEXT = 'context'
local KEY_PAGE = 'page'
%    \end{macrocode}
%
%
%
% \subsection{Segment type}
%
% Constants for types of segments found while scanning hlist before linebreaking.
%    \begin{macrocode}
-- The three kinds of segment recorded while scanning an hlist: a word, a
-- space, or a stretch of math. Stored under KEY_TYPE in each segment table.
local SEGMENT_WORD = 0
local SEGMENT_SPACE = 1
local SEGMENT_MATH = 2
%    \end{macrocode}
%
%
%
% \subsection{Node ID and subtype constants}
%
% Define constants for the node IDs that need to be recognized.
%    \begin{macrocode}
-- Node type ids, looked up by name once at load time with node.id so that
-- later code can compare a node's `id` field against these locals.
local NODE_ID_HLIST = node.id('hlist')
local NODE_ID_DISC = node.id('disc')
local NODE_ID_GLUE = node.id('glue')
local NODE_ID_KERN = node.id('kern')
local NODE_ID_MARGIN_KERN = node.id('margin_kern')
local NODE_ID_GLYPH = node.id('glyph')
local NODE_ID_MATH = node.id('math')
%    \end{macrocode}
% Define constants for the kern node subtypes that have to be recognized. (There seems to be no automatic way to get
% the numerical value from the subtype text other than searching the \luavar{node.subtypes(\meta{node type})} tables.)
%    \begin{macrocode}
local NODE_KERN_SUBTYPE_FONTKERN
local NODE_KERN_SUBTYPE_USERKERN
-- Search the kern subtype table by name: each key is taken as the subtype
-- value and each value as the subtype's name.
for subtype_id, subtype_name in pairs(node.subtypes('kern')) do
  if subtype_name == 'userkern' then
    NODE_KERN_SUBTYPE_USERKERN = subtype_id
  elseif subtype_name == 'fontkern' then
    NODE_KERN_SUBTYPE_FONTKERN = subtype_id
  end
end
%    \end{macrocode}
% Define constants for the math node subtypes.
%    \begin{macrocode}
local NODE_MATH_SUBTYPE_BEGIN
local NODE_MATH_SUBTYPE_END
-- Search the math subtype table by name: each key is taken as the subtype
-- value and each value as the subtype's name.
for subtype_id, subtype_name in pairs(node.subtypes('math')) do
  if subtype_name == 'endmath' then
    NODE_MATH_SUBTYPE_END = subtype_id
  elseif subtype_name == 'beginmath' then
    NODE_MATH_SUBTYPE_BEGIN = subtype_id
  end
end
%    \end{macrocode}
%
%
%
% \subsection{Output constants}
%
% Constants for output.
%    \begin{macrocode}
-- Fixed strings used when formatting output lines.
local STR_MATH = '[MATH]'            -- stands in for a stretch of math
local STR_SPACE = ' '                -- single space; also the padding character in rpad/lpad
local STR_SPACE_TWO = '  '           -- two spaces; presumably a column separator (confirm at point of use)
local STR_ARROW = ' -> '             -- separates the undivided word from the hyphenated word
local STR_PAGE_PREFIX = 'p.'         -- precedes the page number
local STR_PAGE_NONE = '<none>'       -- replaces the page reference when the word never reached a page
local STR_QUOTE_OPEN = '"'           -- opens the quoted context
local STR_QUOTE_CLOSE = '"'          -- closes the quoted context
%    \end{macrocode}
%
%
%
% \subsection{Utility functions}
%
% \begin{macro}[int]{list_filter}
%   Take a list \luavar{t} and remove from it any elements for which the function
%   \luavar{f} does not return true. (The index \luavar{j} is always the destination index to which a ‘keep’ element
%   is moved.)\footnote{Code adapted from \url{https://stackoverflow.com/a/53038524}.}
%    \begin{macrocode}
-- Filter the sequence `t` in place, keeping only the elements for which
-- `f(element)` is truthy and preserving their relative order.
--   t: a sequence (no nil holes); it is modified and nothing is returned.
--   f: predicate called once per element, in order.
local function list_filter(t, f)
  -- Next free slot for a kept element; never ahead of the read position.
  local write_index = 1

  for read_index = 1, #t do
    if not f(t[read_index]) then
      -- Rejected: drop the element.
      t[read_index] = nil
    else
      -- Kept: slide it down to the write position if a gap has opened.
      if read_index ~= write_index then
        t[write_index] = t[read_index]
        t[read_index] = nil
      end
      write_index = write_index + 1
    end
  end

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{list_uniq}
%   Take a list \luavar{t} and remove from it adjacent elements for which the function \luavar{f} returns true. (The
%   index \luavar{j} is always the last ‘kept’ element.)
%    \begin{macrocode}
-- Remove runs of adjacent duplicates from the sequence `t`, in place.
--   t: a sequence (no nil holes); it is modified and nothing is returned.
--   f: called as f(candidate, last_kept); a truthy result means the
--      candidate duplicates the last kept element and is dropped.
-- The first element of each run is the one that is kept.
--
-- The list is compacted in the same pass. The length is taken once, before
-- anything is removed, because `#t` is unspecified for a table with nil
-- holes; punching holes first and measuring afterwards could lose kept
-- elements that follow a run of duplicates.
local function list_uniq(t, f)
  local n = #t
  -- Index of the last kept element (elements 1..j are the compacted result).
  local j = 1

  for i=2,n do
    if not f(t[i],t[j]) then
      j = j + 1
      t[j] = t[i]
    end
  end

  -- Clear the now-unused tail so that `t` is a proper sequence of length j.
  for i=j+1,n do
    t[i] = nil
  end
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{Getting text from nodes}
%
% Getting the components of the ligatures that have Unicode code points can be problematic, at least for some fonts,
% so define a lookup table for these cases.
%    \begin{macrocode}
-- Maps the code points U+FB00..U+FB04 (the ligatures that have their own
-- Unicode code points) to the letters they stand for. Consulted first by
-- get_node_text, keyed by a glyph node's `char` field.
local LIGATURE_TEXT = {
  [0xfb00] = 'ff',
  [0xfb01] = 'fi',
  [0xfb02] = 'fl',
  [0xfb03] = 'ffi',
  [0xfb04] = 'ffl',
}
%    \end{macrocode}
%
%
%
% Cache to save table lookups when extracting text.
%    \begin{macrocode}
local font_characters = {}
%    \end{macrocode}
%
%
%
% Extracting text from nodes uses two functions that call each other, so the names have to be defined ahead of time.
%    \begin{macrocode}
local get_node_text
local get_nodelist_text
%    \end{macrocode}
%
%
%
% \begin{macro}[int]{get_node_text}
%   Return the text content of a glyph node (which might be a normal glyph, a ligature, etc.).
%    \begin{macrocode}
-- Convert a `tounicode` value to UTF-8 text, or return nil if it cannot be
-- interpreted. The value is expected to be a string of hexadecimal digits.
-- When its length is a multiple of four it is decoded as a sequence of
-- UTF-16 code units, so that surrogate pairs and multi-character mappings
-- come out correctly; a short string of any other length is read as a
-- single code point (which is how every value used to be read).
local function tounicode_to_text(u)

  if type(u) ~= 'string' or u == '' or u:find('%X') then
    return nil
  end

  if #u % 4 ~= 0 then
    if #u > 6 then
      return nil
    end
    local code_point = tonumber(u, 16)
    if code_point > 0x10FFFF then
      return nil
    end
    return utf8.char(code_point)
  end

  local pieces = {}
  local high = nil   -- pending high surrogate, if any
  for unit_hex in u:gmatch('%x%x%x%x') do
    local unit = tonumber(unit_hex, 16)
    if high then
      -- A high surrogate must be followed by a low surrogate.
      if unit < 0xDC00 or unit > 0xDFFF then
        return nil
      end
      pieces[#pieces + 1] = utf8.char(
        0x10000 + (high - 0xD800) * 0x400 + (unit - 0xDC00)
      )
      high = nil
    elseif unit >= 0xD800 and unit <= 0xDBFF then
      high = unit
    elseif unit >= 0xDC00 and unit <= 0xDFFF then
      -- Low surrogate without a preceding high surrogate.
      return nil
    else
      pieces[#pieces + 1] = utf8.char(unit)
    end
  end

  if high then
    return nil
  end

  return table.concat(pieces)

end

-- Return the text content of node `n` as a UTF-8 string.
--   * glyph node: the LIGATURE_TEXT entry for its character if there is
--     one; otherwise the text of its `components` list if it has one;
--     otherwise the font's `tounicode` mapping for the character, falling
--     back to the character code itself when no usable mapping exists
--     (rather than raising an error in the middle of processing);
--   * disc node: the text of its `replace` list ('' if there is none);
--   * any other node: ''.
get_node_text = function(n)

  if n.id == NODE_ID_GLYPH then

    local ligature_text = LIGATURE_TEXT[n.char]
    if ligature_text ~= nil then
      return ligature_text
    elseif n.components then
      return get_nodelist_text(n.components)
    end

    -- See [https://tug.org/pipermail/luatex/2018-March/006786.html]
    local characters = font_characters[n.font]
    if not characters then
      local font_data = fonts.hashes.identifiers[n.font]
      characters = font_data and font_data.characters
      -- Only cache a real table, so that a missing entry is looked up again.
      if characters then
        font_characters[n.font] = characters
      end
    end

    local character = characters and characters[n.char]
    local text = tounicode_to_text(character and character.tounicode)
    if text then
      return text
    end

    -- No usable tounicode information: use the character code as it is.
    return utf8.char(n.char)

  elseif n.id == NODE_ID_DISC then

    if n.replace then
      return get_nodelist_text(n.replace)
    else
      return ''
    end

  else
    return ''
  end

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{get_nodelist_text}
%   Return the text content of the glyph nodes in the list starting at \luavar{head} up to and including the node
%   \luavar{last}, or up to the end of the list if \luavar{last} is not specified.
%    \begin{macrocode}
-- Concatenate the text of the nodes in the list starting at `head`.
--   head: first node of the list.
--   last: optional; traversal stops after this node has been included.
--         Without it the whole list is read.
-- Returns the combined text ('' if nothing was traversed).
get_nodelist_text = function (head,last)

  -- Collect the per-node strings and join them once at the end.
  local pieces = {}

  for current in node.traverse(head) do
    pieces[#pieces + 1] = get_node_text(current)
    if current == last then
      break
    end
  end

  return table.concat(pieces)

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{is_possible_word_node}
%  Return boolean indicating if node \luavar{n} could be part of a word. Assume that \luavar{glyph}, \luavar{disc},
%  and \luavar{margin_kern} nodes could be part of a word, as could a \luavar{kern} node with subtype
%  \luavar{fontkern}.
%    \begin{macrocode}
-- Return true if node `n` could be part of a word: a glyph, disc or
-- margin_kern node, or a kern node whose subtype is fontkern. Return false
-- otherwise.
local function is_possible_word_node(n)

  local id = n.id

  if id == NODE_ID_GLYPH or id == NODE_ID_DISC or id == NODE_ID_MARGIN_KERN then
    return true
  end

  -- Among kerns, only font kerns count as part of a word.
  return id == NODE_ID_KERN and n.subtype == NODE_KERN_SUBTYPE_FONTKERN

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{is_possible_space_node}
%  Return boolean indicating if node \luavar{n} could be part of a space. Assume that \luavar{glue} nodes could be
%  part of a space, as could a \luavar{kern} node with subtype \luavar{userkern}.
%    \begin{macrocode}
-- Return true if node `n` could be part of a space: a glue node, or a kern
-- node whose subtype is userkern. Return false otherwise.
local function is_possible_space_node(n)

  if n.id == NODE_ID_GLUE then
    return true
  end

  -- Among kerns, only user kerns count as part of a space.
  return n.id == NODE_ID_KERN and n.subtype == NODE_KERN_SUBTYPE_USERKERN

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{String manipulation}
%
% \begin{macro}[int]{trim_nonlettershyphens_both}
%   Remove characters other than letters and hyphens from both the start and end of a string.
%    \begin{macrocode}
-- Return `s` with every leading and trailing character that is neither a
-- letter (%a) nor a hyphen removed.
local function trim_nonlettershyphens_both(s)

  -- Greedy runs of unwanted characters at both ends; the lazy capture in
  -- between is what remains.
  local trimmed = unicode.utf8.match(s,'^[^%a-]*(.-)[^%a-]*$')
  return trimmed

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{trim_nonlettershyphens_start}
%   Remove characters other than letters and hyphens from the start of a string.
%    \begin{macrocode}
-- Return `s` with every leading character that is neither a letter (%a) nor
-- a hyphen removed.
local function trim_nonlettershyphens_start(s)

  -- Skip the unwanted prefix and capture everything after it.
  local trimmed = unicode.utf8.match(s,'^[^%a-]*(.-)$')
  return trimmed

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{trim_nonlettershyphens_end}
%   Remove characters other than letters and hyphens from the end of a string.
%    \begin{macrocode}
-- Return `s` with every trailing character that is neither a letter (%a)
-- nor a hyphen removed.
local function trim_nonlettershyphens_end(s)

  -- The lazy capture stops as soon as only unwanted characters remain.
  local trimmed = unicode.utf8.match(s,'^(.-)[^%a-]*$')
  return trimmed

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{rpad}
%   Return string \luavar{s} padded on the right with spaces to length \luavar{n}.
%    \begin{macrocode}
-- Return `s` followed by as many spaces as are needed to bring its length,
-- as measured by unicode.utf8.len, up to `n`.
local function rpad(s,n)

  local missing = n - unicode.utf8.len(s)
  local padding = unicode.utf8.rep(STR_SPACE,missing)

  return s .. padding

end
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[int]{lpad}
%   Return string \luavar{s} padded on the left with spaces to length \luavar{n}.
%    \begin{macrocode}
-- Return `s` preceded by as many spaces as are needed to bring its length,
-- as measured by unicode.utf8.len, up to `n`.
local function lpad(s,n)

  local missing = n - unicode.utf8.len(s)
  local padding = unicode.utf8.rep(STR_SPACE,missing)

  return padding .. s

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{Pre-linebreak processing}
%
% Before each line has been broken, find all potential division points and store the words in which they occur,
% linking each potential break point to the corresponding word.
%
% Declare a new attribute, which will be used to store in each disc node the index of the corresponding word in the
% table \luavar{hlist_segment_list}.
%    \begin{macrocode}
local hyphen_attr = luatexbase.new_attribute('hyphen_attr')
%    \end{macrocode}
%
%
%
% Table to hold segments (word/space/math) in the hlist that will be broken. This table will be cleared after the
% post-linebreak processing.
%    \begin{macrocode}
local hlist_segment_list = {}
%    \end{macrocode}
%
%
%
% \begin{macro}[int]{get_first_glyph_lang}
%   Return the lang attribute of the first glyph in the part of the list starting at n that could be part of a word.
%   (Currently unused; see the documentation of \luafunc{get_disc_lang}.)
%    \begin{macrocode}
  -- local function get_first_glyph_lang(n)

  --   local item = n
  --   while item and is_possible_word_node(item) do
  --     if item.id == NODE_ID_GLYPH then
  --       return item.lang
  --     end
  --     item = item.next
  --   end

  --   return nil

  -- end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{get_disc_lang}
%   Try to find the language ID in force at a given disc node by looking at (1)~the last glyph in the word
%   before the disc node; (2)~the first glyph in the word after the disc node. Default to language ID \luavar{0}.
%
%   (Looking at \luavar{replace}, \luavar{pre}, \luavar{post} is possible, but is unreliable and so disabled for the
%   present. The author has encountered the situation where an explicit hyphen results in the hyphen characters in
%   \luavar{replace} and \luavar{pre} having different language IDs. He has not had time to investigate how this
%   arises from the interaction of \pkg{babel}/\pkg{polyglossia} and Lua\LaTeX.)
%    \begin{macrocode}
-- Return the language ID in force at disc node `n`.
-- Starting at `n`, walk backwards through the nodes that could be part of a
-- word and return the `lang` of the first glyph met (the last glyph before
-- the disc). If none is found, do the same walking forwards (the first
-- glyph after the disc). If there is still none, return 0.
local function get_disc_lang(n)

  -- Disabled: looking inside the disc node itself (see the notes above).
  --
  -- lang = get_first_glyph_lang(n.replace)
  -- if lang then
  --   print(lang)
  --   return lang
  -- end

  -- lang = get_first_glyph_lang(n.pre)
  -- if lang then
  --   print(lang)
  --   return lang
  -- end

  -- lang = get_first_glyph_lang(n.post)
  -- if lang then
  --   return lang
  -- end

  -- 'prev' scans the part of the word before the disc node, 'next' the part
  -- after it; each scan stops at the first node that cannot be in a word.
  for _, link in ipairs({ 'prev', 'next' }) do
    local current = n
    while current and is_possible_word_node(current) do
      if current.id == NODE_ID_GLYPH then
        return current.lang
      end
      current = current[link]
    end
  end

  return 0

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{pre_linebreak}
%   Extract segments (word/space/math) from the hlist at \luavar{hlist_head} and store appropriate data in
%   \luavar{hlist_segment_list}. For spaces and math, this is just the existence of a segment. For a word, store its
%   text and its language ID (as determined by \luafunc{get_disc_lang}). Also, for each disc node, assign the index
%   of the word in \luavar{hlist_segment_list} to its \luavar{hyphen_attr} attribute (declared above).
%    \begin{macrocode}
local function pre_linebreak(hlist_head,groupcode)
%    \end{macrocode}
%   \luavar{groupcode} is accepted because the callback supplies it, but it is not used. \luavar{word_start_node} and
%   \luavar{lang} describe the word currently being read and are reset after each word.
%    \begin{macrocode}

  local word_start_node = nil
  local segment_count = 0
  local lang = nil

  debug('Pre-linebreak processing start')

  local item = hlist_head
  while item do
%    \end{macrocode}
%   If \luavar{item} is a math node (which must have subtype beginmath, unless something has changed the node list),
%   skip the math and add "[MATH]" to \luavar{hlist_segment_list}. Should the matching endmath node be missing, the
%   scan stops at the end of the hlist rather than trying to index a \luavar{nil} node.
%    \begin{macrocode}
    if item.id == NODE_ID_MATH then
      assert(item.subtype == NODE_MATH_SUBTYPE_BEGIN)
      while item and not (
        item.id == NODE_ID_MATH and item.subtype == NODE_MATH_SUBTYPE_END
      ) do
        item = item.next
      end
      if item then
        item = item.next
      end

      segment_count = segment_count + 1
      hlist_segment_list[segment_count] = {
        [KEY_TYPE] = SEGMENT_MATH
      }

      goto continue
    end
%    \end{macrocode}
%   If \luavar{item} is a possible word node, read the whole word, setting the \luavar{hyphen_attr} of any disc nodes
%   to \luavar{segment_count}, and adding the word to \luavar{hlist_segment_list}.
%    \begin{macrocode}
    if is_possible_word_node(item) then
      word_start_node = item
      segment_count = segment_count + 1
      while item and is_possible_word_node(item) do

%    \end{macrocode}
%   When the first disc node is found, find the language of the word.
%    \begin{macrocode}
        if item.id == NODE_ID_DISC then
          if not lang then
            lang = get_disc_lang(item)
          end
          node.set_attribute(item,hyphen_attr,segment_count)
        end

        item = item.next
      end
%    \end{macrocode}
%   \luavar{item} should be a node, because even after the last word node, the hlist will contain something. But just
%   in case, check and find the last node using \luafunc{node.tail} if necessary. This latter case should be very
%   rare, so it is more efficient to recalculate here if necessary rather than having an extra assignment to store the
%   previous node in the while loop.
%    \begin{macrocode}
      local word_end_node
      if item then
        word_end_node = item.prev
      else
        word_end_node = node.tail(word_start_node)
      end

      local word = get_nodelist_text(word_start_node,word_end_node)
      hlist_segment_list[segment_count] = {
        [KEY_TYPE] = SEGMENT_WORD,
        [KEY_WORD] = word,
        [KEY_LANG] = lang,
      }

      word_start_node = nil
      lang = nil

      goto continue
    end
%    \end{macrocode}
%   If \luavar{item} is a node that could be part of a space, add a space to the segment list.
%    \begin{macrocode}
    if is_possible_space_node(item) then
      segment_count = segment_count + 1

      while item and is_possible_space_node(item) do
        item = item.next
      end

      hlist_segment_list[segment_count] = {
        [KEY_TYPE] = SEGMENT_SPACE
      }

      goto continue
    end
%    \end{macrocode}
%   If \luavar{item} is anything else, just move on.
%    \begin{macrocode}
    item = item.next

    ::continue::
  end

  debug('Pre-linebreak processing finish')

  return true
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{Post-linebreak processing}
%
% After linebreaking, look for a discretionary node at the end of each line, which indicates that a word has been
% divided between the end of that line and the start of the next. Extract the two word-pieces from the lines and store
% them, together with the undivided word and its context, in the appropriate language table. Also insert a whatsit
% that will set the page number when the hyphenation is written out.
%
% \begin{macro}[int]{get_used_disc}
%   If at the tail of the hlist at \luavar{hlist_head} (which will be a line) there is a disc node not followed by a
%   glyph node, return that disc node. Otherwise return \luavar{nil}.
%    \begin{macrocode}
local function get_used_disc(hlist_head)

  -- Walk backwards from the last node of the line. A disc node only counts if it is reached before any glyph.
  local cursor = node.tail(hlist_head)

  while cursor do
    local id = cursor.id
    if id == NODE_ID_GLYPH then
      return nil
    elseif id == NODE_ID_DISC then
      return cursor
    end
    cursor = cursor.prev
  end

  -- Reached the start of the line without meeting a disc or a glyph.
  return nil

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{get_disc_word_start}
%   Return the node starting the word that includes a given disc node \luavar{n}, or \luavar{nil} if there is no such
%   node.
%    \begin{macrocode}
local function get_disc_word_start(hlist_head,n)

  -- hlist_head is not consulted; the search only follows prev links from n.
  if not n then
    return nil
  end

  -- Keep stepping back while the preceding node can still belong to the word.
  local start = n
  local before = start.prev
  while before and is_possible_word_node(before) do
    start = before
    before = start.prev
  end

  return start
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{get_next_hlist}
%   Return the next hlist in the list containing the given node \luavar{n}, or \luavar{nil} if there is no such hlist
%   node.
%    \begin{macrocode}
local function get_next_hlist(n)

  -- Start with the node after n and skip everything that is not an hlist.
  local candidate = n.next

  while candidate and candidate.id ~= NODE_ID_HLIST do
    candidate = candidate.next
  end

  -- Either the hlist that stopped the loop, or nil when the list ran out.
  return candidate

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{get_line_first_word}
%   Return the first word in the hlist at \luavar{hlist_head}, or \luavar{nil} if there is no such word.
%    \begin{macrocode}
local function get_line_first_word(hlist_head)
%    \end{macrocode}
%   \luavar{word_start_node} stays \luavar{nil} until the first glyph node is met; from then on it is the node that
%   starts the word.
%    \begin{macrocode}
  local word_start_node = nil

  for item in node.traverse(hlist_head) do

    if not word_start_node and item.id == NODE_ID_GLYPH then
      word_start_node = item
    end

    -- Once a word has been started, the first node that cannot belong to a word ends it.
    if word_start_node and not is_possible_word_node(item) then
      return get_nodelist_text(word_start_node,item.prev)
    end

  end
%    \end{macrocode}
%   The word may run right up to the end of the hlist, so check whether a word was started but never closed.
%    \begin{macrocode}
  if not word_start_node then
    return nil
  end

  return get_nodelist_text(word_start_node,node.tail(hlist_head))
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{get_context}
%   Return a string assembled from the part of \luavar{hlist_segment_list} before or after \luavar{index} according to
%   \luavar{incr} (which must be \(\pm 1\)) up to a maximum of \luavar{target_word_count} words.
%    \begin{macrocode}
local function get_context(index,incr,target_word_count)

  local text = ''
  local words_seen = 0
  local pos = index + incr

  while true do
    -- Stop when the list is exhausted in either direction or enough words have been collected.
    if pos <= 0 or pos > #hlist_segment_list or words_seen >= target_word_count then
      break
    end

    local segment = hlist_segment_list[pos]
    local kind = segment[KEY_TYPE]

    local piece
    if kind == SEGMENT_WORD then
      piece = segment[KEY_WORD]
      words_seen = words_seen + 1
    elseif kind == SEGMENT_SPACE then
      piece = STR_SPACE
    elseif kind == SEGMENT_MATH then
      piece = STR_MATH
    end

    -- Going forwards the piece is appended; going backwards it is prepended, so the text stays in reading order.
    if incr > 0 then
      text = text .. piece
    else
      text = piece .. text
    end

    pos = pos + incr
  end

  return text

end
%    \end{macrocode}
% \end{macro}
%
%
%
% Count and list for hyphenated words. Each entry in the list will be a table containing the original word, the
% hyphenation, the language, the index of the table in the list (which is needed later for stable sorting and sorting
% into the original order), and the context.
%    \begin{macrocode}
-- All hyphenations recorded so far; check_line_hyphenation appends one table per hyphenated word.
local hyphenation_list = {}
-- Number of entries stored in hyphenation_list; incremented just before each new entry is stored at that index.
local hyphenation_count = 0
%    \end{macrocode}
%
%
%
% \begin{macro}[int]{check_line_hyphenation}
%   Check whether there is a hyphenated word at the end of the given hlist; if so, save the word to
%   \luavar{hyphenation_list}.
%    \begin{macrocode}
local function check_line_hyphenation(hlist)
%    \end{macrocode}
%   First, is there a disc node not followed by a glyph node at the end of the list?
%    \begin{macrocode}
  local last_disc = get_used_disc(hlist.head)
  if not last_disc then
    debug('  No disc node found at end of line')
    return
  end
%    \end{macrocode}
%   Get the undivided word and its language from \luavar{hlist_segment_list}.
%    \begin{macrocode}
  local hyphenation_index = node.has_attribute(last_disc,hyphen_attr)
  local t = hlist_segment_list[hyphenation_index]
  assert(t)
  assert(t[KEY_TYPE] == SEGMENT_WORD)
  local word = t[KEY_WORD]
  local lang = t[KEY_LANG]
%    \end{macrocode}
%   \luavar{word} might be something other than a genuine word, such as an ISBN (with hyphen separators). So only
%   proceed if it contains at least one letter.
%    \begin{macrocode}
  if not unicode.utf8.match(word,'%a') then
    debug('  Divided "word" contains no letters')
    return
  end
%    \end{macrocode}
%   There should always be a next line, since there is a disc node at the end of \luavar{hlist}, but check anyway.
%    \begin{macrocode}
  local next_line = get_next_hlist(hlist)

  if not next_line then
    debug('  No following line found (which should not happen)')
    return
  end
%    \end{macrocode}
%   For the pre-linebreak part of the word, get the word that ends the line, and trim any leading non-letters. This
%   could leave an empty word; for example, if \(n\)-dimensional is broken at the hyphen, the word ending the line is
%   just the hyphen. If an empty word is left, just use the non-trimmed result.
%    \begin{macrocode}
  local pre = get_nodelist_text(get_disc_word_start(hlist.head,last_disc))
  local pre_temp = trim_nonlettershyphens_start(pre)
  if pre_temp ~= '' then
    pre = pre_temp
  end
%    \end{macrocode}
%   For the post-linebreak part, just get the word at the start of the next line, and trim any trailing non-letters.
%    \begin{macrocode}
  local post = trim_nonlettershyphens_end(get_line_first_word(next_line.head))
%    \end{macrocode}
%   Compute the context and then trim any unwanted symbols from the word itself.
%    \begin{macrocode}
  local context =
    get_context(
      hyphenation_index,-1,tex.count['l__lualisthyphen_context_before_int']
    )
    .. word ..
    get_context(
      hyphenation_index,1,tex.count['l__lualisthyphen_context_after_int']
    )

  word = trim_nonlettershyphens_both(word)

  debug(
    '  Hyphenated word found: "' .. word .. '" -> "' .. pre .. '<>' .. post .. '"'
  )
%    \end{macrocode}
%   Store everything (except the page number on which the hyphenated word appears, which is not yet known) in the
%   hyphenation list.
%    \begin{macrocode}
  hyphenation_count = hyphenation_count + 1
  hyphenation_list[hyphenation_count] = {
    [KEY_LANG] = lang,
    [KEY_WORD] = word,
    [KEY_DIVISION] = pre .. post,
    [KEY_INDEX] = hyphenation_count,
    [KEY_CONTEXT] = context,
  }
%    \end{macrocode}
%   Add a whatsit to record the page number when the page with the hyphenation is shipped out. This information also
%   serves to distinguish hyphenations that are written to the page from those that occur in (e.g.) boxes that are
%   discarded without being written to the page. The whatsit node is held in a local variable so that no global is
%   created as a side effect.
%    \begin{macrocode}
  local late_lua_n = node.new('whatsit','late_lua')
  late_lua_n.data =
    'lualisthyphen.set_hyphenation_page(' .. hyphenation_count .. ',tex.count["c@page"])'

  node.insert_after(hlist.head,last_disc,late_lua_n)

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{set_hyphenation_page}
%   Set the page on which the hyphenation with the given index appears.
%    \begin{macrocode}
local function set_hyphenation_page(index,page)

  -- Look the entry up first, then record the page on it.
  local entry = hyphenation_list[index]
  entry[KEY_PAGE] = page

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{post_linebreak}
%   For every line in the vlist at \luavar{vlist_head}, check whether there is a hyphenated word at the end.
%    \begin{macrocode}
local function post_linebreak(vlist_head,groupcode)

  debug('Post-linebreak processing start')

  -- Visit the nodes of the vlist in order; each hlist among them is treated as one line.
  local line_no = 0
  local item = vlist_head

  while item do
    if item.id == NODE_ID_HLIST then
      line_no = line_no + 1
      debug('  Line no.' .. line_no)
      check_line_hyphenation(item)
    end
    item = item.next
  end

  -- Start the next paragraph with an empty segment list.
  hlist_segment_list = {}

  debug('Post-linebreak processing end')

  return true

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{Callbacks}
%
% Add \luafunc{pre_linebreak} and \luafunc{post_linebreak} to the relevant callbacks.
%    \begin{macrocode}
-- Descriptions under which the two functions are registered.
local LUA_LIST_HYPHEN_PRE_LINEBREAK = 'LUA_LIST_HYPHEN_PRE_LINEBREAK'
local LUA_LIST_HYPHEN_POST_LINEBREAK = 'LUA_LIST_HYPHEN_POST_LINEBREAK'

luatexbase.add_to_callback(
  'pre_linebreak_filter',pre_linebreak,LUA_LIST_HYPHEN_PRE_LINEBREAK
)
luatexbase.add_to_callback(
  'post_linebreak_filter',post_linebreak,LUA_LIST_HYPHEN_POST_LINEBREAK
)
%    \end{macrocode}
%
%
%
% \subsection{Language settings}
%
% Table mapping language IDs to textual names.
%    \begin{macrocode}
-- Keys are numerical language IDs, values are textual names; consulted when each per-language file is written.
local language_table = {}
%    \end{macrocode}
%
% Populating \luavar{language_table} is done differently for \pkg{babel} and \pkg{polyglossia}. If \pkg{babel} is in
% use, the \LaTeX\ frontend iterates through \cs{bbl@languages} and calls \luafunc{babel_save_language_name}. If
% \pkg{polyglossia} is in use, \luavar{language_table} is populated by \luafunc{polyglossia_get_language_names}, which
% is called just before the hyphenation lists are written.
%
% \begin{macro}[int]{babel_save_language_name}
%   Store the association of a language ID to \pkg{babel}'s textual name, if no name has been assigned to that ID
%   already.
%    \begin{macrocode}
local function babel_save_language_name(lang_id,name)

  -- The first name registered for an ID wins; later ones are ignored.
  if language_table[lang_id] then
    return
  end

  language_table[lang_id] = name

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{polyglossia_get_language_names}
%   If polyglossia has been loaded, use it to build the table mapping language IDs to textual names.
%    \begin{macrocode}
local function polyglossia_get_language_names()

  -- The global polyglossia table only exists when the package has been loaded; without it there is nothing to do.
  if polyglossia then
    local loaded = polyglossia.newloader_loaded_languages
    for lang_name,lang_object in pairs(loaded) do
      language_table[lang.id(lang_object)] = lang_name
    end
  end

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{Processing hyphenation lists}
%
% Before writing out hyphenation lists, remove duplicates and/or perform sorting, in accordance with the set options.
%
%
%
% \subsubsection{Comparisons and equality checks}
%
% \begin{macro}[int]{equal_hyphenation_case_sensitive}
%   Equality check for deduplicating the list of hyphenations case-sensitively.
%    \begin{macrocode}
local function equal_hyphenation_case_sensitive(a,b)
  -- Two entries are duplicates only if both the word and the chosen division agree exactly.
  if a[KEY_WORD] ~= b[KEY_WORD] then
    return false
  end
  return a[KEY_DIVISION] == b[KEY_DIVISION]
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{equal_hyphenation_case_insensitive}
%   Equality check for deduplicating the list of hyphenations case-insensitively.
%    \begin{macrocode}
local function equal_hyphenation_case_insensitive(a,b)
  -- Same test as the case-sensitive check, but on lowercased words and divisions.
  local lower = unicode.utf8.lower
  if lower(a[KEY_WORD]) ~= lower(b[KEY_WORD]) then
    return false
  end
  return lower(a[KEY_DIVISION]) == lower(b[KEY_DIVISION])
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{lessthan_hyphenation_case_sensitive}
%   Comparison for sorting the list of hyphenations case-sensitively.
%
%   The comparison of index keys ensures that the sorting is stable.
%    \begin{macrocode}
local function lessthan_hyphenation_case_sensitive(a,b)
  -- Order by word first.
  local word_a,word_b = a[KEY_WORD],b[KEY_WORD]
  if word_a ~= word_b then
    return word_a < word_b
  end

  -- Same word: order by division.
  local division_a,division_b = a[KEY_DIVISION],b[KEY_DIVISION]
  if division_a ~= division_b then
    return division_a < division_b
  end

  -- Same word and division: fall back to the original position, which keeps the sort stable.
  return a[KEY_INDEX] < b[KEY_INDEX]
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{lessthan_hyphenation_case_insensitive}
%   Comparison for sorting the list of hyphenations case-insensitively.
%
%   The comparison of index keys ensures that the sorting is stable.
%    \begin{macrocode}
local function lessthan_hyphenation_case_insensitive(a,b)
  -- Each key is lowercased once and then compared, instead of being lowercased again in every clause.
  local lower = unicode.utf8.lower

  -- Order by lowercased word first.
  local word_a,word_b = lower(a[KEY_WORD]),lower(b[KEY_WORD])
  if word_a ~= word_b then
    return word_a < word_b
  end

  -- Same word (ignoring case): order by lowercased division.
  local division_a,division_b = lower(a[KEY_DIVISION]),lower(b[KEY_DIVISION])
  if division_a ~= division_b then
    return division_a < division_b
  end

  -- Word and division both equal ignoring case: the index decides, so the sort is stable. (The tie-break has to
  -- apply when the divisions are equal, exactly as in the case-sensitive comparison.)
  return a[KEY_INDEX] < b[KEY_INDEX]
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsubsection{Sorting}
%
% \begin{macro}[int]{sort_hyphenation_list_none}
%   Sort \luavar{hyphenation_list} into its original order of appearance.
%    \begin{macrocode}
local function sort_hyphenation_list_none(hyphenation_list)
  -- The stored index records the order in which the hyphenations were found.
  local function by_index(first,second)
    return first[KEY_INDEX] < second[KEY_INDEX]
  end

  table.sort(hyphenation_list,by_index)
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{sort_hyphenation_list_case}
%   Sort \luavar{hyphenation_list} case-sensitively.
%    \begin{macrocode}
local function sort_hyphenation_list_case(hyphenation_list)
  -- In-place sort using the case-sensitive ordering.
  local compare = lessthan_hyphenation_case_sensitive
  table.sort(hyphenation_list,compare)
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{sort_hyphenation_list_nocase}
%   Sort \luavar{hyphenation_list} case-insensitively.
%    \begin{macrocode}
local function sort_hyphenation_list_nocase(hyphenation_list)
  -- In-place sort using the case-insensitive ordering.
  local compare = lessthan_hyphenation_case_insensitive
  table.sort(hyphenation_list,compare)
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{sort_hyphenation_list}
%   Select the appropriate function for sorting.
%    \begin{macrocode}
-- Original order unless the sort option selects 1 (case-sensitive) or 2 (case-insensitive).
local sort_hyphenation_list = sort_hyphenation_list_none
do
  local sort_option = tex.count['l__lualisthyphen_sort_int']
  if sort_option == 1 then
    sort_hyphenation_list = sort_hyphenation_list_case
  elseif sort_option == 2 then
    sort_hyphenation_list = sort_hyphenation_list_nocase
  end
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsubsection{Deduplication}
%
% \begin{macro}[int]{deduplicate_hyphenation_list_none}
%   Dummy function; does not deduplicate \luavar{hyphenation_list}.
%    \begin{macrocode}
local function deduplicate_hyphenation_list_none(hyphenation_list)
  -- Intentionally empty: the list is left exactly as it is.
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{deduplicate_hyphenation_list_case}
%   Remove duplicates from \luavar{hyphenation_list} case-sensitively.
%    \begin{macrocode}
local function deduplicate_hyphenation_list_case(hyphenation_list)
  -- Sort first so that equal entries end up next to each other, then let list_uniq process the sorted list.
  sort_hyphenation_list_case(hyphenation_list)
  list_uniq(hyphenation_list,equal_hyphenation_case_sensitive)
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{deduplicate_hyphenation_list_nocase}
%   Remove duplicates from \luavar{hyphenation_list} case-insensitively.
%    \begin{macrocode}
local function deduplicate_hyphenation_list_nocase(hyphenation_list)
  -- Sort first so that entries equal up to case end up next to each other, then let list_uniq process the list.
  sort_hyphenation_list_nocase(hyphenation_list)
  list_uniq(hyphenation_list,equal_hyphenation_case_insensitive)
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{deduplicate_hyphenation_list}
%   Select the appropriate function for whether duplicates should be removed.
%    \begin{macrocode}
-- No deduplication unless the unique option selects 1 (case-sensitive) or 2 (case-insensitive).
local deduplicate_hyphenation_list = deduplicate_hyphenation_list_none
do
  local unique_option = tex.count['l__lualisthyphen_unique_int']
  if unique_option == 1 then
    deduplicate_hyphenation_list = deduplicate_hyphenation_list_case
  elseif unique_option == 2 then
    deduplicate_hyphenation_list = deduplicate_hyphenation_list_nocase
  end
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsubsection{Combined processing}
%
% \begin{macro}[int]{process_lang_hyphenation_list}
%   Remove duplicates and sort \luavar{hyphenation_list}.
%    \begin{macrocode}
local function process_lang_hyphenation_list(hyphenation_list)
  -- Deduplication comes first: the non-dummy variants sort the list themselves before removing duplicates, so
  -- the requested output order has to be (re-)established afterwards.
  deduplicate_hyphenation_list(hyphenation_list)
  sort_hyphenation_list(hyphenation_list)
end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{Writing}
%
% \begin{macro}[int]{write_lang_hyphenation_list_standard}
%   Write out just the hyphenated words in \luavar{hyphenation_list} to file handle \luavar{f}.
%    \begin{macrocode}
local function write_lang_hyphenation_list_standard(f,hyphenation_list,widths)

  -- widths is accepted so that both writers share one signature; it is not used here.
  for _,entry in ipairs(hyphenation_list) do
    if entry then
      local line = entry[KEY_DIVISION] .. '\n'
      f:write(line)
    end
  end

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{write_lang_hyphenation_list_verbose}
%   Write out all hyphenation information in \luavar{hyphenation_list} to file handle \luavar{f}, in columns as
%   specified in \luavar{widths}.
%    \begin{macrocode}
local function write_lang_hyphenation_list_verbose(f,hyphenation_list,widths)

  local word_width = widths[KEY_WORD]
  local division_width = widths[KEY_DIVISION]
  local page_width = widths[KEY_PAGE]

  for _,entry in ipairs(hyphenation_list) do

    if entry then
%    \end{macrocode}
%   It is possible for KEY_PAGE not to have been set, for instance if the hyphenation occurred in a box that was never
%   output.
%    \begin{macrocode}
      local page_number = entry[KEY_PAGE]
      local page_text = STR_PAGE_NONE
      if page_number then
        page_text = STR_PAGE_PREFIX .. page_number
      end

      -- Pad each column to its width, then join the columns into a single output line.
      local word_column = rpad(entry[KEY_WORD],word_width)
      local division_column = rpad(entry[KEY_DIVISION],division_width)
      local page_column = lpad(page_text,page_width)
      local quoted_context = STR_QUOTE_OPEN .. entry[KEY_CONTEXT] .. STR_QUOTE_CLOSE

      f:write(
        word_column .. STR_ARROW .. division_column .. STR_SPACE_TWO
        .. page_column .. STR_SPACE .. quoted_context .. '\n'
      )
    end

  end

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{write_lang_hyphenation_list}
%   Set \luafunc{write_lang_hyphenation_list} to be either \luafunc{write_lang_hyphenation_list_standard} or
%   \luafunc{write_lang_hyphenation_list_verbose}, depending on the
%   package options.
%    \begin{macrocode}
-- Verbose writer unless the verbose option is 0.
local write_lang_hyphenation_list = write_lang_hyphenation_list_verbose
if tex.count['l__lualisthyphen_verbose_int'] == 0 then
  write_lang_hyphenation_list = write_lang_hyphenation_list_standard
end
%    \end{macrocode}
% \end{macro}
%
%
%
% Compute a settings description to insert into file headers.
%    \begin{macrocode}
-- Names used in the header for the numerical values of the sort and unique options.
local NONE_CASE_NOCASE = {
  [0] = 'none',
  [1] = 'case',
  [2] = 'nocase'
}
local settings_desc
do
  local count = tex.count

  -- The context sizes are only reported in verbose mode.
  local verbose_part
  if count['l__lualisthyphen_verbose_int'] ~= 0 then
    verbose_part = 'verbose=true'
      .. ',context-before=' .. count['l__lualisthyphen_context_before_int']
      .. ',context-after=' .. count['l__lualisthyphen_context_after_int']
  else
    verbose_part = 'verbose=false'
  end

  local non_output_part = ',include-non-output=true'
  if count['l__lualisthyphen_include_non_output_int'] == 0 then
    non_output_part = ',include-non-output=false'
  end

  settings_desc = verbose_part .. non_output_part
    .. ',sort=' .. NONE_CASE_NOCASE[count['l__lualisthyphen_sort_int']]
    .. ',unique=' .. NONE_CASE_NOCASE[count['l__lualisthyphen_unique_int']]
end
%    \end{macrocode}
%
%
%
% \begin{macro}[int]{get_hyphenation_file_path}
%   Get the path of the file to which the list of hyphenated words will be written, based on the given
%   \luavar{prefix}, \luavar{extension}, and \luavar{lang_name}, and taking into account any specified output
%   directory for Lua\TeX. (The file header is written by \luafunc{process_write_lang_hyphenation_list}.)
%    \begin{macrocode}
local function get_hyphenation_file_path(prefix,extension,lang_name)

  -- lang_name may be a numerical ID rather than a string, hence the tostring.
  local file_name = prefix .. tostring(lang_name) .. extension

  local out_dir = status.output_directory
  if not out_dir then
    return file_name
  end

  -- Only add a separator when the output directory does not already end in one.
  local separator = '/'
  if string.sub(out_dir,-1,-1) == '/' then
    separator = ''
  end

  return out_dir .. separator .. file_name

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{process_write_lang_hyphenation_list}
%   Process and write out the \luavar{hyphenation_list} (which will be for the language with the numerical
%   \luavar{lang_id}) to a file with the given \luavar{prefix} and \luavar{extension}, using \luavar{widths} for the
%   ‘columns’ in verbose mode.
%    \begin{macrocode}
local function process_write_lang_hyphenation_list(
    prefix,extension,lang_id,hyphenation_list,widths
)

  process_lang_hyphenation_list(hyphenation_list)

  -- When no textual name is known for the language, the numerical ID is used in both the file name and the header.
  local lang_name = language_table[lang_id]
  local lang_desc
  if not lang_name then
    lang_name = lang_id
    lang_desc = 'language with ID ' .. lang_id
  else
    lang_desc = 'language "' .. lang_name .. '" (ID ' .. lang_id .. ')'
  end

  -- io.open returns nil plus a message on failure; report that message instead of failing later on a nil handle.
  local path = get_hyphenation_file_path(prefix,extension,lang_name)
  local f,err = io.open(path,'w')
  if not f then
    error('lua-list-hyphen: cannot open "' .. path .. '" for writing: ' .. tostring(err))
  end

  f:write('% Chosen hyphenations for ' ..  lang_desc .. '\n')
  f:write('% Generated by lua-list-hyphen (' .. settings_desc .. ')\n')

  write_lang_hyphenation_list(f,hyphenation_list,widths)
  f:close()

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \begin{macro}[int]{process_write_hyphenation_lists}
%   Sort \luavar{hyphenation_list} into per-language lists and write them out to separate files.
%    \begin{macrocode}
local function process_write_hyphenation_lists(prefix,extension)

  local lang_hyphenation_table = {}
  local lang_widths_table = {}
  local include_non_output =
    tex.count['l__lualisthyphen_include_non_output_int'] == 1
%    \end{macrocode}
%   Iterate through all the stored hyphenations. Sort them into per-language lists (creating the list the first time
%   each language is encountered) and also storing the maximum width of values, for output alignment. Hyphenations
%   that never received a page number are only kept when non-output hyphenations are to be included, and they do not
%   contribute to the width of the page number (their ‘no page’ indicator is accounted for below).
%    \begin{macrocode}
  for _,h in pairs(hyphenation_list) do

    if h[KEY_PAGE] or include_non_output then

      local lang = h[KEY_LANG]

      local t = lang_hyphenation_table[lang]
      if not t then
        lang_hyphenation_table[lang] = {}
        t = lang_hyphenation_table[lang]
      end

      local widths = lang_widths_table[lang]
      if not widths then
        lang_widths_table[lang] = {
          [KEY_WORD] = 0,
          [KEY_DIVISION] = 0,
          [KEY_PAGE] = 0
        }
        widths = lang_widths_table[lang]
      end

      widths[KEY_WORD] = math.max(
        widths[KEY_WORD],
        unicode.utf8.len(h[KEY_WORD])
      )
      widths[KEY_DIVISION] = math.max(
        widths[KEY_DIVISION],
        unicode.utf8.len(h[KEY_DIVISION])
      )

      local page = h[KEY_PAGE]
      if page then
        widths[KEY_PAGE] = math.max(
          widths[KEY_PAGE],
          unicode.utf8.len(tostring(page))
        )
      end

      table.insert(t,h)

    end

  end
%    \end{macrocode}
%   Adjust the maximum width for the page output, since there is a prefix and a ‘no page’ indicator to consider.
%    \begin{macrocode}
  for _,widths in pairs(lang_widths_table) do
    widths[KEY_PAGE] = math.max(
      widths[KEY_PAGE] + unicode.utf8.len(STR_PAGE_PREFIX),
      unicode.utf8.len(STR_PAGE_NONE)
    )
  end
%    \end{macrocode}
%   If polyglossia is in use, populate \luavar{language_table}.
%    \begin{macrocode}
  polyglossia_get_language_names()
%    \end{macrocode}
%   For each language, process and write out its hyphenations to a file.
%    \begin{macrocode}
  for k,v in pairs(lang_hyphenation_table) do
    process_write_lang_hyphenation_list(prefix,extension,k,v,lang_widths_table[k])
  end

end
%    \end{macrocode}
% \end{macro}
%
%
%
% \subsection{Export public functions}
%
% Finally, make available the functions that will be called from the \LaTeX\ frontend using \cs{lua_now:n}.
%    \begin{macrocode}
-- Collect the public functions in a table and hand it back as the result of loading this file.
local exports = {}
exports.process_write_hyphenation_lists = process_write_hyphenation_lists
exports.set_hyphenation_page = set_hyphenation_page
exports.babel_save_language_name = babel_save_language_name
return exports
%    \end{macrocode}
%
%
%
%    \begin{macrocode}
%</lua>
%    \end{macrocode}

%
%
%
% \clearpage
% \end{implementation}
%
%
%
% \iffalse
%<*metadriver>
\input{lua-list-hyphen.dtx}
%</metadriver>
% \fi
