libunibreak 5.1
|
Definitions of internal data structures, declarations of global variables, and function prototypes for the line breaking algorithm. More...
#include "unibreakdef.h"
Go to the source code of this file.
Data Structures | |
struct | LineBreakProperties |
Struct for entries of line break properties. More... | |
struct | LineBreakPropertiesLang |
Struct for association of language-specific line breaking properties with language names. More... | |
struct | LineBreakContext |
Context representing internal state of the line breaking algorithm. More... | |
Enumerations | |
enum | LineBreakClass { LBP_Undefined , LBP_OP , LBP_CL , LBP_CP , LBP_QU , LBP_GL , LBP_NS , LBP_EX , LBP_SY , LBP_IS , LBP_PR , LBP_PO , LBP_NU , LBP_AL , LBP_HL , LBP_ID , LBP_IN , LBP_HY , LBP_BA , LBP_BB , LBP_B2 , LBP_ZW , LBP_CM , LBP_WJ , LBP_H2 , LBP_H3 , LBP_JL , LBP_JV , LBP_JT , LBP_RI , LBP_EB , LBP_EM , LBP_ZWJ , LBP_CB , LBP_AI , LBP_BK , LBP_CJ , LBP_CR , LBP_LF , LBP_NL , LBP_SA , LBP_SG , LBP_SP , LBP_XX } |
Line break classes. More... | |
enum | BreakOutputType { LBOT_PER_CODE_UNIT , LBOT_PER_CODE_POINT } |
Functions | |
void | lb_init_break_context (struct LineBreakContext *lbpCtx, utf32_t ch, const char *lang) |
Initializes line breaking context for a given language. | |
int | lb_process_next_char (struct LineBreakContext *lbpCtx, utf32_t ch) |
Updates LineBreakContext for the next codepoint and returns the detected break. | |
size_t | set_linebreaks (const void *s, size_t len, const char *lang, enum BreakOutputType outputType, char *brks, get_next_char_t get_next_char) |
Sets the line breaking information for a generic input string. | |
Variables | |
const struct LineBreakProperties | lb_prop_supplementary [] |
Line breaking properties for supplementary planes. | |
const unsigned int | lb_prop_supplementary_len |
const char | lb_prop_bmp [] |
Line breaking properties for BMP. | |
const struct LineBreakPropertiesLang | lb_prop_lang_map [] |
Association data of language-specific line breaking properties with language names. | |
Definitions of internal data structures, declarations of global variables, and function prototypes for the line breaking algorithm.
enum BreakOutputType |
enum LineBreakClass |
Line break classes.
This is a mapping of Table 1 of Unicode Standard Annex 14.
void lb_init_break_context(struct LineBreakContext *lbpCtx, utf32_t ch, const char *lang)
Initializes line breaking context for a given language.
[in,out] | lbpCtx | pointer to the line breaking context |
[in] | ch | the first character to process |
[in] | lang | language of the input |
int lb_process_next_char(struct LineBreakContext *lbpCtx, utf32_t ch)
Updates LineBreakContext for the next codepoint and returns the detected break.
[in,out] | lbpCtx | pointer to the line breaking context |
[in] | ch | Unicode codepoint |
size_t set_linebreaks(const void *s, size_t len, const char *lang, enum BreakOutputType outputType, char *brks, get_next_char_t get_next_char)
Sets the line breaking information for a generic input string.
Currently, this implementation has customization for the following ISO 639-1 language codes (for lang):
- de (German)
- en (English)
- es (Spanish)
- fr (French)
- ru (Russian)
- zh (Chinese)
In addition, a suffix "-strict" may be added to indicate strict (as opposed to normal) line-breaking behaviour. See the Conditional Japanese Starter section of UAX #14 for more details.
[in] | s | input string |
[in] | len | length of the input |
[in] | lang | language of the input |
[in] | outputType | output per code-unit or per code-point |
[out] | brks | pointer to the output breaking data, containing LINEBREAK_MUSTBREAK, LINEBREAK_ALLOWBREAK, LINEBREAK_NOBREAK, or LINEBREAK_INSIDEACHAR |
[in] | get_next_char | function to get the next UTF-32 character |
|
extern const char lb_prop_bmp[]
Line breaking properties for BMP.
|
extern const struct LineBreakPropertiesLang lb_prop_lang_map[]
Association data of language-specific line breaking properties with language names.
This is the definition for the static data in this file. If you want more flexibility, or do not need the data here, you may want to redefine lb_prop_lang_map in your C source file.
|
extern const struct LineBreakProperties lb_prop_supplementary[]
Line breaking properties for supplementary planes.
|
extern const unsigned int lb_prop_supplementary_len