libunibreak 6.1
Implementation of the word breaking algorithm as described in Unicode Standard Annex 29.
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include "unibreakdef.h"
#include "wordbreak.h"
#include "wordbreakdata.c"
#include "emojidef.h"
Macros
#define IS_WB3ab(cls)
Functions
void init_wordbreak(void)
    Initializes the wordbreak internals.
void set_wordbreaks_utf8(const utf8_t *s, size_t len, const char *lang, char *brks)
    Sets the word breaking information for a UTF-8 input string.
void set_wordbreaks_utf16(const utf16_t *s, size_t len, const char *lang, char *brks)
    Sets the word breaking information for a UTF-16 input string.
void set_wordbreaks_utf32(const utf32_t *s, size_t len, const char *lang, char *brks)
    Sets the word breaking information for a UTF-32 input string.
Implementation of the word breaking algorithm as described in Unicode Standard Annex 29.
#define IS_WB3ab(cls)
void init_wordbreak(void)
Initializes the wordbreak internals.
It currently does nothing, but it may do something in the future.
void set_wordbreaks_utf16(const utf16_t *s, size_t len, const char *lang, char *brks)
Sets the word breaking information for a UTF-16 input string.
Parameters:
    [in]  s     input UTF-16 string
    [in]  len   length of the input
    [in]  lang  language of the input (reserved for future use)
    [out] brks  pointer to the output breaking data, containing WORDBREAK_BREAK, WORDBREAK_NOBREAK, or WORDBREAK_INSIDEACHAR
void set_wordbreaks_utf32(const utf32_t *s, size_t len, const char *lang, char *brks)
Sets the word breaking information for a UTF-32 input string.
Parameters:
    [in]  s     input UTF-32 string
    [in]  len   length of the input
    [in]  lang  language of the input (reserved for future use)
    [out] brks  pointer to the output breaking data, containing WORDBREAK_BREAK, WORDBREAK_NOBREAK, or WORDBREAK_INSIDEACHAR
void set_wordbreaks_utf8(const utf8_t *s, size_t len, const char *lang, char *brks)
Sets the word breaking information for a UTF-8 input string.
Parameters:
    [in]  s     input UTF-8 string
    [in]  len   length of the input
    [in]  lang  language of the input (reserved for future use)
    [out] brks  pointer to the output breaking data, containing WORDBREAK_BREAK, WORDBREAK_NOBREAK, or WORDBREAK_INSIDEACHAR