libunibreak 6.1
unibreakdef.h
Go to the documentation of this file.
1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2
3/*
4 * Break processing in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
6 *
7 * Copyright (C) 2015-2024 Wu Yongwei <wuyongwei at gmail dot com>
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must
18 * not claim that you wrote the original software. If you use this
19 * software in a product, an acknowledgement in the product
20 * documentation would be appreciated but is not required.
21 * 2. Altered source versions must be plainly marked as such, and must
22 * not be misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source
24 * distribution.
25 */
26
35#ifndef UNIBREAKDEF_H
36#define UNIBREAKDEF_H
37
/*
 * Boolean support.  When _MSC_VER is defined and its value is below 1800,
 * bool is declared here as a typedef for int, with false and true defined
 * as the macros 0 and 1.  In every other case the definitions come from
 * <stdbool.h>.
 * NOTE(review): presumably those older Microsoft compilers do not ship
 * <stdbool.h> -- confirm against the compiler documentation.
 */
38#if defined(_MSC_VER) && _MSC_VER < 1800
39typedef int bool;
40#define false 0
41#define true 1
42#else
43#include <stdbool.h>
44#endif
45
46#include <stddef.h>
47#include "unibreakbase.h"
48
/*
 * Number of elements in the array x.  The argument must be an actual
 * array, not a pointer: for a pointer the result is the pointer size
 * divided by the element size, which is not an element count.  The
 * argument is parenthesized before it is subscripted so that the macro
 * expands correctly for any expression that designates an array.
 */
#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))
50
51#ifdef __cplusplus
52extern "C" {
53#endif
54
/*
 * Sentinel value.  0xFFFFFFFF lies above U+10FFFF, the highest Unicode
 * code point, so it cannot be mistaken for a real character.
 * NOTE(review): the name suggests "end of string", presumably what the
 * ub_get_next_char_* functions return when the input is exhausted --
 * their definitions are not visible in this header; confirm against
 * unibreakdef.c.
 */
59#define EOS 0xFFFFFFFF
60
/*
 * Abstract function interface for ub_get_next_char_utf8,
 * ub_get_next_char_utf16, and ub_get_next_char_utf32: a pointer to a
 * function that takes an untyped const pointer, a size_t and a pointer
 * to size_t, and returns a utf32_t.
 * NOTE(review): the three arguments presumably correspond to the s, len
 * and ip parameters of those functions -- confirm against the callers.
 */
65typedef utf32_t (*get_next_char_t)(const void *, size_t, size_t *);
66
67/*
 * Function prototypes.
 *
 * ub_get_next_char_utf8  gets the next Unicode character in a UTF-8
 *                        sequence.
 * ub_get_next_char_utf16 gets the next Unicode character in a UTF-16
 *                        sequence.
 * ub_get_next_char_utf32 gets the next Unicode character in a UTF-32
 *                        sequence.
 *
 * Each one takes a pointer s to the sequence in its own code unit type,
 * a size_t len and a pointer ip to a size_t, and returns a utf32_t.
 * NOTE(review): len is presumably the length of s in code units and ip
 * an in/out position that is advanced past the character read -- the
 * definitions are not visible in this header; confirm against
 * unibreakdef.c.
 */
68utf32_t ub_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip);
69utf32_t ub_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip);
70utf32_t ub_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip);
71
72__inline const void *ub_bsearch(utf32_t ch, const void *ptr, size_t count,
73 size_t size)
74{
75 int min = 0;
76 int max = count - 1;
77 int mid;
78
79 do
80 {
81 mid = (min + max) / 2;
82 const unsigned char *mid_ptr =
83 (const unsigned char *)ptr + mid * size;
84 utf32_t mid_start = *(const utf32_t *)mid_ptr;
85 utf32_t mid_end = *((const utf32_t *)mid_ptr + 1);
86
87 if (ch < mid_start)
88 {
89 max = mid - 1;
90 }
91 else if (ch > mid_end)
92 {
93 min = mid + 1;
94 }
95 else
96 {
97 return mid_ptr;
98 }
99 } while (min <= max);
100
101 return NULL;
102}
103
104#ifdef __cplusplus
105}
106#endif
107
108#endif /* UNIBREAKDEF_H */
Header file for common definitions in the libunibreak library.
unsigned short utf16_t
Type for UTF-16 data points.
Definition: unibreakbase.h:48
unsigned int utf32_t
Type for UTF-32 data points.
Definition: unibreakbase.h:49
unsigned char utf8_t
Type for UTF-8 data points.
Definition: unibreakbase.h:47
__inline const void * ub_bsearch(utf32_t ch, const void *ptr, size_t count, size_t size)
Definition: unibreakdef.h:72
utf32_t ub_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip)
Gets the next Unicode character in a UTF-8 sequence.
Definition: unibreakdef.c:50
utf32_t(* get_next_char_t)(const void *, size_t, size_t *)
Abstract function interface for ub_get_next_char_utf8, ub_get_next_char_utf16, and ub_get_next_char_utf32.
Definition: unibreakdef.h:65
utf32_t ub_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip)
Gets the next Unicode character in a UTF-32 sequence.
Definition: unibreakdef.c:159
utf32_t ub_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip)
Gets the next Unicode character in a UTF-16 sequence.
Definition: unibreakdef.c:118