Skip to content

Commit 8a4cd4c

Browse files
authored
reduce lenencode bits (#232)
1 parent 462093b commit 8a4cd4c

File tree

3 files changed

+7076
-7065
lines changed

3 files changed

+7076
-7065
lines changed

data/data_generator.rb

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -174,13 +174,13 @@ def cpary2c(array)
174174
return "UINT16_MAX" if array.nil? || array.length == 0
175175
lencode = array.length - 1 #no sequence has len 0, so we encode len 1 as 0, len 2 as 1, ...
176176
array = cpary2utf16encoded(array)
177-
if lencode >= 7 #we have only 3 bits for the length (which is already cutting it close. might need to change it to 2 bits in future Unicode versions)
177+
if lencode >= 3 #we have only 2 bits for the length
178178
array = [lencode] + array
179-
lencode = 7
179+
lencode = 3
180180
end
181181
idx = pushary(array)
182-
raise "Array index out of bound" if idx > 0x1FFF
183-
return "#{idx | (lencode << 13)}"
182+
raise "Array index out of bound" if idx > 0x3FFF
183+
return "#{idx | (lencode << 14)}"
184184
end
185185
def singlecpmap(cp)
186186
return "UINT16_MAX" if cp == nil

utf8proc.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -356,9 +356,9 @@ static utf8proc_int32_t seqindex_decode_index(const utf8proc_uint32_t seqindex)
356356

357357
static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
358358
utf8proc_ssize_t written = 0;
359-
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
360-
int len = seqindex >> 13;
361-
if (len >= 7) {
359+
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x3FFF];
360+
int len = seqindex >> 14;
361+
if (len >= 3) {
362362
len = *entry;
363363
entry++;
364364
}

0 commit comments

Comments
 (0)