Skip to content

Commit 6265ee0

Browse files
tannewt
authored and dhalbert committed
cherry-pick jepler's huffman size reduction
1 parent 3d14c32 commit 6265ee0

File tree

2 files changed

+37
-22
lines changed

2 files changed

+37
-22
lines changed

py/makeqstrdata.py

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -103,14 +103,10 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
103103
# go through each qstr and print it out
104104
for _, _, qstr in qstrs.values():
105105
all_strings.append(qstr)
106-
all_strings_concat = "".join(all_strings).encode("utf-8")
106+
all_strings_concat = "".join(all_strings)
107107
counts = collections.Counter(all_strings_concat)
108-
# add other values
109-
for i in range(256):
110-
if i not in counts:
111-
counts[i] = 0
112108
cb = huffman.codebook(counts.items())
113-
values = bytearray()
109+
values = []
114110
length_count = {}
115111
renumbered = 0
116112
last_l = None
@@ -124,26 +120,27 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
124120
if last_l:
125121
renumbered <<= (l - last_l)
126122
canonical[ch] = '{0:0{width}b}'.format(renumbered, width=l)
127-
if chr(ch) in C_ESCAPES:
128-
s = C_ESCAPES[chr(ch)]
129-
else:
130-
s = chr(ch)
131-
print("//", ch, s, counts[ch], canonical[ch], renumbered)
123+
s = C_ESCAPES.get(ch, ch)
124+
print("//", ord(ch), s, counts[ch], canonical[ch], renumbered)
132125
renumbered += 1
133126
last_l = l
134127
lengths = bytearray()
135-
for i in range(1, max(length_count) + 1):
128+
print("// length count", length_count)
129+
for i in range(1, max(length_count) + 2):
136130
lengths.append(length_count.get(i, 0))
131+
print("// values", values, "lengths", len(lengths), lengths)
132+
print("// estimated total memory size", len(lengths) + 2*len(values) + sum(len(cb[u]) for u in all_strings_concat))
137133
print("//", values, lengths)
134+
values_type = "uint16_t" if max(ord(u) for u in values) > 255 else "uint8_t"
138135
with open(compression_filename, "w") as f:
139136
f.write("const uint8_t lengths[] = {{ {} }};\n".format(", ".join(map(str, lengths))))
140-
f.write("const uint8_t values[256] = {{ {} }};\n".format(", ".join(map(str, values))))
137+
f.write("const {} values[] = {{ {} }};\n".format(values_type, ", ".join(str(ord(u)) for u in values)))
141138
return values, lengths
142139

143140
def decompress(encoding_table, length, encoded):
144141
values, lengths = encoding_table
145142
#print(l, encoded)
146-
dec = bytearray(length)
143+
dec = []
147144
this_byte = 0
148145
this_bit = 7
149146
b = encoded[this_byte]
@@ -173,14 +170,14 @@ def decompress(encoding_table, length, encoded):
173170
searched_length += lengths[bit_length]
174171

175172
v = values[searched_length + bits - max_code]
176-
dec[i] = v
177-
return dec
173+
dec.append(v)
174+
return ''.join(dec)
178175

179176
def compress(encoding_table, decompressed):
180-
if not isinstance(decompressed, bytes):
177+
if not isinstance(decompressed, str):
181178
raise TypeError()
182179
values, lengths = encoding_table
183-
enc = bytearray(len(decompressed))
180+
enc = bytearray(len(decompressed) * 3)
184181
#print(decompressed)
185182
#print(lengths)
186183
current_bit = 7
@@ -227,6 +224,8 @@ def compress(encoding_table, decompressed):
227224
current_bit -= 1
228225
if current_bit != 7:
229226
current_byte += 1
227+
if current_byte > len(decompressed):
228+
print("Note: compression increased length", repr(decompressed), len(decompressed), current_byte, file=sys.stderr)
230229
return enc[:current_byte]
231230

232231
def qstr_escape(qst):
@@ -345,9 +344,9 @@ def print_qstr_data(encoding_table, qcfgs, qstrs, i18ns):
345344
total_text_compressed_size = 0
346345
for original, translation in i18ns:
347346
translation_encoded = translation.encode("utf-8")
348-
compressed = compress(encoding_table, translation_encoded)
347+
compressed = compress(encoding_table, translation)
349348
total_text_compressed_size += len(compressed)
350-
decompressed = decompress(encoding_table, len(translation_encoded), compressed).decode("utf-8")
349+
decompressed = decompress(encoding_table, len(translation_encoded), compressed)
351350
for c in C_ESCAPES:
352351
decompressed = decompressed.replace(c, C_ESCAPES[c])
353352
print("TRANSLATION(\"{}\", {}, {{ {} }}) // {}".format(original, len(translation_encoded)+1, ", ".join(["0x{:02x}".format(x) for x in compressed]), decompressed))

supervisor/shared/translate.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,28 @@ void serial_write_compressed(const compressed_string_t* compressed) {
4242
serial_write(decompressed);
4343
}
4444

45+
// Encode the Unicode code point `u` as UTF-8 into `buf` and return the
// number of bytes written (1-3).  `buf` must have room for up to 3 bytes;
// no NUL terminator is written.  Assumes u <= 0xffff (BMP only), which
// holds for values produced by the Huffman decoder above.
STATIC int put_utf8(char *buf, int u) {
    if (u <= 0x7f) {
        // 1-byte sequence: 0xxxxxxx
        *buf = u;
        return 1;
    } else if (u <= 0x07ff) {
        // 2-byte sequence: 110xxxxx 10xxxxxx
        *buf++ = 0b11000000 | (u >> 6);
        *buf = 0b10000000 | (u & 0b00111111);
        return 2;
    } else { // u <= 0xffff
        // 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
        // Fixes vs. the committed version: the leading byte must carry the
        // 3-byte prefix 0b11100000 (the original used the 2-byte prefix
        // 0b11000000), and the pointer must advance between the two
        // continuation bytes (the original wrote both through the same
        // `*buf`, clobbering the first continuation byte).
        *buf++ = 0b11100000 | (u >> 12);
        *buf++ = 0b10000000 | ((u >> 6) & 0b00111111);
        *buf = 0b10000000 | (u & 0b00111111);
        return 3;
    }
}
60+
4561
char* decompress(const compressed_string_t* compressed, char* decompressed) {
4662
uint8_t this_byte = 0;
4763
uint8_t this_bit = 7;
4864
uint8_t b = compressed->data[this_byte];
4965
// Stop one early because the last byte is always NULL.
50-
for (uint16_t i = 0; i < compressed->length - 1; i++) {
66+
for (uint16_t i = 0; i < compressed->length - 1;) {
5167
uint32_t bits = 0;
5268
uint8_t bit_length = 0;
5369
uint32_t max_code = lengths[0];
@@ -72,7 +88,7 @@ char* decompress(const compressed_string_t* compressed, char* decompressed) {
7288
max_code = (max_code << 1) + lengths[bit_length];
7389
searched_length += lengths[bit_length];
7490
}
75-
decompressed[i] = values[searched_length + bits - max_code];
91+
i += put_utf8(decompressed + i, values[searched_length + bits - max_code]);
7692
}
7793

7894
decompressed[compressed->length-1] = '\0';

0 commit comments

Comments (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy