HANXIN ECI conversion, GB 18030 LIBICONV port; some codeword fixes; optimized encoding modes

This commit is contained in:
gitlost 2019-12-08 16:15:34 +00:00
parent ce8aa92846
commit 889e786d95
35 changed files with 67955 additions and 23877 deletions

View file

@ -1499,23 +1499,17 @@ static const Summary16 gb2312_uni2indx_pageff[15] = {
{ 7441, 0x0000 }, { 7441, 0x0000 }, { 7441, 0x002b },
};
int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
int gb2312_wctomb_zint(unsigned int* r, unsigned int wc) {
const Summary16 *summary = NULL;
if (wc >= 0x0000 && wc < 0x0460) {
if (wc == 0x00b7) { /* ZINT: Patched to duplicate map to 0xA1A4 */
if (n < 2) {
return -1;
}
r[0] = 0xA1; r[1] = 0xA4;
*r = 0xA1A4;
return 2;
}
summary = &gb2312_uni2indx_page00[(wc>>4)];
} else if (wc >= 0x2000 && wc < 0x2650) {
if (wc == 0x2014) { /* ZINT: Patched to duplicate map to 0xA1AA */
if (n < 2) {
return -1;
}
r[0] = 0xA1; r[1] = 0xAA;
*r = 0xA1AA;
return 2;
}
summary = &gb2312_uni2indx_page20[(wc>>4)-0x200];
@ -1532,10 +1526,6 @@ int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
unsigned short used = summary->used;
unsigned int i = wc & 0x0f;
if (used & ((unsigned short) 1 << i)) {
unsigned short c;
if (n < 2) {
return -1;
}
/* Keep in 'used' only the bits 0..i-1. */
used &= ((unsigned short) 1 << i) - 1;
/* Add 'summary->indx' and the number of bits set in 'used'. */
@ -1543,8 +1533,7 @@ int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
used = (used & 0x3333) + ((used & 0xcccc) >> 2);
used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);
used = (used & 0x00ff) + (used >> 8);
c = gb2312_2charset[summary->indx + used];
r[0] = (c >> 8); r[1] = (c & 0xff);
*r = gb2312_2charset[summary->indx + used];
return 2;
}
}
@ -1553,16 +1542,15 @@ int gb2312_wctomb_zint(unsigned char *r, unsigned int wc, size_t n) {
/* Convert UTF-8 string to GB 2312 (EUC-CN) and place in array of ints */
int gb2312_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], size_t* p_length, unsigned int* gbdata) {
int i, error_number, ret;
int i, error_number;
unsigned int length;
unsigned char buf[2];
#ifndef _MSC_VER
int utfdata[*p_length + 1]; /* Leave signed for the moment until `utf8toutf16()` signature changed */
unsigned int utfdata[*p_length + 1];
#else
int* utfdata = (int*) _alloca((*p_length + 1) * sizeof(int));
unsigned int* utfdata = (unsigned int*) _alloca((*p_length + 1) * sizeof(unsigned int));
#endif
error_number = utf8toutf16(symbol, source, utfdata, p_length);
error_number = utf8_to_unicode(symbol, source, utfdata, p_length, 1 /*disallow_4byte*/);
if (error_number != 0) {
return error_number;
}
@ -1571,12 +1559,10 @@ int gb2312_utf8tomb(struct zint_symbol *symbol, const unsigned char source[], si
if (utfdata[i] < 0x80) {
gbdata[i] = utfdata[i];
} else {
ret = gb2312_wctomb_zint(buf, utfdata[i], 2);
if (ret != 2) {
if (!gb2312_wctomb_zint(gbdata + i, utfdata[i])) {
strcpy(symbol->errtxt, "810: Invalid character in input data");
return ZINT_ERROR_INVALID_DATA;
}
gbdata[i] = (buf[0] << 8) | buf[1];
}
}