mirror of
https://git.code.sf.net/p/zint/code
synced 2025-05-24 20:14:28 -04:00
ECI: Update ECIs to AIM ITS/04-023:2022, adding UTF-16BE (was USC-2BE),
UTF-16LE, GBK, separate GB18030, UTF-32BE, UTF-32LE add examples to tests for DATAMATRIX, HANXIN, QRCODE HANXIN: Remove alternating filler in function information; GB 18030 now ECI 32 (previously used ECI 29); fix gate-posts on codeword limits use new ZXing-C++ HanXin detector (diagnostics2 branch) for tests check against ISO/IEC 20830:2021 (no substantive changes) backend_tcl: update ECIs; NOTE: changed names "unicode" -> "utf-16be", "euc-cn" -> "gb2312" GRIDMATRIX/HANXIN/QRCODE/RMQR: warn if auto-conversion (i.e. no ECI given) occurs to resp. specialized char sets (GB 2312/GB 18030/Shift JIS)
This commit is contained in:
parent
69876619dd
commit
624d40021e
38 changed files with 5761 additions and 835 deletions
|
@ -1,7 +1,7 @@
|
|||
/* eci.c - Extended Channel Interpretations
|
||||
|
||||
libzint - the open source barcode library
|
||||
Copyright (C) 2009 - 2021 Robin Stuart <rstuart114@gmail.com>
|
||||
Copyright (C) 2009-2022 Robin Stuart <rstuart114@gmail.com>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
@ -28,7 +28,6 @@
|
|||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
*/
|
||||
/* vim: set ts=4 sw=4 et : */
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <malloc.h>
|
||||
|
@ -40,6 +39,7 @@
|
|||
#include "big5.h"
|
||||
#include "gb2312.h"
|
||||
#include "ksx1001.h"
|
||||
#include "gb18030.h"
|
||||
|
||||
/* ECI 20 Shift JIS */
|
||||
static int sjis_wctomb(unsigned char *r, const unsigned int wc) {
|
||||
|
@ -126,6 +126,47 @@ static int euc_kr_wctomb(unsigned char *r, const unsigned int wc) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* ECI 31 GBK Chinese */
|
||||
static int gbk_wctomb(unsigned char *r, const unsigned int wc) {
|
||||
unsigned int c;
|
||||
|
||||
if (wc < 0x80) {
|
||||
*r = (unsigned char) wc;
|
||||
return 1;
|
||||
}
|
||||
if (gbk_wctomb_zint(&c, wc)) {
|
||||
r[0] = (unsigned char) (c >> 8);
|
||||
r[1] = (unsigned char) (c & 0xff);
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ECI 32 GB 18030 Chinese */
|
||||
static int gb18030_wctomb(unsigned char *r, const unsigned int wc) {
|
||||
unsigned int c1, c2;
|
||||
int ret;
|
||||
|
||||
if (wc < 0x80) {
|
||||
*r = (unsigned char) wc;
|
||||
return 1;
|
||||
}
|
||||
ret = gb18030_wctomb_zint(&c1, &c2, wc);
|
||||
if (ret == 2) {
|
||||
r[0] = (unsigned char) (c1 >> 8);
|
||||
r[1] = (unsigned char) (c1 & 0xff);
|
||||
return 2;
|
||||
}
|
||||
if (ret == 4) {
|
||||
r[0] = (unsigned char) (c1 >> 8);
|
||||
r[1] = (unsigned char) (c1 & 0xff);
|
||||
r[2] = (unsigned char) (c2 >> 8);
|
||||
r[3] = (unsigned char) (c2 & 0xff);
|
||||
return 4;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Helper to count the number of chars in a string within a range */
|
||||
static int chr_range_cnt(const unsigned char string[], const int length, const unsigned char c1,
|
||||
const unsigned char c2) {
|
||||
|
@ -149,8 +190,8 @@ static int chr_range_cnt(const unsigned char string[], const int length, const u
|
|||
|
||||
/* Is ECI convertible from UTF-8? */
|
||||
INTERNAL int is_eci_convertible(const int eci) {
|
||||
if (eci == 26 || (eci > 30 && eci != 170)) { /* Exclude ECI 170 - ASCII Invariant */
|
||||
/* UTF-8 (26) or 8-bit binary data (899) or undefined (> 30 and < 899) or not character set (> 899) */
|
||||
if (eci == 26 || (eci > 35 && eci != 170)) { /* Exclude ECI 170 - ASCII Invariant */
|
||||
/* UTF-8 (26) or 8-bit binary data (899) or undefined (> 35 and < 899) or not character set (> 899) */
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
|
@ -162,16 +203,21 @@ INTERNAL int get_eci_length(const int eci, const unsigned char source[], int len
|
|||
/* Only ASCII backslash (reverse solidus) exceeds UTF-8 length */
|
||||
length += chr_cnt(source, length, '\\');
|
||||
|
||||
} else if (eci == 25) { /* UCS-2BE */
|
||||
} else if (eci == 25 || eci == 33) { /* UTF-16 */
|
||||
/* All ASCII chars take 2 bytes */
|
||||
length += chr_range_cnt(source, length, 0, 0x7F);
|
||||
/* Surrogate pairs are 4 UTF-8 bytes long so fit */
|
||||
|
||||
} else if (eci == 29) { /* GB 2312 (and GB 18030 if Han Xin) */
|
||||
/* Not needed for GB 2312 but allow for GB 18030 4 byters */
|
||||
} else if (eci == 32) { /* GB 18030 */
|
||||
/* Allow for GB 18030 4 byters */
|
||||
length *= 2;
|
||||
|
||||
} else if (eci == 34 || eci == 35) { /* UTF-32 */
|
||||
/* Quadruple-up ASCII and double-up non-ASCII */
|
||||
length += chr_range_cnt(source, length, 0, 0x7F) * 2 + length;
|
||||
}
|
||||
|
||||
/* Big5 and EUC-KR fit in UTF-8 length */
|
||||
/* Big5, GB 2312, EUC-KR and GBK fit in UTF-8 length */
|
||||
|
||||
return length;
|
||||
}
|
||||
|
@ -180,14 +226,15 @@ INTERNAL int get_eci_length(const int eci, const unsigned char source[], int len
|
|||
INTERNAL int utf8_to_eci(const int eci, const unsigned char source[], unsigned char dest[], int *p_length) {
|
||||
|
||||
typedef int (*eci_func_t)(unsigned char *r, const unsigned int wc);
|
||||
static const eci_func_t eci_funcs[31] = {
|
||||
NULL, NULL, NULL, NULL, iso8859_2_wctosb,
|
||||
iso8859_3_wctosb, iso8859_4_wctosb, iso8859_5_wctosb, iso8859_6_wctosb, iso8859_7_wctosb,
|
||||
iso8859_8_wctosb, iso8859_9_wctosb, iso8859_10_wctosb, iso8859_11_wctosb, NULL,
|
||||
iso8859_13_wctosb, iso8859_14_wctosb, iso8859_15_wctosb, iso8859_16_wctosb, NULL,
|
||||
sjis_wctomb, cp1250_wctosb, cp1251_wctosb, cp1252_wctosb, cp1256_wctosb,
|
||||
ucs2be_wctomb, NULL, ascii_wctosb, big5_wctomb, gb2312_wctomb,
|
||||
euc_kr_wctomb,
|
||||
static const eci_func_t eci_funcs[36] = {
|
||||
NULL, NULL, NULL, NULL, iso8859_2_wctosb, /*0-4*/
|
||||
iso8859_3_wctosb, iso8859_4_wctosb, iso8859_5_wctosb, iso8859_6_wctosb, iso8859_7_wctosb, /*5-9*/
|
||||
iso8859_8_wctosb, iso8859_9_wctosb, iso8859_10_wctosb, iso8859_11_wctosb, NULL, /*10-14*/
|
||||
iso8859_13_wctosb, iso8859_14_wctosb, iso8859_15_wctosb, iso8859_16_wctosb, NULL, /*15-19*/
|
||||
sjis_wctomb, cp1250_wctosb, cp1251_wctosb, cp1252_wctosb, cp1256_wctosb, /*20-24*/
|
||||
utf16be_wctomb, NULL, ascii_wctosb, big5_wctomb, gb2312_wctomb, /*25-29*/
|
||||
euc_kr_wctomb, gbk_wctomb, gb18030_wctomb, utf16le_wctomb, utf32be_wctomb, /*30-34*/
|
||||
utf32le_wctomb,
|
||||
};
|
||||
eci_func_t eci_func;
|
||||
unsigned int codepoint, state;
|
||||
|
@ -277,3 +324,5 @@ INTERNAL int get_best_eci(const unsigned char source[], int length) {
|
|||
|
||||
return 26; // If all of these fail, use Unicode!
|
||||
}
|
||||
|
||||
/* vim: set ts=4 sw=4 et : */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue