diff --git a/.vs/rufus.vcxproj b/.vs/rufus.vcxproj index 99adc9f0..f1047b25 100644 --- a/.vs/rufus.vcxproj +++ b/.vs/rufus.vcxproj @@ -410,6 +410,7 @@ + @@ -446,6 +447,7 @@ + diff --git a/.vs/rufus.vcxproj.filters b/.vs/rufus.vcxproj.filters index cd02009c..7a980a63 100644 --- a/.vs/rufus.vcxproj.filters +++ b/.vs/rufus.vcxproj.filters @@ -93,6 +93,9 @@ Source Files + + Source Files + @@ -197,6 +200,9 @@ Header Files + + Header Files + diff --git a/src/Makefile.am b/src/Makefile.am index 59cf87f7..ab03cd2b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -18,7 +18,7 @@ AM_V_WINDRES = $(AM_V_WINDRES_$(V)) $(AM_V_WINDRES) $(AM_RCFLAGS) -i $< -o $@ rufus_SOURCES = badblocks.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c hash.c icon.c iso.c \ - localization.c net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c + localization.c net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c xml.c rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio -I$(srcdir)/../res $(AM_CFLAGS) \ -DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus rufus_LDFLAGS = $(AM_LDFLAGS) -mwindows -L $(srcdir)/../.mingw diff --git a/src/Makefile.in b/src/Makefile.in index c0fa14e6..5f27da84 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -98,7 +98,8 @@ am_rufus_OBJECTS = rufus-badblocks.$(OBJEXT) rufus-dev.$(OBJEXT) \ rufus-rufus.$(OBJEXT) rufus-smart.$(OBJEXT) \ rufus-stdfn.$(OBJEXT) rufus-stdio.$(OBJEXT) \ rufus-stdlg.$(OBJEXT) rufus-syslinux.$(OBJEXT) \ - rufus-ui.$(OBJEXT) rufus-vhd.$(OBJEXT) rufus-wue.$(OBJEXT) + rufus-ui.$(OBJEXT) rufus-vhd.$(OBJEXT) rufus-wue.$(OBJEXT) \ + rufus-xml.$(OBJEXT) rufus_OBJECTS = $(am_rufus_OBJECTS) am__DEPENDENCIES_1 = rufus_DEPENDENCIES = rufus_rc.o bled/libbled.a ext2fs/libext2fs.a \ @@ -285,7 +286,7 @@ AM_V_WINDRES_1 = $(WINDRES) AM_V_WINDRES_ = $(AM_V_WINDRES_$(AM_DEFAULT_VERBOSITY)) AM_V_WINDRES = $(AM_V_WINDRES_$(V)) rufus_SOURCES = badblocks.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c hash.c icon.c iso.c \ - localization.c net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c + localization.c net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c xml.c rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio -I$(srcdir)/../res $(AM_CFLAGS) \ -DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus @@ -504,6 +505,12 @@ rufus-wue.o: wue.c rufus-wue.obj: wue.c $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-wue.obj `if test -f 'wue.c'; then $(CYGPATH_W) 'wue.c'; else $(CYGPATH_W) '$(srcdir)/wue.c'; fi` +rufus-xml.o: xml.c + $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-xml.o `test -f 'xml.c' || echo '$(srcdir)/'`xml.c + +rufus-xml.obj: xml.c + $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-xml.obj `if test -f 'xml.c'; then $(CYGPATH_W) 'xml.c'; else $(CYGPATH_W) '$(srcdir)/xml.c'; fi` + # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, diff --git a/src/license.h b/src/license.h index 9a59e9f5..a14d7131 100644 --- a/src/license.h +++ b/src/license.h @@ -81,6 +81,10 @@ const char* additional_copyrights = "http://www.ridgecrop.demon.co.uk/index.htm?fat32format.htm\\line\n" "GNU General Public License (GPL) v2 or later\\line\n" "\\line\n" +"XML parsing support from ezxml by Aaron Voisine:\\line\n" +"https://ezxml.sourceforge.net/\\line\n" +"MIT license\\line\n" +"\\line\n" "fmifs.dll usage based on Formatx by Mark Russinovich:\\line\n" "https://svn.reactos.org/reactos/trunk/reactos/dll/win32/fmifs\\line\n" "Public Domain\\line\n" diff --git a/src/msapi_utf8.h b/src/msapi_utf8.h index e6ab9ad2..3daef720 100644 --- a/src/msapi_utf8.h +++ b/src/msapi_utf8.h @@ -55,9 +55,9 @@ extern "C" { #define LTEXT(txt) _LTEXT(txt) #define wchar_to_utf8_no_alloc(wsrc, dest, dest_size) \ - WideCharToMultiByte(CP_UTF8, 0, wsrc, -1, dest, dest_size, NULL, NULL) + WideCharToMultiByte(CP_UTF8, 0, wsrc, -1, dest, (int)(dest_size), NULL, NULL) #define utf8_to_wchar_no_alloc(src, wdest, wdest_size) \ - MultiByteToWideChar(CP_UTF8, 0, src, -1, wdest, wdest_size) + MultiByteToWideChar(CP_UTF8, 0, src, -1, wdest, (int)(wdest_size)) #define Edit_ReplaceSelU(hCtrl, str) ((void)SendMessageLU(hCtrl, EM_REPLACESEL, (WPARAM)FALSE, str)) #define ComboBox_AddStringU(hCtrl, str) ((int)(DWORD)SendMessageLU(hCtrl, CB_ADDSTRING, (WPARAM)FALSE, str)) #define ComboBox_InsertStringU(hCtrl, index, str) ((int)(DWORD)SendMessageLU(hCtrl, CB_INSERTSTRING, (WPARAM)index, str)) @@ -83,7 +83,7 @@ extern "C" { #define wfree(p) sfree(w ## p) /* - * Converts an UTF-16 string to UTF8 (allocate returned string) + * Converts an UTF-16 string to UTF8 (allocates returned string) * Returns NULL on error */ static __inline char* wchar_to_utf8(const wchar_t* wstr) @@ -115,7 +115,7 @@ static __inline char* wchar_to_utf8(const wchar_t* wstr) } /* - * Converts an UTF8 string to UTF-16 (allocate returned string) + * Converts an UTF8 string to UTF-16 (allocates returned string) * Returns NULL on error */ static __inline wchar_t* utf8_to_wchar(const char* str) @@ -146,7 +146,7 @@ static __inline wchar_t* utf8_to_wchar(const char* str) } /* -* Converts an non NUL-terminated UTF-16 string of length len to UTF8 (allocate returned string) +* Converts a non NUL-terminated UTF-16 string of length wlen to NUL-terminated UTF8 (allocates returned string) * Returns NULL on error */ static __inline char* wchar_len_to_utf8(const wchar_t* wstr, int wlen) diff --git a/src/rufus.rc b/src/rufus.rc index 53a60431..86fd5f39 100644 --- a/src/rufus.rc +++ b/src/rufus.rc @@ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL IDD_DIALOG DIALOGEX 12, 12, 232, 326 STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU EXSTYLE WS_EX_ACCEPTFILES -CAPTION "Rufus 4.8.2233" +CAPTION "Rufus 4.8.2234" FONT 9, "Segoe UI Symbol", 400, 0, 0x0 BEGIN LTEXT "Drive Properties",IDS_DRIVE_PROPERTIES_TXT,8,6,53,12,NOT WS_GROUP @@ -407,8 +407,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 4,8,2233,0 - PRODUCTVERSION 4,8,2233,0 + FILEVERSION 4,8,2234,0 + PRODUCTVERSION 4,8,2234,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -426,13 +426,13 @@ BEGIN VALUE "Comments", "https://rufus.ie" VALUE "CompanyName", "Akeo Consulting" VALUE "FileDescription", "Rufus" - VALUE "FileVersion", "4.8.2233" + VALUE "FileVersion", "4.8.2234" VALUE "InternalName", "Rufus" VALUE "LegalCopyright", "© 2011-2025 Pete Batard (GPL v3)" VALUE "LegalTrademarks", "https://www.gnu.org/licenses/gpl-3.0.html" VALUE "OriginalFilename", "rufus-4.8.exe" VALUE "ProductName", "Rufus" - VALUE "ProductVersion", "4.8.2233" + VALUE "ProductVersion", "4.8.2234" END END BLOCK "VarFileInfo" diff --git a/src/wue.c b/src/wue.c index 8281a6f4..c2815abf 100644 --- a/src/wue.c +++ b/src/wue.c @@ -25,6 +25,7 @@ #include "rufus.h" #include "vhd.h" +#include "xml.h" #include "drive.h" #include "format.h" #include "missing.h" @@ -418,24 +419,24 @@ out: /// /// Populate the img_report Window version from an install[.wim|.esd] XML index /// -/// The path of the extracted index XML. +/// The index XML data. +/// The length of the index XML data. /// The index of the occurrence to look for. -static void PopulateWindowsVersionFromXml(const char* xml_file, int index) +static void PopulateWindowsVersionFromXml(const char* xml, size_t xml_len, int index) { char* val; + ezxml_t pxml = ezxml_parse_str((char*)xml, xml_len); + if (pxml == NULL) + return; - val = get_token_data_file_indexed("MAJOR", xml_file, index); + val = ezxml_get_val(pxml, "IMAGE", index, "WINDOWS", 0, "VERSION", 0, "MAJOR", -1); img_report.win_version.major = (uint16_t)safe_atoi(val); - free(val); - val = get_token_data_file_indexed("MINOR", xml_file, index); + val = ezxml_get_val(pxml, "IMAGE", index, "WINDOWS", 0, "VERSION", 0, "MINOR", -1); img_report.win_version.minor = (uint16_t)safe_atoi(val); - free(val); - val = get_token_data_file_indexed("BUILD", xml_file, index); + val = ezxml_get_val(pxml, "IMAGE", index, "WINDOWS", 0, "VERSION", 0, "BUILD", -1); img_report.win_version.build = (uint16_t)safe_atoi(val); - free(val); - val = get_token_data_file_indexed("SPBUILD", xml_file, index); + val = ezxml_get_val(pxml, "IMAGE", index, "WINDOWS", 0, "VERSION", 0, "SPBUILD", -1); img_report.win_version.revision = (uint16_t)safe_atoi(val); - free(val); // Adjust versions so that we produce a more accurate report in the log // (and yeah, I know we won't properly report Server, but I don't care) if (img_report.win_version.major <= 5) { @@ -463,10 +464,11 @@ static void PopulateWindowsVersionFromXml(const char* xml_file, int index) if (img_report.win_version.build > 20000) img_report.win_version.major = 11; } + ezxml_free(pxml); } /// -/// Populate the img_report Window version from an an install[.wim|.esd], mounting the +/// Populate the img_report Window version from an install[.wim|.esd]. /// ISO if needed. Requires Windows 8 or later. /// /// (none) @@ -474,7 +476,8 @@ static void PopulateWindowsVersionFromXml(const char* xml_file, int index) BOOL PopulateWindowsVersion(void) { char *mounted_iso, mounted_image_path[128]; - char xml_file[MAX_PATH] = ""; + char xml_file[MAX_PATH] = "", *xml; + size_t xml_len; memset(&img_report.win_version, 0, sizeof(img_report.win_version)); @@ -506,7 +509,12 @@ BOOL PopulateWindowsVersion(void) goto out; } - PopulateWindowsVersionFromXml(xml_file, 1); + xml_len = read_file(xml_file, (uint8_t**)&xml); + if (xml_len == 0) { + uprintf("Could not read WIM index XML"); + goto out; + } + PopulateWindowsVersionFromXml(xml, xml_len, 1); out: DeleteFileU(xml_file); @@ -549,12 +557,14 @@ BOOL CopySKUSiPolicy(const char* drive_name) /// -2 on user cancel, -1 on other error, >=0 on success. int SetWinToGoIndex(void) { - char* mounted_iso, mounted_image_path[128]; - char xml_file[MAX_PATH] = ""; - char* install_names[MAX_WININST]; - StrArray version_name, version_index; int i; + char* mounted_iso, mounted_image_path[128]; + char* install_names[MAX_WININST]; + char xml_file[MAX_PATH] = "", *xml = NULL; + size_t xml_len; + StrArray version_name, version_index; BOOL bNonStandard = FALSE; + ezxml_t index = NULL, image = NULL; // Sanity checks wintogo_index = -1; @@ -600,22 +610,31 @@ int SetWinToGoIndex(void) goto out; } + xml_len = read_file(xml_file, (uint8_t**)&xml); + if (xml_len == 0) { + uprintf("Could not read WIM XML"); + goto out; + } + StrArrayCreate(&version_name, 16); StrArrayCreate(&version_index, 16); - for (i = 0; StrArrayAdd(&version_index, get_token_data_file_indexed("IMAGE INDEX", xml_file, i + 1), FALSE) >= 0; i++) { + index = ezxml_parse_str((char*)xml, xml_len); + if (index == NULL) { + uprintf("Could not parse WIM XML"); + goto out; + } + + for (i = 0, image = ezxml_child(index, "IMAGE"); + StrArrayAdd(&version_index, ezxml_attr(image, "INDEX"), TRUE) >= 0; + image = image->next, i++) { // Some people are apparently creating *unofficial* Windows ISOs that don't have DISPLAYNAME elements. - // If we are parsing such an ISO, try to fall back to using DESCRIPTION. Of course, since we don't use - // a formal XML parser, if an ISO mixes entries with both DISPLAYNAME and DESCRIPTION and others with - // only DESCRIPTION, the version names we report will be wrong. - // But hey, there's only so far I'm willing to go to help people who, not content to have demonstrated - // their utter ignorance on development matters, are also trying to lecture experienced developers - // about specific "noob mistakes"... that don't exist in the code they are trying to criticize. - if (StrArrayAdd(&version_name, get_token_data_file_indexed("DISPLAYNAME", xml_file, i + 1), FALSE) < 0) { - bNonStandard = TRUE; - if (StrArrayAdd(&version_name, get_token_data_file_indexed("DESCRIPTION", xml_file, i + 1), FALSE) < 0) { + // If we are parsing such an ISO, try to fall back to using DESCRIPTION. + if (StrArrayAdd(&version_name, ezxml_child_val(image, "DISPLAYNAME"), TRUE) < 0) { + if (StrArrayAdd(&version_name, ezxml_child_val(image, "DESCRIPTION"), TRUE) < 0) { uprintf("Warning: Could not find a description for image index %d", i + 1); StrArrayAdd(&version_name, "Unknown Windows Version", TRUE); } + bNonStandard = TRUE; } } if (bNonStandard) @@ -632,7 +651,7 @@ int SetWinToGoIndex(void) wintogo_index = atoi(version_index.String[i - 1]); if (i > 0) { // re-populate the version data from the selected XML index - PopulateWindowsVersionFromXml(xml_file, i); + PopulateWindowsVersionFromXml(xml, xml_len, i); // If we couldn't obtain the major and build, we have a problem if (img_report.win_version.major == 0 || img_report.win_version.build == 0) uprintf("Warning: Could not obtain version information from XML index (Nonstandard Windows image?)"); @@ -656,6 +675,8 @@ int SetWinToGoIndex(void) StrArrayDestroy(&version_index); out: + ezxml_free(index); + free(xml); DeleteFileU(xml_file); if (!img_report.is_windows_img) VhdUnmountImage(); diff --git a/src/xml.c b/src/xml.c index 82b11fb9..b344bb73 100644 --- a/src/xml.c +++ b/src/xml.c @@ -24,20 +24,31 @@ #include #include +#include #include #include #include #include #include -#ifndef EZXML_NOMMAP -#include -#endif // EZXML_NOMMAP #include -#include "ezxml.h" +#include "xml.h" +#include "rufus.h" +#include "msapi_utf8.h" +/* Memory leaks detection - define _CRTDBG_MAP_ALLOC as preprocessor macro */ +#ifdef _CRTDBG_MAP_ALLOC +#include +#include +#endif + +#define EZXML_NOMMAP #define EZXML_WS "\t\r\n " // whitespace -#define EZXML_ERRL 128 // maximum error string length +#define EZXML_ERRL 256 // maximum error string length +#ifdef _MSC_VER +#pragma warning(disable:6011) +#pragma warning(disable:4267) +#endif typedef struct ezxml_root *ezxml_root_t; struct ezxml_root { // additional data for the root tag struct ezxml xml; // is a super-struct built on top of ezxml struct @@ -56,6 +67,15 @@ struct ezxml_root { // additional data for the root tag char *EZXML_NIL[] = { NULL }; // empty, null terminated array of strings +// what realloc should be doing all along +static inline void* _realloc(void* ptr, unsigned int size) { + void* old_ptr = ptr; + ptr = realloc(ptr, size); + if (ptr == NULL) + free(old_ptr); + return ptr; +} + // returns the first child tag with the given name or NULL if not found ezxml_t ezxml_child(ezxml_t xml, const char *name) { @@ -64,6 +84,14 @@ ezxml_t ezxml_child(ezxml_t xml, const char *name) return xml; } +// returns the value of the first child tag with the given name or NULL if not found +char* ezxml_child_val(ezxml_t xml, const char* name) +{ + xml = (xml) ? xml->child : NULL; + while (xml && strcmp(name, xml->name)) xml = xml->sibling; + return xml ? xml->txt : NULL; +} + // returns the Nth tag with the same name in the same subsection or NULL if not // found ezxml_t ezxml_idx(ezxml_t xml, int idx) @@ -119,6 +147,19 @@ ezxml_t ezxml_get(ezxml_t xml, ...) return r; } +// Same as above but returns the text value or NULL if not found +char* ezxml_get_val(ezxml_t xml, ...) +{ + va_list ap; + ezxml_t r; + + va_start(ap, xml); + r = ezxml_vget(xml, ap); + va_end(ap); + return r ? r->txt : NULL; +} + + // returns a null terminated array of processing instructions for the given // target const char **ezxml_pi(ezxml_t xml, const char *target) @@ -138,13 +179,14 @@ ezxml_t ezxml_err(ezxml_root_t root, char *s, const char *err, ...) va_list ap; int line = 1; char *t, fmt[EZXML_ERRL]; - + for (t = root->s; t < s; t++) if (*t == '\n') line++; - snprintf(fmt, EZXML_ERRL, "[error near line %d]: %s", line, err); + snprintf(fmt, EZXML_ERRL, "xml parsing error near line %d: %s", line, err); va_start(ap, err); vsnprintf(root->err, EZXML_ERRL, fmt, ap); va_end(ap); + uprintf("%s", root->err); return &root->xml; } @@ -168,7 +210,7 @@ char *ezxml_decode(char *s, char **ent, char t) } for (s = r; ; ) { - while (*s && *s != '&' && (*s != '%' || t != '%') && !isspace(*s)) s++; + while (*s && *s != '&' && (*s != '%' || t != '%') && !isspace((uint8_t)*s)) s++; if (! *s) break; else if (t != 'c' && ! strncmp(s, "&#", 2)) { // character reference @@ -176,11 +218,13 @@ char *ezxml_decode(char *s, char **ent, char t) else c = strtol(s + 2, &e, 10); // base 10 if (! c || *e != ';') { s++; continue; } // not a character ref - if (c < 0x80) *(s++) = c; // US-ASCII subset + if (c < 0x80) *(s++) = (char)c; // US-ASCII subset else { // multi-byte UTF-8 sequence for (b = 0, d = c; d; d /= 2) b++; // number of bits in c + // UTF-8 can ecode max 36 bits (standard says 21) - noop on 32 bit. + if (b > 36) { s++; continue; } b = (b - 2) / 5; // number of bytes in payload - *(s++) = (0xFF << (7 - b)) | (c >> (6 * b)); // head + *(s++) = (char)(0xFF << (7 - b)) | (char)(c >> (6 * b)); // head while (b) *(s++) = 0x80 | ((c >> (6 * --b)) & 0x3F); // payload } @@ -192,24 +236,24 @@ char *ezxml_decode(char *s, char **ent, char t) b += 2); // find entity in entity list if (ent[b++]) { // found a match - if ((c = strlen(ent[b])) - 1 > (e = strchr(s, ';')) - s) { - l = (d = (s - r)) + c + strlen(e); // new length - r = (r == m) ? strcpy(malloc(l), r) : realloc(r, l); + if ((c = (long)strlen(ent[b])) - 1 > (e = strchr(s, ';')) - s) { + l = (d = (long)(s - r)) + c + (long)strlen(e); // new length + r = (r == m) ? strcpy(malloc(l), r) : _realloc(r, l); e = strchr((s = r + d), ';'); // fix up pointers } memmove(s + c, e + 1, strlen(e)); // shift rest of string - strncpy(s, ent[b], c); // copy in replacement text + strncpy_s(s, c, ent[b], _TRUNCATE); // copy in replacement text } else s++; // not a known entity } - else if ((t == ' ' || t == '*') && isspace(*s)) *(s++) = ' '; + else if ((t == ' ' || t == '*') && isspace((uint8_t)*s)) *(s++) = ' '; else s++; // no decoding needed } if (t == '*') { // normalize spaces for non-cdata attributes for (s = r; *s; s++) { - if ((l = strspn(s, " "))) memmove(s, s + l, strlen(s + l) + 1); + if ((l = (long)strspn(s, " "))) memmove(s, s + l, strlen(s + l) + 1); while (*s && *s != ' ') s++; } if (--s >= r && *s == ' ') *s = '\0'; // trim any trailing space @@ -300,16 +344,16 @@ void ezxml_proc_inst(ezxml_root_t root, char *s, size_t len) while (root->pi[i] && strcmp(target, root->pi[i][0])) i++; // find target if (! root->pi[i]) { // new target - root->pi = realloc(root->pi, sizeof(char **) * (i + 2)); + root->pi = _realloc(root->pi, sizeof(char **) * (i + 2)); root->pi[i] = malloc(sizeof(char *) * 3); root->pi[i][0] = target; root->pi[i][1] = (char *)(root->pi[i + 1] = NULL); // terminate pi list - root->pi[i][2] = strdup(""); // empty document position list + root->pi[i][2] = _strdup(""); // empty document position list } while (root->pi[i][j]) j++; // find end of instruction list for this target - root->pi[i] = realloc(root->pi[i], sizeof(char *) * (j + 3)); - root->pi[i][j + 2] = realloc(root->pi[i][j + 1], j + 1); + root->pi[i] = _realloc(root->pi[i], sizeof(char *) * (j + 3)); + root->pi[i][j + 2] = _realloc(root->pi[i][j + 1], j + 1); strcpy(root->pi[i][j + 2] + j - 1, (root->xml.name) ? ">" : "<"); root->pi[i][j + 1] = NULL; // null terminate pi list for this target root->pi[i][j] = s; // set instruction @@ -320,6 +364,7 @@ short ezxml_internal_dtd(ezxml_root_t root, char *s, size_t len) { char q, *c, *t, *n = NULL, *v, **ent, **pe; int i, j; + size_t n_len, n_off; pe = memcpy(malloc(sizeof(EZXML_NIL)), EZXML_NIL, sizeof(EZXML_NIL)); @@ -330,7 +375,13 @@ short ezxml_internal_dtd(ezxml_root_t root, char *s, size_t len) else if (! strncmp(s, "= n_len) { + ezxml_err(root, NULL, "write past buffer (ent; ent[i]; i++); - ent = realloc(ent, (i + 3) * sizeof(char *)); // space for next ent + ent = _realloc(ent, (i + 3) * sizeof(char *)); // space for next ent if (*c == '%') pe = ent; else root->ent = ent; @@ -359,9 +410,10 @@ short ezxml_internal_dtd(ezxml_root_t root, char *s, size_t len) if (! *t) { ezxml_err(root, t, "unclosed ")) == '>') continue; else *s = '\0'; // null terminate tag name - for (i = 0; root->attr[i] && strcmp(n, root->attr[i][0]); i++); + for (i = 0; n && root->attr[i] && strcmp(n, root->attr[i][0]); i++); - while (*(n = ++s + strspn(s, EZXML_WS)) && *n != '>') { + ++s; + while (*(n = s + strspn(s, EZXML_WS)) && *n != '>') { if (*(s = n + strcspn(n, EZXML_WS))) *s = '\0'; // attr name else { ezxml_err(root, t, "malformed attr[i]) { // new tag name root->attr = (! i) ? malloc(2 * sizeof(char **)) - : realloc(root->attr, + : _realloc(root->attr, (i + 2) * sizeof(char **)); root->attr[i] = malloc(2 * sizeof(char *)); root->attr[i][0] = t; // set tag name @@ -394,7 +446,7 @@ short ezxml_internal_dtd(ezxml_root_t root, char *s, size_t len) } for (j = 1; root->attr[i][j]; j += 3); // find end of list - root->attr[i] = realloc(root->attr[i], + root->attr[i] = _realloc(root->attr[i], (j + 4) * sizeof(char *)); root->attr[i][j + 3] = NULL; // null terminate list @@ -438,23 +490,23 @@ char *ezxml_str2utf8(char **s, size_t *len) c = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000; } - while (l + 6 > max) u = realloc(u, max += EZXML_BUFSIZE); - if (c < 0x80) u[l++] = c; // US-ASCII subset + while (l + 6 > max) u = _realloc(u, max += EZXML_BUFSIZE); + if (c < 0x80) u[l++] = (char)c; // US-ASCII subset else { // multi-byte UTF-8 sequence for (b = 0, d = c; d; d /= 2) b++; // bits in c b = (b - 2) / 5; // bytes in payload - u[l++] = (0xFF << (7 - b)) | (c >> (6 * b)); // head + u[l++] = (char)(0xFF << (7 - b)) | (char)(c >> (6 * b)); // head while (b) u[l++] = 0x80 | ((c >> (6 * --b)) & 0x3F); // payload } } - return *s = realloc(u, *len = l); + return *s = _realloc(u, *len = l); } // frees a tag attribute list void ezxml_free_attr(char **attr) { int i = 0; char *m; - + if (! attr || attr == EZXML_NIL) return; // nothing to free while (attr[i]) i += 2; // find end of attribute list m = attr[i + 1]; // list of which names and values are malloced @@ -473,6 +525,7 @@ ezxml_t ezxml_parse_str(char *s, size_t len) char q, e, *d, **attr, **a = NULL; // initialize a to avoid compile warning int l, i, j; + if (!root) return NULL; root->m = s; if (! len) return ezxml_err(root, NULL, "root tag missing"); root->u = ezxml_str2utf8(&s, &len); // convert utf-16 to utf-8 @@ -493,7 +546,7 @@ ezxml_t ezxml_parse_str(char *s, size_t len) return ezxml_err(root, d, "markup outside of root element"); s += strcspn(s, EZXML_WS "/>"); - while (isspace(*s)) *(s++) = '\0'; // null terminate tag name + while (isspace((uint8_t)*s)) *(s++) = '\0'; // null terminate tag name if (*s && *s != '/' && *s != '>') // find tag in default attr list for (i = 0; (a = root->attr[i]) && strcmp(a[0], d); i++); @@ -509,7 +562,7 @@ ezxml_t ezxml_parse_str(char *s, size_t len) attr[l] = s; // set attribute name s += strcspn(s, EZXML_WS "=/>"); - if (*s == '=' || isspace(*s)) { + if (*s == '=' || isspace((uint8_t)*s)) { *(s++) = '\0'; // null terminate tag attribute name q = *(s += strspn(s, EZXML_WS "=")); if (q == '"' || q == '\'') { // attribute value @@ -528,7 +581,7 @@ ezxml_t ezxml_parse_str(char *s, size_t len) attr[l + 3][l / 2] = EZXML_TXTM; // value malloced } } - while (isspace(*s)) s++; + while (isspace((uint8_t)*s)) s++; } if (*s == '/') { // self closing tag @@ -555,7 +608,7 @@ ezxml_t ezxml_parse_str(char *s, size_t len) if (! (q = *s) && e != '>') return ezxml_err(root, d, "missing >"); *s = '\0'; // temporarily null terminate tag name if (ezxml_close_tag(root, d, s)) return &root->xml; - if (isspace(*s = q)) s += strspn(s, EZXML_WS); + if (isspace((uint8_t)(*s = q))) s += strspn(s, EZXML_WS); } else if (! strncmp(s, "!--", 3)) { // xml comment if (! (s = strstr(s + 3, "--")) || (*(s += 2) != '>' && *s) || @@ -611,11 +664,11 @@ ezxml_t ezxml_parse_fp(FILE *fp) if (! (s = malloc(EZXML_BUFSIZE))) return NULL; do { len += (l = fread((s + len), 1, EZXML_BUFSIZE, fp)); - if (l == EZXML_BUFSIZE) s = realloc(s, len + EZXML_BUFSIZE); + if (l == EZXML_BUFSIZE) s = _realloc(s, len + EZXML_BUFSIZE); } while (s && l == EZXML_BUFSIZE); if (! s) return NULL; - root = (ezxml_root_t)ezxml_parse_str(s, len); + if (!(root = (ezxml_root_t)ezxml_parse_str(s, len))) { free(s); return NULL; }; root->len = -1; // so we know to free s in ezxml_free() return &root->xml; } @@ -639,12 +692,14 @@ ezxml_t ezxml_parse_fd(int fd) MAP_FAILED) { madvise(m, l, MADV_SEQUENTIAL); // optimize for sequential access root = (ezxml_root_t)ezxml_parse_str(m, st.st_size); + if (!root) { munmap(m,l); return NULL; }; madvise(m, root->len = l, MADV_NORMAL); // put it back to normal } else { // mmap failed, read file into memory #endif // EZXML_NOMMAP - l = read(fd, m = malloc(st.st_size), st.st_size); + l = (long)_read(fd, m = malloc((size_t)st.st_size), (unsigned int)st.st_size); root = (ezxml_root_t)ezxml_parse_str(m, l); + if (!root) { free(m); return NULL; }; root->len = -1; // so we know to free s in ezxml_free() #ifndef EZXML_NOMMAP } @@ -655,10 +710,10 @@ ezxml_t ezxml_parse_fd(int fd) // a wrapper for ezxml_parse_fd that accepts a file name ezxml_t ezxml_parse_file(const char *file) { - int fd = open(file, O_RDONLY, 0); + int fd = _openU(file, O_RDONLY, 0); ezxml_t xml = ezxml_parse_fd(fd); - if (fd >= 0) close(fd); + if (fd >= 0) _close(fd); return xml; } @@ -670,7 +725,7 @@ char *ezxml_ampencode(const char *s, size_t len, char **dst, size_t *dlen, const char *e; for (e = s + len; s != e; s++) { - while (*dlen + 10 > *max) *dst = realloc(*dst, *max += EZXML_BUFSIZE); + while (*dlen + 10 > *max) *dst = _realloc(*dst, *max += EZXML_BUFSIZE); switch (*s) { case '\0': return *dst; @@ -701,13 +756,13 @@ char *ezxml_toxml_r(ezxml_t xml, char **s, size_t *len, size_t *max, *s = ezxml_ampencode(txt + start, xml->off - start, s, len, max, 0); while (*len + strlen(xml->name) + 4 > *max) // reallocate s - *s = realloc(*s, *max += EZXML_BUFSIZE); + *s = _realloc(*s, *max += EZXML_BUFSIZE); *len += sprintf(*s + *len, "<%s", xml->name); // open tag for (i = 0; xml->attr[i]; i += 2) { // tag attributes if (ezxml_attr(xml, xml->attr[i]) != xml->attr[i + 1]) continue; while (*len + strlen(xml->attr[i]) + 7 > *max) // reallocate s - *s = realloc(*s, *max += EZXML_BUFSIZE); + *s = _realloc(*s, *max += EZXML_BUFSIZE); *len += sprintf(*s + *len, " %s=\"", xml->attr[i]); ezxml_ampencode(xml->attr[i + 1], -1, s, len, max, 1); @@ -719,7 +774,7 @@ char *ezxml_toxml_r(ezxml_t xml, char **s, size_t *len, size_t *max, if (! attr[i][j + 1] || ezxml_attr(xml, attr[i][j]) != attr[i][j + 1]) continue; // skip duplicates and non-values while (*len + strlen(attr[i][j]) + 7 > *max) // reallocate s - *s = realloc(*s, *max += EZXML_BUFSIZE); + *s = _realloc(*s, *max += EZXML_BUFSIZE); *len += sprintf(*s + *len, " %s=\"", attr[i][j]); ezxml_ampencode(attr[i][j + 1], -1, s, len, max, 1); @@ -731,7 +786,7 @@ char *ezxml_toxml_r(ezxml_t xml, char **s, size_t *len, size_t *max, : ezxml_ampencode(xml->txt, -1, s, len, max, 0); //data while (*len + strlen(xml->name) + 4 > *max) // reallocate s - *s = realloc(*s, *max += EZXML_BUFSIZE); + *s = _realloc(*s, *max += EZXML_BUFSIZE); *len += sprintf(*s + *len, "", xml->name); // close tag @@ -747,10 +802,12 @@ char *ezxml_toxml(ezxml_t xml) ezxml_t p = (xml) ? xml->parent : NULL, o = (xml) ? xml->ordered : NULL; ezxml_root_t root = (ezxml_root_t)xml; size_t len = 0, max = EZXML_BUFSIZE; - char *s = strcpy(malloc(max), ""), *t, *n; + char *s = malloc(max), *t, *n; int i, j, k; - if (! xml || ! xml->name) return realloc(s, len + 1); + if (!s) return (NULL); + s = strcpy(s, ""); + if (! xml || ! xml->name) return _realloc(s, len + 1); while (root->xml.parent) root = (ezxml_root_t)root->xml.parent; // root tag for (i = 0; ! p && root->pi[i]; i++) { // pre-root processing instructions @@ -758,7 +815,7 @@ char *ezxml_toxml(ezxml_t xml) for (j = 1; (n = root->pi[i][j]); j++) { if (root->pi[i][k][j - 1] == '>') continue; // not pre-root while (len + strlen(t = root->pi[i][0]) + strlen(n) + 7 > max) - s = realloc(s, max += EZXML_BUFSIZE); + s = _realloc(s, max += EZXML_BUFSIZE); len += sprintf(s + len, "\n", t, *n ? " " : "", n); } } @@ -773,11 +830,11 @@ char *ezxml_toxml(ezxml_t xml) for (j = 1; (n = root->pi[i][j]); j++) { if (root->pi[i][k][j - 1] == '<') continue; // not post-root while (len + strlen(t = root->pi[i][0]) + strlen(n) + 7 > max) - s = realloc(s, max += EZXML_BUFSIZE); + s = _realloc(s, max += EZXML_BUFSIZE); len += sprintf(s + len, "\n", t, *n ? " " : "", n); } } - return realloc(s, len + 1); + return _realloc(s, len + 1); } // free the memory allocated for the ezxml structure @@ -835,12 +892,15 @@ ezxml_t ezxml_new(const char *name) { static char *ent[] = { "lt;", "<", "gt;", ">", "quot;", """, "apos;", "'", "amp;", "&", NULL }; - ezxml_root_t root = (ezxml_root_t)memset(malloc(sizeof(struct ezxml_root)), - '\0', sizeof(struct ezxml_root)); + ezxml_root_t root; + char **p_ent; + if (!(root = malloc(sizeof(struct ezxml_root)))) return NULL; + if (!(p_ent = malloc(sizeof(ent)))) { free(root); return NULL; }; + root = (ezxml_root_t)memset(root, '\0', sizeof(struct ezxml_root)); root->xml.name = (char *)name; root->cur = &root->xml; strcpy(root->err, root->xml.txt = ""); - root->ent = memcpy(malloc(sizeof(ent)), ent, sizeof(ent)); + root->ent = memcpy(p_ent, ent, sizeof(ent)); root->attr = root->pi = (char ***)(root->xml.attr = EZXML_NIL); return &root->xml; } @@ -924,15 +984,17 @@ ezxml_t ezxml_set_attr(ezxml_t xml, const char *name, const char *value) if (! xml->attr[l]) { // not found, add as new attribute if (! value) return xml; // nothing to do if (xml->attr == EZXML_NIL) { // first attribute + if (l >= 1) return NULL; xml->attr = malloc(4 * sizeof(char *)); - xml->attr[1] = strdup(""); // empty list of malloced names/vals + xml->attr[1] = _strdup(""); // empty list of malloced names/vals } - else xml->attr = realloc(xml->attr, (l + 4) * sizeof(char *)); + else xml->attr = _realloc(xml->attr, (l + 4) * sizeof(char *)); xml->attr[l] = (char *)name; // set attribute name xml->attr[l + 2] = NULL; // null terminate attribute list xml->attr[l + 3] = realloc(xml->attr[l + 1], - (c = strlen(xml->attr[l + 1])) + 2); + (c =(long)strlen(xml->attr[l + 1])) + 2); + if (!xml->attr[l + 3]) return NULL; strcpy(xml->attr[l + 3] + c, " "); // set name/value as not malloced if (xml->flags & EZXML_DUP) xml->attr[l + 3][c] = EZXML_NAMEM; } @@ -947,7 +1009,7 @@ ezxml_t ezxml_set_attr(ezxml_t xml, const char *name, const char *value) else { // remove attribute if (xml->attr[c + 1][l / 2] & EZXML_NAMEM) free(xml->attr[l]); memmove(xml->attr + l, xml->attr + l + 2, (c - l + 2) * sizeof(char*)); - xml->attr = realloc(xml->attr, (c + 2) * sizeof(char *)); + xml->attr = _realloc(xml->attr, (c + 2) * sizeof(char *)); memmove(xml->attr[c + 1] + (l / 2), xml->attr[c + 1] + (l / 2) + 1, (c / 2) - (l / 2)); // fix list of which name/vals are malloced } @@ -1006,8 +1068,8 @@ int main(int argc, char **argv) if (argc != 2) return fprintf(stderr, "usage: %s xmlfile\n", argv[0]); xml = ezxml_parse_file(argv[1]); - printf("%s\n", (s = ezxml_toxml(xml))); - free(s); + s = ezxml_toxml(xml); + if (s) { printf("%s\n", s); free(s); } i = fprintf(stderr, "%s", ezxml_error(xml)); ezxml_free(xml); return (i) ? 1 : 0; diff --git a/src/xml.h b/src/xml.h index 3e020788..1b5bf131 100644 --- a/src/xml.h +++ b/src/xml.h @@ -76,6 +76,10 @@ ezxml_t ezxml_parse_fp(FILE *fp); // if not found ezxml_t ezxml_child(ezxml_t xml, const char *name); +// returns the value of the first child tag (one level deeper) with the given +// name or NULL if not found +char* ezxml_child_val(ezxml_t xml, const char* name); + // returns the next tag of the same name in the same section and depth or NULL // if not found #define ezxml_next(xml) ((xml) ? xml->next : NULL) @@ -101,6 +105,9 @@ const char *ezxml_attr(ezxml_t xml, const char *attr); // Returns NULL if not found. ezxml_t ezxml_get(ezxml_t xml, ...); +// Same as above but returns the text value or NULL if not found +char* ezxml_get_val(ezxml_t xml, ...); + // Converts an ezxml structure back to xml. Returns a string of xml data that // must be freed. char *ezxml_toxml(ezxml_t xml);