...
 
Commits (3)
......@@ -92,7 +92,7 @@ enum tag_identifier decode_descriptor_tag(const uint8_t *buf)
}
if (checksum != buf[4]) {
return ECMA_TAG_NONE;
return ECMA_TAG_INVALID;
}
return (enum tag_identifier)id;
......
......@@ -99,7 +99,7 @@ enum tag_identifier {
ECMA_FileEntry = 261,
ECMA_ExtendedFileEntry = 266,
ECMA_TAG_NONE = -1,
ECMA_TAG_INVALID = -1, /* checksum failed */
};
enum tag_identifier decode_descriptor_tag(const uint8_t *buf);
......
......@@ -140,9 +140,16 @@ static void *_safe_realloc(void *p, size_t s)
* Decoding
*/
#define utf16lo_to_utf8(out, out_pos, out_size, ch) \
/*
* outputs Modified UTF-8 (MUTF-8).
* The null character (U+0000) uses the two-byte overlong encoding 11000000 10000000 (hexadecimal C0 80), instead of 00000000 (hexadecimal 00).
*
* - not strictly UTF-8 compliant, but works with C str*() functions and Java, whereas raw \0 bytes in the middle of strings would not.
*/
#define utf16lo_to_mutf8(out, out_pos, out_size, ch) \
do { \
if (ch < 0x80) { \
if (ch != 0 && ch < 0x80) { \
out[out_pos++] = (uint8_t)ch; \
} else { \
out_size++; \
......@@ -154,10 +161,10 @@ static void *_safe_realloc(void *p, size_t s)
} \
} while (0)
#define utf16_to_utf8(out, out_pos, out_size, ch) \
#define utf16_to_mutf8(out, out_pos, out_size, ch) \
do { \
if (ch < 0x7ff) { \
utf16lo_to_utf8(out, out_pos, out_size, ch); \
utf16lo_to_mutf8(out, out_pos, out_size, ch); \
} else { \
out_size += 2; \
out = (uint8_t *)_safe_realloc(out, out_size); \
......@@ -171,7 +178,7 @@ static void *_safe_realloc(void *p, size_t s)
} while (0)
/* Strings, CS0 (UDF 2.1.1) */
static char *_cs0_to_utf8(const uint8_t *cs0, size_t size)
static char *_cs0_to_mutf8(const uint8_t *cs0, size_t size)
{
size_t out_pos = 0;
size_t out_size = size;
......@@ -193,13 +200,13 @@ static char *_cs0_to_utf8(const uint8_t *cs0, size_t size)
case 8:
/*udf_trace("string in utf-8\n");*/
for (i = 1; i < size; i++) {
utf16lo_to_utf8(out, out_pos, out_size, cs0[i]);
utf16lo_to_mutf8(out, out_pos, out_size, cs0[i]);
}
break;
case 16:
for (i = 1; i < size - 1; i+=2) {
uint16_t ch = cs0[i + 1] | (cs0[i] << 8);
utf16_to_utf8(out, out_pos, out_size, ch);
utf16_to_mutf8(out, out_pos, out_size, ch);
}
break;
default:
......@@ -666,8 +673,8 @@ static int _parse_udf_partition_maps(udfread_block_input *input,
*/
struct udf_file_identifier {
char *filename;
struct long_ad icb;
char *filename; /* MUTF-8 */
struct long_ad icb; /* location of file entry */
uint8_t characteristic; /* CHAR_FLAG_* */
};
......@@ -954,7 +961,7 @@ static int _parse_dir(const uint8_t *data, uint32_t length, struct udf_dir *dir)
dir->files[dir->num_entries].characteristic = fid.characteristic;
dir->files[dir->num_entries].icb = fid.icb;
dir->files[dir->num_entries].filename = _cs0_to_utf8(fid.filename, fid.filename_len);
dir->files[dir->num_entries].filename = _cs0_to_mutf8(fid.filename, fid.filename_len);
if (!dir->files[dir->num_entries].filename) {
continue;
......@@ -1222,10 +1229,12 @@ int udfread_open_input(udfread *udf, udfread_block_input *input/*, int partition
}
/* Volume Identifier. CS0, UDF 2.1.1 */
udf->volume_identifier = _cs0_to_utf8(vds.pvd.volume_identifier, vds.pvd.volume_identifier_length);
udf->volume_identifier = _cs0_to_mutf8(vds.pvd.volume_identifier, vds.pvd.volume_identifier_length);
if (udf->volume_identifier) {
udf_log("Volume Identifier: %s\n", udf->volume_identifier);
}
memcpy(udf->volume_set_identifier, vds.pvd.volume_set_identifier, 128);
udf_log("Volume Identifier: %s\n", udf->volume_identifier);
/* map partitions */
if (_parse_udf_partition_maps(input, &udf->part, &vds) < 0) {
......
......@@ -34,6 +34,15 @@ extern "C" {
* external API header
*/
/*
* NOTE:
*
* UDF filesystem file identifiers may contain nul bytes (0x00).
*
* In the libudfread API, file and directory names are encoded as Modified UTF-8 (MUTF-8).
* The null character (U+0000) uses the two-byte overlong encoding 11000000 10000000
* (hexadecimal C0 80) instead of 00000000 (hexadecimal 00).
*/
/*
* UDF volume access
......@@ -80,7 +89,7 @@ void udfread_close (udfread *);
* Get UDF Volume Identifier
*
* @param p udfread object
* @return Volume ID as null-terminated UTF-8 string, NULL if error
* @return Volume ID as null-terminated MUTF-8 string, NULL if error. Returned pointer is valid until udfread_close().
*/
const char *udfread_get_volume_id (udfread *);
......@@ -109,7 +118,7 @@ enum {
/* Directory stream entry */
struct udfread_dirent {
unsigned int d_type; /* UDF_DT_* */
const char *d_name; /* UTF-8 */
const char *d_name; /* MUTF-8 */
};
/* opaque handle for directory stream */
......@@ -119,8 +128,8 @@ typedef struct udfread_dir UDFDIR;
* Open directory stream
*
* @param p udfread object
* @param path path to the directory
* @return directory stream on the directory, or NULL if it could not be opened.
* @param path path to the directory (MUTF-8)
* @return directory stream handle on the directory, or NULL if it could not be opened.
*/
UDFDIR *udfread_opendir (udfread *, const char *path);
......@@ -129,9 +138,9 @@ UDFDIR *udfread_opendir (udfread *, const char *path);
*
* Directory name may contain special chars (/, \, ...).
*
* @param dir parent directory handle (NULL for root directory)
* @param name name of the directory to open from dir
* @return directory stream on the directory, or NULL if it could not be opened.
* @param dir parent directory stream handle
* @param name name of the directory to open from dir (MUTF-8)
* @return directory stream handle on the directory, or NULL if it could not be opened.
*/
UDFDIR *udfread_opendir_at(UDFDIR *dir, const char *name);
......@@ -188,7 +197,7 @@ typedef struct udfread_file UDFFILE;
* Path may not contain "." or ".." directory components.
*
* @param p udfread object
* @param path path to the file
* @param path path to the file (MUTF-8)
* @return file object, or NULL if it could not be opened.
*/
UDFFILE *udfread_file_open (udfread *, const char *path);
......@@ -198,8 +207,8 @@ UDFFILE *udfread_file_open (udfread *, const char *path);
*
* File name may contain special chars (/, \, ...).
*
* @param dir parent directory handle
* @param name name of the file
* @param dir parent directory stream handle
* @param name name of the file (MUTF-8)
* @return file object, or NULL if it could not be opened.
*/
UDFFILE *udfread_file_openat (UDFDIR *dir, const char *name);
......