Commit 22146219 authored by Rémi Denis-Courmont

text: fix decoding of 4-bytes UTF-8 sequences

parent 093c7430
...@@ -142,7 +142,7 @@ size_t vlc_towc (const char *str, uint32_t *restrict pwc) ...@@ -142,7 +142,7 @@ size_t vlc_towc (const char *str, uint32_t *restrict pwc)
break; break;
case 4: case 4:
cp = (c & 0x07) << 16; cp = (c & 0x07) << 18;
break; break;
default: default:
...@@ -154,18 +154,18 @@ size_t vlc_towc (const char *str, uint32_t *restrict pwc) ...@@ -154,18 +154,18 @@ size_t vlc_towc (const char *str, uint32_t *restrict pwc)
{ {
case 4: case 4:
c = *++ptr; c = *++ptr;
if (unlikely((c >> 6) != 2)) // not a continuation byte if (unlikely((c & 0xC0) != 0x80)) // not a continuation byte
return -1; return -1;
cp |= (c & 0x3f) << 12; cp |= (c & 0x3F) << 12;
if (unlikely(cp >= 0x110000)) // beyond Unicode range if (unlikely(cp >= 0x110000)) // beyond Unicode range
return -1; return -1;
/* fall through */ /* fall through */
case 3: case 3:
c = *++ptr; c = *++ptr;
if (unlikely((c >> 6) != 2)) // not a continuation byte if (unlikely((c & 0xC0) != 0x80)) // not a continuation byte
return -1; return -1;
cp |= (c & 0x3f) << 6; cp |= (c & 0x3F) << 6;
if (unlikely(cp >= 0xD800 && cp < 0xE000)) // UTF-16 surrogate if (unlikely(cp >= 0xD800 && cp < 0xE000)) // UTF-16 surrogate
return -1; return -1;
...@@ -174,9 +174,9 @@ size_t vlc_towc (const char *str, uint32_t *restrict pwc) ...@@ -174,9 +174,9 @@ size_t vlc_towc (const char *str, uint32_t *restrict pwc)
/* fall through */ /* fall through */
case 2: case 2:
c = *++ptr; c = *++ptr;
if (unlikely((c >> 6) != 2)) // not a continuation byte if (unlikely((c & 0xC0) != 0x80)) // not a continuation byte
return -1; return -1;
cp |= (c & 0x3f); cp |= (c & 0x3F);
break; break;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment