diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog index 8931199cb..aca51ddb0 100644 --- a/winsup/cygwin/ChangeLog +++ b/winsup/cygwin/ChangeLog @@ -1,3 +1,9 @@ +2009-04-01 Corinna Vinschen + + * strfuncs.cc (sys_cp_mbstowcs): Check if ASCII SO is followed by at + least two chars and the next byte is a valid UTF-8 start byte before + trying to convert the followup bytes as UTF-8 sequence. + 2009-03-31 Corinna Vinschen * shm.cc (struct shm_attached_list): Convert access type to ULONG. diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index bb3682225..2af6bf3ca 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -425,8 +425,12 @@ sys_cp_mbstowcs (UINT cp, PWCHAR dst, size_t dlen, const char *src, size_t nms) len = (size_t)-1; while (len > 0 && nms > 0) { - /* ASCII SO. Convert following UTF-8 sequence (if not UTF-8 anyway). */ - if (*pmbs == 0x0e && *charset != 'U'/*TF-8*/) + /* ASCII SO. Sanity check: If this is a lead SO byte for a following + UTF-8 sequence, there must be at least two more bytes left, and the + next byte must be a valid UTF-8 start byte. If the charset isn't + UTF-8 anyway, try to convert the following bytes as UTF-8 sequence. */ + if (*pmbs == 0x0e && nms > 2 && *(unsigned char *) (pmbs + 1) >= 0xc2 + && *(unsigned char *) (pmbs + 1) <= 0xf4 && *charset != 'U'/*TF-8*/) { pmbs++; --nms;