diff -ur gawk-3.1.3.orig/builtin.c gawk-3.1.3/builtin.c --- gawk-3.1.3.orig/builtin.c 2004-10-19 15:53:00.196994888 +0300 +++ gawk-3.1.3/builtin.c 2004-10-19 15:59:58.231443992 +0300 @@ -1595,7 +1595,7 @@ do_tolower(NODE *tree) { NODE *t1, *t2; - register unsigned char *cp, *cp2; + register unsigned char *cp, *cp2, *cp3; /* cp and cp2 bound the source bytes of t1; cp3 is the write position in t2's buffer (see the loop header) */ #ifdef MBS_SUPPORT size_t mbclen = 0; mbstate_t mbs, prev_mbs; @@ -1608,8 +1608,9 @@ lintwarn(_("tolower: received non-string argument")); t1 = force_string(t1); t2 = tmp_string(t1->stptr, t1->stlen); - for (cp = (unsigned char *)t2->stptr, - cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++) + for (cp = (unsigned char *)t1->stptr, + cp2 = (unsigned char *)(t1->stptr + t1->stlen), + cp3 = (unsigned char *)(t2->stptr); cp < cp2; cp++, cp3++) /* reads walk t1 via cp while writes walk t2 via cp3, one byte per iteration unless adjusted in the body */ #ifdef MBS_SUPPORT if (gawk_mb_cur_max > 1) { wchar_t wc; @@ -1621,14 +1622,39 @@ /* a multibyte character. */ if (iswupper(wc)) { wc = towlower(wc); - wcrtomb((char *) cp, wc, &prev_mbs); - } - /* Adjust the pointer. */ - cp += mbclen - 1; + wcrtomb((char *) cp3, wc, &prev_mbs); /* the lowered character is encoded into t2 at cp3; the byte count returned by wcrtomb is not used */ + + /* Adjust the pointer. + Lowercasing some multibyte chars (such as 0x0130 in tr_TR locale) can produce a singlebyte char */ + if (!isalpha(*cp3)) { /* NOTE(review): when this test is false neither pointer skips ahead, so cp is left inside the source character if it was longer than one byte; mbclen is assigned outside this hunk, confirm this is intended */ + cp += mbclen - 1; + cp3 += mbclen -1; + } + } else + *cp3 = *cp; } else { /* Otherwise we treat it as a singlebyte character. 
*/ - if (ISUPPER(*cp)) - *cp = tolower(*cp); + if (ISUPPER(*cp)) { + unsigned char tc; + wchar_t wc2; + wc2 = btowc(*cp); + wc2 = towlower(wc2); + tc = wctob(wc2); /* NOTE(review): wctob yields EOF when wc2 has no one-byte encoding; that value is truncated to unsigned char here, which is presumably what the tc > (1<<7) test relies on, but any valid byte above 0x80 would pass the test as well */ + /* lowercasing some singlebyte chars (such as I in tr_TR locale) can produce a multibyte character */ + if ( tc > (1<<7) ) { + char *tmp; + tmp = strdup(t2->stptr); /* NOTE(review): no free(tmp) is visible in this hunk, and the strdup result is not checked */ + realloc(t2->stptr, + (cp2-cp)+sizeof(char)); /* NOTE(review): the realloc result is discarded, so if the block moves, t2->stptr and cp3 keep pointing at the old storage; the size requested is the count of unread source bytes plus one rather than the whole string length plus one, verify */ + t2->stlen++; + strncpy(t2->stptr, tmp, strlen(tmp)); /* copies exactly strlen(tmp) bytes, so strncpy writes no terminating NUL */ + wcrtomb((char*) cp3, + wc2, &prev_mbs); + cp3++; /* extra step on top of the loop increment: the output is presumably assumed to take two bytes where the input took one */ + } else + *cp3 = tolower(*cp); + } else + *cp3 = *cp; } } else #endif @@ -1644,7 +1670,7 @@ do_toupper(NODE *tree) { NODE *t1, *t2; - register unsigned char *cp, *cp2; + register unsigned char *cp, *cp2, *cp3; /* cp and cp2 bound the source bytes of t1; cp3 is the write position in t2's buffer (see the loop header) */ #ifdef MBS_SUPPORT size_t mbclen = 0; mbstate_t mbs, prev_mbs; @@ -1657,8 +1683,9 @@ lintwarn(_("toupper: received non-string argument")); t1 = force_string(t1); t2 = tmp_string(t1->stptr, t1->stlen); - for (cp = (unsigned char *)t2->stptr, - cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++) + for (cp = (unsigned char *)t1->stptr, + cp2 = (unsigned char *)(t1->stptr + t1->stlen), + cp3 = (unsigned char *)(t2->stptr); cp < cp2; cp++, cp3++) /* reads walk t1 via cp while writes walk t2 via cp3, one byte per iteration unless adjusted in the body */ #ifdef MBS_SUPPORT if (gawk_mb_cur_max > 1) { wchar_t wc; @@ -1670,14 +1697,39 @@ /* a multibyte character. */ if (iswlower(wc)) { wc = towupper(wc); - wcrtomb((char *) cp, wc, &prev_mbs); - } - /* Adjust the pointer. */ - cp += mbclen - 1; + wcrtomb((char *) cp3, wc, &prev_mbs); /* the uppercased character is encoded into t2 at cp3; the byte count returned by wcrtomb is not used */ + + /* Adjust the pointers. + Uppercasing some multibyte chars (such as 0x0131 in tr_TR locale) can produce a singlebyte char */ + if (!isalpha(*cp3)) { /* NOTE(review): when this test is false neither pointer skips ahead, so cp is left inside the source character if it was longer than one byte; mbclen is assigned outside this hunk, confirm this is intended */ + cp += mbclen - 1; + cp3 += mbclen -1; + } + } else + *cp3 = *cp; } else { /* Otherwise we treat it as a singlebyte character. 
*/ - if (ISLOWER(*cp)) - *cp = toupper(*cp); + if (ISLOWER(*cp)) { + unsigned char tc; + wchar_t wc2; + wc2 = btowc(*cp); + wc2 = towupper(wc2); + tc = wctob(wc2); /* NOTE(review): wctob yields EOF when wc2 has no one-byte encoding; that value is truncated to unsigned char here, which is presumably what the tc > (1<<7) test relies on, but any valid byte above 0x80 would pass the test as well */ + /* uppercasing some singlebyte chars (such as i in tr_TR locale) can produce a multibyte character */ + if ( tc > (1<<7) ) { + char *tmp; + tmp = strdup(t2->stptr); /* NOTE(review): no free(tmp) is visible in this hunk, and the strdup result is not checked */ + realloc(t2->stptr, + (cp2-cp)+sizeof(char)); /* NOTE(review): the realloc result is discarded, so if the block moves, t2->stptr and cp3 keep pointing at the old storage; the size requested is the count of unread source bytes plus one rather than the whole string length plus one, verify */ + t2->stlen++; + strncpy(t2->stptr, tmp, strlen(tmp)); /* copies exactly strlen(tmp) bytes, so strncpy writes no terminating NUL */ + wcrtomb((char*) cp3, + wc2, &prev_mbs); + cp3++; /* extra step on top of the loop increment: the output is presumably assumed to take two bytes where the input took one */ + } else + *cp3 = toupper(*cp); + } else + *cp3 = *cp; } } else #endif