mysql:1790
From: <takeshi@xxxxxxxxxx>
Date: Tue, 21 Mar 2000 20:18:42 +0900
Subject: [mysql 01790] sjis & order by
MySQL を --with-charset=sjis で作成して、 char 型(binary 属性はつけない)に sjis データを入れた時、 order by が一部、正しく出なかったのを修正するパッチです。 検証してみてください。 3.22.32用のパッチです。 現在の MySQL のコードでは、char 型に代入された SJIS 文字の 2byte 目が 0x41-0x5a (=[A-Z]), 0x61-0x7a (=[a-z]) の時に、 2byte 目が ケース非依存でソートされてしまいます。 これを正しく処理するパッチです。 EUC は、2byte 目も 8bit 文字なので、この問題はおきません。 なお、本パッチは、2バイトコードの SJIS しか考慮していません。 半角かなの場合の処理は手つかずです。 ソースコードは、ほとんど GBK 用のソースのコピーです。 -- 村上 毅 takeshi@xxxxxxxxxx --- include/m_ctype.h.in.orig Tue Mar 21 16:43:54 2000 +++ include/m_ctype.h.in Tue Mar 21 15:03:51 2000 @@ -138,6 +138,8 @@ #define ismbhead(c) issjishead(c) #define mbcharlen(c) (issjishead(c)? 2: 0) #define MBMAXLEN 2 +#undef USE_STRCOLL +#define USE_STRCOLL #endif /* Support for Chinese(BIG5) characters, by jou@xxxxxxxxxx --- strings/ctype-sjis.c.orig Tue Mar 21 16:44:12 2000 +++ strings/ctype-sjis.c Tue Mar 21 17:55:23 2000 @@ -1,7 +1,15 @@ /* This file is for Shift JIS charset, and created by tommy@xxxxxxxxxx. */ +#include <stdio.h> #include <global.h> #include "m_string.h" +#include "m_ctype.h" + +#define issjiscode(c,d) (issjishead(c) && issjistail(d)) +#define gbkcode(c,d) (((uchar)(c) <<8) | (uchar)(d)) +#define gbkhead(e) ((uchar)(e>>8)) +#define gbktail(e) ((uchar)(e&0xff)) + uchar NEAR ctype_sjis[257] = { @@ -37,7 +45,7 @@ 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020, - 0020, 0020, 0020, 0020, 0020, 0000, 0000, 0000, + 0020, 0020, 0020, 0020, 0020, 0000, 0000, 0000 }; uchar NEAR to_lower_sjis[]= @@ -73,7 +81,7 @@ '\340','\341','\342','\343','\344','\345','\346','\347', '\350','\351','\352','\353','\354','\355','\356','\357', '\360','\361','\362','\363','\364','\365','\366','\367', - '\370','\371','\372','\373','\374','\375','\376','\377', + '\370','\371','\372','\373','\374','\375','\376','\377' }; uchar NEAR to_upper_sjis[]= @@ -109,7 +117,7 @@ '\340','\341','\342','\343','\344','\345','\346','\347', 
'\350','\351','\352','\353','\354','\355','\356','\357', '\360','\361','\362','\363','\364','\365','\366','\367', - '\370','\371','\372','\373','\374','\375','\376','\377', + '\370','\371','\372','\373','\374','\375','\376','\377' }; uchar NEAR sort_order_sjis[]= @@ -145,5 +153,146 @@ '\340','\341','\342','\343','\344','\345','\346','\347', '\350','\351','\352','\353','\354','\355','\356','\357', '\360','\361','\362','\363','\364','\365','\366','\367', - '\370','\371','\372','\373','\374','\375','\376','\377', + '\370','\371','\372','\373','\374','\375','\376','\377' }; + +/* +uint16 gbksortorder(uint16 i) +{ + uint index=gbktail(i); + if (index>0x7f) index-=0x41; + else index-=0x40; + index+=(gbkhead(i)-0x81)*0xbe; + return 0x8100+gbk_order[index]; +} +*/ + +uint16 sjissortorder(uint16 i) +{ + return (i); +} + +uint MY_STRXFRM_MULTIPLY=1; + +int my_strnncoll(const uchar * s1, int len1, const uchar * s2, int len2) +{ + uint len,c1,c2; + + len = min(len1,len2); + while (len--) + { + if ((len > 0) && issjiscode(*s1,*(s1+1)) && issjiscode(*s2, *(s2+1))) + { + c1=gbkcode(*s1,*(s1+1)); + c2=gbkcode(*s2,*(s2+1)); + if (c1!=c2) + return ((int) sjissortorder(c1) - (int) sjissortorder(c2)); + s1+=2; + s2+=2; + --len; + } else if (my_sort_order[(uchar) *s1++] != my_sort_order[(uchar) *s2++]) + return ((int) my_sort_order[(uchar) s1[-1]] - + (int) my_sort_order[(uchar) s2[-1]]); + } + return (int) (len1-len2); +} + +int my_strcoll(const uchar * s1, const uchar * s2) +{ + return my_strnncoll(s1,strlen(s1),s2,strlen(s2)); +} + +int my_strnxfrm(uchar * dest, uchar * src, int len, int srclen) +{ + uint16 e; + + len = srclen; + while (len--) + { + if ((len > 0) && issjiscode(*src, *(src+1))) + { + e = sjissortorder(gbkcode(*src, *(src+1))); + *dest++ = gbkhead(e); + *dest++ = gbktail(e); + src+=2; + len--; + } else + *dest++ = my_sort_order[(uchar) *src++]; + } + return srclen; +} + +int my_strxfrm(uchar * dest, uchar * src, int len) +{ + return 
my_strnxfrm(dest,src,len,strlen(src)); +} + +/* +** Calculate min_str and max_str that ranges a LIKE string. +** Arguments: +** ptr Pointer to LIKE string. +** ptr_length Length of LIKE string. +** escape Escape character in LIKE. (Normally '\'). +** All escape characters should be removed from min_str and max_str +** res_length Length of min_str and max_str. +** min_str Smallest case sensitive string that ranges LIKE. +** Should be space padded to res_length. +** max_str Largest case sensitive string that ranges LIKE. +** Normally padded with the biggest character sort value. +** +** The function should return 0 if ok and 1 if the LIKE string can't be +** optimized ! +*/ + +#define max_sort_char 255 +#define wild_one '_' +#define wild_many '%' + +extern my_bool my_like_range(const char *ptr,uint ptr_length,pchar escape, + uint res_length, char *min_str,char *max_str, + uint *min_length,uint *max_length) +{ + const char *end=ptr+ptr_length; + char *min_org=min_str; + char *min_end=min_str+res_length; + + for (; ptr != end && min_str != min_end ; ptr++) + { + if (ptr+1 != end && issjiscode(ptr[0],ptr[1])) + { + *min_str++= *max_str++ = *ptr++; + *min_str++= *max_str++ = *ptr; + continue; + } + if (*ptr == escape && ptr+1 != end) + { + ptr++; /* Skipp escape */ + *min_str++= *max_str++ = *ptr; + continue; + } + if (*ptr == wild_one) /* '_' in SQL */ + { + *min_str++='\0'; /* This should be min char */ + *max_str++=max_sort_char; + continue; + } + if (*ptr == wild_many) /* '%' in SQL */ + { + *min_length= (uint) (min_str - min_org); + *max_length= res_length; + do { + *min_str++ = '\0'; /* Because if key compression */ + *max_str++ = max_sort_char; + } while (min_str != min_end); + return 0; + } + *min_str++= *max_str++ = *ptr; + } + *min_length= *max_length = (uint) (min_str - min_org); + while (min_str != min_end) + { + *min_str++ = ' '; /* Because if key compression */ + *max_str++ = ' '; + } + return 0; +}
-> 1790 2000-03-21 20:18 [<takeshi@xxxxxxxxxx>] sjis & order by 1792 2000-03-22 01:52 ┗[とみたまさひろ <tomm] 1794 2000-03-22 04:23 ┗[<takeshi@xxxxxxxxxx>]