2016-02-10 12 views
5

異なるアルファベット(ロシア語と英語など)の文字を含む2つの文字列を比較したいと思います。見た目が同じ文字は、互いに等しいものとして扱いたいのです。文字列を別のアルファベットのシンボルと比較する

例えば、「Mom」の文字「o」は英語のアルファベット(UnicodeのコードはU+006F)の文字で、「Mоm」の文字「о」はロシア語のアルファベット(UnicodeのコードはU+043E)の文字です。そのため ("Mom" = "Mоm") は偽になりますが、私はこれを真として扱いたいのです。これを行う標準のSAS関数はありますか、それともマクロを書く必要がありますか。

ありがとうございます!

答えて

1

私なら次のようにします:

まず対応表(マップ)を作ります。つまり、ロシア語のどの文字が英語のどの文字に対応するかを定義します。例:
б= B
в= V
...

このマップは別のテーブルに、またはマクロ変数として格納します。次に、tranwrd関数を使うマクロループを作成します。このループは作成したマップを順にたどって置換を行います。

ここでの例はそうかもしれません。

この変換の後、私はあなたの文字列を比較することができると思います。

0

また、キーボードレイアウトの切り替え忘れによる誤入力(ミスプリント)を処理するための関数もコーディングしました。コードは次のとおりです。

/***************************************************************************/
/* count_rus_letters(string)                                               */
/*   Returns how many characters of STRING are Russian Cyrillic letters    */
/*   (upper or lower case, including the letter YO).                       */
/*   STRING is scanned one character at a time with KLENGTH / KSUBSTR.     */
/***************************************************************************/
proc fcmp outlib=sasuser.userfuncs.mystring;
function count_rus_letters(string $);
    /* Holds the single character currently being inspected. */
    length ch $2;

    total = 0;
    n_chars = klength(string);

    do pos = 1 to n_chars;
        ch = ksubstr(string, pos, 1);

        /* Membership test against the full Russian alphabet. */
        if ch in ("А","а","Б","б","В","в","Г","г","Д","д","Е","е","Ё","ё",
                  "Ж","ж","З","з","И","и","Й","й","К","к","Л","л","М","м",
                  "Н","н","О","о","П","п","Р","р","С","с","Т","т","У","у",
                  "Ф","ф","Х","х","Ц","ц","Ч","ч","Ш","ш","Щ","щ","Ъ","ъ",
                  "Ы","ы","Ь","ь","Э","э","Ю","ю","Я","я") then do;
            total = total + 1;
        end;
    end;

    return(total);
endsub;
run;

/**************************************************************************/
/* FUNCTION count_eng_letters RETURNS NUMBER OF ENGLISH LETTERS IN STRING */
/*                                                                        */
/*   string : character value to inspect                                  */
/*   returns: numeric count of characters in A-Z or a-z                   */
/*                                                                        */
/* The two letter ranges are tested separately. A single range from 'A'   */
/* to 'z' would also accept the six ASCII characters that sit between     */
/* 'Z' and 'a' (left bracket, backslash, right bracket, caret,            */
/* underscore, backquote), which are not letters.                         */
/* NOTE(review): the range test relies on an ASCII-based collating        */
/* sequence for RANK - confirm for the target session encoding.           */
/**************************************************************************/
proc fcmp outlib=sasuser.userfuncs.mystring;
FUNCTION count_eng_letters(string $);
length letter $2;

eng_count=0;

len=klength(string);

do i=1 to len;
    letter=ksubstr(string,i,1);
    /* Collating position of the character being inspected. */
    code=rank(letter);
    if (rank('A') <= code <= rank('Z')) or (rank('a') <= code <= rank('z'))
    then eng_count+1;
end;

return(eng_count);
endsub;
run;

/**************************************************************************/
/* FUNCTION is_string_russian RETURNS 1 IF NUMBER OF RUSSIAN SYMBOLS IN   */
/* STRING >= NUMBER OF ENGLISH SYMBOLS, OTHERWISE 0                       */
/*                                                                        */
/*   string : character value to classify                                 */
/*   returns: 1 or 0 (numeric); a tie, including a string with no         */
/*            letters at all, yields 1                                    */
/*                                                                        */
/* English letters are counted as A-Z or a-z only. A single 'A'..'z'      */
/* range would also count the ASCII punctuation between 'Z' and 'a'       */
/* (brackets, backslash, caret, underscore, backquote) as English and     */
/* could flip the result.                                                 */
/* NOTE(review): the range test relies on an ASCII-based collating        */
/* sequence for RANK - confirm for the target session encoding.           */
/**************************************************************************/
proc fcmp outlib=sasuser.userfuncs.mystring;
FUNCTION is_string_russian(string $);
length letter $2 result 8;

eng_count=0;
rus_count=0;

len=klength(string);

do i=1 to len;
    letter=ksubstr(string,i,1);
    if letter in ("А","а","Б","б","В","в","Г","г","Д","д","Е","е","Ё","ё","Ж","ж",
     "З","з","И","и","Й","й","К","к","Л","л","М","м","Н","н","О","о","П","п","Р","р",
     "С","с","Т","т","У","у","Ф","ф","Х","х","Ц","ц","Ч","ч","Ш","ш","Щ","щ","Ъ","ъ",
     "Ы","ы","Ь","ь","Э","э","Ю","ю","Я","я")
    then rus_count+1;

    /* Collating position of the character being inspected. */
    code=rank(letter);
    if (rank('A') <= code <= rank('Z')) or (rank('a') <= code <= rank('z'))
    then eng_count+1;
end;

if rus_count>=eng_count
then result=1;
else result=0;

return(result);
endsub;
run;

/**************************************************************************/
/* FUNCTION fix_layout_misprints REPLACES MISPRINTED SYMBOLS BY ANALYSING */
/* LANGUAGE OF THE STRING (FOR ENGLISH STRING RUSSIAN SYMBOLS ARE         */
/* REPLACED BY ENGLISH COPIES AND FOR RUSSIAN STRING SYMBOLS ARE          */
/* REPLACED BY RUSSIAN COPIES)                                            */
/*                                                                        */
/*   string : character value to repair                                   */
/*   returns: repaired value, character, length 1000                      */
/*                                                                        */
/* The string is treated as Russian when it has at least as many Russian  */
/* letters as English letters (ties count as Russian).                    */
/* English letters are counted as A-Z or a-z only. A single 'A'..'z'      */
/* range would also count the ASCII punctuation between 'Z' and 'a'       */
/* (brackets, backslash, caret, underscore, backquote) as English and     */
/* could select the wrong translation direction.                          */
/* NOTE(review): the range test relies on an ASCII-based collating        */
/* sequence for RANK - confirm for the target session encoding.           */
/**************************************************************************/
proc fcmp outlib=sasuser.userfuncs.mystring;
FUNCTION fix_layout_misprints(string $) $ 1000;
length letter $2 result $1000;

eng_count=0;
rus_count=0;

len=klength(string);

do i=1 to len;
    letter=ksubstr(string,i,1);
    if letter in ("А","а","Б","б","В","в","Г","г","Д","д","Е","е","Ё","ё","Ж","ж",
     "З","з","И","и","Й","й","К","к","Л","л","М","м","Н","н","О","о","П","п","Р","р",
     "С","с","Т","т","У","у","Ф","ф","Х","х","Ц","ц","Ч","ч","Ш","ш","Щ","щ","Ъ","ъ",
     "Ы","ы","Ь","ь","Э","э","Ю","ю","Я","я")
    then rus_count+1;

    /* Collating position of the character being inspected. */
    code=rank(letter);
    if (rank('A') <= code <= rank('Z')) or (rank('a') <= code <= rank('z'))
    then eng_count+1;
end;

/* KTRANSLATE takes the replacement characters first, then the characters */
/* to be replaced. First branch: Latin look-alikes become Cyrillic.       */
/* Second branch: Cyrillic look-alikes become Latin.                      */
if rus_count>=eng_count
then result=ktranslate(string,"АаВЕеКкМОоРрСсТХх","AaBEeKkMOoPpCcTXx");
else result=ktranslate(string,"AaBEeKkMOoPpCcTXx","АаВЕеКкМОоРрСсТХх");

return(result);
endsub;
run;

/***********/
/* EXAMPLE */
/***********/
/* Demonstrates the four functions on a Russian surname typed with one   */
/* Latin look-alike letter. All results are written to the SAS log.      */

/* Make the functions stored in sasuser.userfuncs callable from the DATA step. */
options cmplib=sasuser.userfuncs;
data _null_;
good_str="Иванов";   /* reference spelling, Cyrillic letters only */
err_str="Ивaнов";    /* same word with a Latin "a" as third letter */
fixed_str=fix_layout_misprints(err_str);

put "Good string=" good_str;
put "Error string=" err_str;
put "Fixed string=" fixed_str;

rus_count_in_err=count_rus_letters(err_str);
put "Count of Cyrillic symbols in error string=" rus_count_in_err;

eng_count_in_err=count_eng_letters(err_str);
put "Count of English symbols in error string=" eng_count_in_err;

is_error_str_russian=is_string_russian(err_str);
put "Is error string language Russian=" is_error_str_russian;

/* The raw strings differ even though they look the same on screen. */
if (good_str ne err_str)
then put "Before clearing - strings are not equal to each other";

/* After the look-alike letter is replaced the strings compare equal. */
if (good_str = fixed_str)
then put "After clearing - strings are equal to each other";
run;