/* 大文本导入失败并出现乱码,原因可能是文本编码不匹配,可改成 ANSI 编码方式后重新导入。若从其他库导入的数据存在乱码,可按如下步骤进行检查和修复。 */
/* Global macro variable. GenTbl2 and GenTbl2_new store the "||"-joined list of
   character column names in it. */
%global vars;
/* The macros below test GBK double-byte sequences at byte level:
   lead byte 129-254 and trail byte 64-254.
   NOTE(review): the range 4e00-9fa5 originally written here looks like the Unicode
   CJK code-point range, not a GBK byte range; confirm which one was intended. */
/*
 * checktable: list the rows of a table whose character fields look garbled.
 *
 * Parameter
 *   checktname  name of the input table (libref.member or member).
 *
 * Output
 *   &checktname._check  one observation per (row, suspicious field) with
 *       yxgn   row number of the observation in the input table
 *       len    number of non-ASCII bytes found in the field
 *       error  copy of the value of the suspicious field
 *   A message is also written to the log for every suspicious field.
 *   WORK._odetial is overwritten with the character-column metadata.
 *   Nothing is produced when the table has no character columns.
 *
 * Heuristic
 *   A field is reported when the number of bytes outside 7-bit ASCII is odd,
 *   because intact double-byte text contributes such bytes in pairs.
 *   NOTE(review): a GBK trail byte may itself lie in 64-126; a character of
 *   that kind contributes a single non-ASCII byte and is reported as well.
 *   Confirm whether the data is limited to characters with both bytes >= 128.
 */
%macro checktable(checktname);
  /* Keep the work variables out of the caller's macro scope. */
  %local names _i _var;

  /* TYPE=2 selects the character columns. */
  proc contents data=&checktname nodetails noprint
    out=_odetial(where=(type=2));
  run;

  %let names=;
  proc sql noprint;
    select name into :names separated by "|" from _odetial ;
  quit;
  %put names=&names;

  %if %length(&names)>0 %then %do;
    data &checktname._check;
      set &checktname;
      %let _i=1;
      %let _var=%scan(&names,&_i,|);
      %do %while(%length(&_var)>0);
        /* LENGTHN returns 0 for an empty result, so a field whose only
           non-ASCII byte is one stray byte (count = 1) is reported too.
           All 7-bit bytes, control characters included, are removed first. */
        _n=lengthn(compress(&_var,collate(0,127)));
        yxgn=_n_;
        len=_n;
        /* NOTE(review): the length of ERROR is fixed by the first column
           processed; longer values of later columns are truncated. */
        error=&_var;
        if mod(_n,2)=1 then do;
          put "warnning::第" _n_ "行【&_var.】字段存在乱码!具体内容请查看输出结果。" ;
          output;
        end;
        %let _i=%eval(&_i+1);
        %let _var=%scan(&names,&_i,|);
      %end;
      /* Work variables start with an underscore and are not written out. */
      drop _:;
    run;
  %end;
%mend;
/*
 * GenTbl2: split a table into clean rows and rows with suspicious text.
 *
 * Parameter
 *   checktname  name of the input table.
 *
 * Output
 *   &checktname._pass  rows in which every character field holds an even
 *                      number of non-ASCII bytes
 *   &checktname._err   rows in which at least one character field holds an
 *                      odd number of non-ASCII bytes
 *   Both tables carry _len, the total number of non-ASCII bytes of the row
 *   (0 for a row without any).
 *   The global macro variable VARS receives the "||"-joined list of
 *   character column names. WORK._odetial is overwritten.
 *
 * The parity test is applied per field: testing the concatenation of all
 * fields would let two damaged fields cancel each other out.
 * A table without character columns sends every row to _pass.
 */
%macro GenTbl2(checktname);
  %local names _k _fld;

  /* TYPE=2 selects the character columns. */
  proc contents data=&checktname nodetails noprint
    out=_odetial(where=(type=2));
  run;

  %let names=;
  proc sql noprint;
    select name into :names separated by "||" from _odetial ;
  quit;
  %let vars=&names;

  data &checktname._pass &checktname._err;
    set &checktname;
    _len=0;
    _bad=0;
    %let _k=1;
    %let _fld=%scan(&names,&_k,|);
    %do %while(%length(&_fld)>0);
      /* Count the bytes outside 7-bit ASCII; LENGTHN yields 0 when none is
         left, so a single stray byte is seen as an odd count of 1. */
      _n=lengthn(compress(&_fld,collate(0,127)));
      _len=_len+_n;
      if mod(_n,2)=1 then _bad=1;
      %let _k=%eval(&_k+1);
      %let _fld=%scan(&names,&_k,|);
    %end;
    if _bad then output &checktname._err;
    else output &checktname._pass;
    drop _n _bad;
  run;
%mend;
/*
 * GenTbl2_new: split a table into clean rows and rows with suspicious text,
 * recording how many fields of each row are affected.
 *
 * Parameter
 *   checktname  name of the input table.
 *
 * Output
 *   &checktname._pass  rows with _err = 0
 *   &checktname._err   rows with _err > 0
 *   Both tables carry
 *       _err     number of character fields with an odd count of non-ASCII bytes
 *       _varyxg  the non-ASCII bytes of all character fields, concatenated
 *       _len     number of bytes in _varyxg (0 when there are none)
 *   The global macro variable VARS receives the "||"-joined list of
 *   character column names. WORK._odetial is overwritten.
 *
 * Each field is tested on its own. No RETURN statement is used: in a step
 * with explicit OUTPUT statements RETURN writes nothing, so a flagged row
 * would be lost instead of reaching the _err table.
 */
%macro GenTbl2_new(checktname);
  %local names n var;

  /* TYPE=2 selects the character columns. */
  proc contents data=&checktname nodetails noprint
    out=_odetial(where=(type=2));
  run;

  %let names=;
  proc sql noprint;
    select name into :names separated by "||" from _odetial ;
  quit;
  %let vars=&names;

  data &checktname._pass &checktname._err;
    set &checktname;
    _ERR=0;
    %if %length(&names)>0 %then %do;
      /* &names is "a||b||c", i.e. a ready-made concatenation expression. */
      _varyxg=compress(&names,collate(0,127));
    %end;
    %else %do;
      /* No character columns: nothing to inspect. */
      _varyxg='';
    %end;
    _len=lengthn(_varyxg);
    %let n=1;
    %let var=%scan(&names,&n,|);
    %do %while(%length(&var)>0);
      /* An odd number of non-ASCII bytes means a broken double-byte pair. */
      if mod(lengthn(compress(&var,collate(0,127))),2)=1 then _err=_err+1;
      %let n=%eval(&n+1);
      %let var=%scan(&names,&n,|);
    %end;
    if _err^=0 then output &checktname._err;
    else output &checktname._pass;
  run;
%mend;
/*
 * updateTbl: rebuild the listed character columns, keeping only bytes that
 * form valid text.
 *
 * Parameters
 *   tblname  name of the input table.
 *   vars     blank-separated list of character columns to repair.
 *   maxlen   length in bytes of the rebuilt columns (default 100, the value
 *            that was previously hard-coded). Longer values are cut off.
 *
 * Output
 *   &tblname._chg  copy of the input in which every listed column is replaced
 *                  by its repaired value (length &maxlen). The work variables
 *                  and a column named _len, if present, are dropped.
 *
 * Repair rule, applied from left to right to each value:
 *   - a byte <= 128 is copied as a single-byte character, blanks included;
 *   - a lead byte 129-254 followed by a trail byte 64-254 is copied as a pair;
 *   - anything else ends the scan: a message is logged and the remainder of
 *     the value is discarded.
 * Values made of single-byte characters only are copied unchanged.
 */
%macro updateTbl(tblname,vars,maxlen=100);
  %local n var;

  data &tblname._chg;
    set &tblname;
    %let n=1;
    %let var=%scan(&vars,&n);
    %do %while(%length(&var)>0);
      length _&var $&maxlen;
      _&var="";
      /* Test the current column only, not the whole list. */
      if compress(&var,collate(1,126))^='' then do;
        __n=lengthn(&var);
        __o=0;   /* bytes written to the rebuilt value so far */
        i=1;
        do while(i<=__n);
          x1=rank(substr(&var,i,1));
          if x1<=128 then do;
            if __o+1>&maxlen then leave;
            /* Writing by position keeps embedded blanks intact. */
            substr(_&var,__o+1,1)=substr(&var,i,1);
            __o=__o+1;
            i=i+1;
          end;
          else do;
            /* A lead byte at the very end has no trail byte. */
            if i<__n then x2=rank(substr(&var,i+1,1));
            else x2=.;
            if 129<=x1<=254 and 64<=x2<=254 then do;
              if __o+2>&maxlen then leave;
              substr(_&var,__o+1,2)=substr(&var,i,2);
              __o=__o+2;
              i=i+2;
            end;
            else do;
              put "碰到非法编码范围,则跳过不处理";
              leave;
            end;
          end;
        end;
      end;
      else _&var=&var;
      drop &var;
      %let n=%eval(&n+1);
      %let var=%scan(&vars,&n);
    %end;
    drop i x1 x2 __n __o _len;
  run;

  /* Give the rebuilt columns their original names back. */
  data &tblname._chg;
    set &tblname._chg;
    %let n=1;
    %let var=%scan(&vars,&n);
    %do %while(%length(&var)>0);
      &var=_&var;
      drop _&var;
      %let n=%eval(&n+1);
      %let var=%scan(&vars,&n);
    %end;
  run;
%mend;
*%GenTbl2(mscmbcm.LNBKA);
*%checktable(mscmbcm.LNBKA_err);/*MTUS PERD USLN CUNM NUID UIFG*/
*%updateTbl(mscmbcm.LNBKA_err,MTUS PERD USLN CUNM NUID);
/* Demonstration step: walk a mixed single-/double-byte literal byte by byte,
   rebuild it in y and print the intermediate results to the log.
   Bytes <= 128 are copied one at a time; a lead byte 129-254 followed by a
   trail byte 64-254 is copied as a pair; any other byte is reported and
   left out. */
data _null_;
length y $100.;
y='';
a="abc我 的 小学校 xx、";
n=length(a);put n;
o=0;                      /* bytes written to y so far */
do i=1 to n;
  put i=;
  y1=substr(a,i,1);
  x1=rank(y1);            /* classify the current byte, not the output buffer */
  if x1<=128 then do;
    /* Writing by position keeps the embedded blanks that TRIM would drop. */
    substr(y,o+1,1)=y1;
    o=o+1;
  end;
  else do;
    /* The last byte has no successor to pair with. */
    if i<n then x2=rank(substr(a,i+1,1));
    else x2=.;
    if 129<=x1<=254 and 64<=x2<=254 then do;
      x=byte(x1)||byte(x2);
      put x=;
      substr(y,o+1,2)=x;
      o=o+2;
      i=i+1;              /* the trail byte has been consumed as well */
    end;
    else do;
      put "碰到非法编码范围,则跳过不处理";
    end;
  end;
end;
put y;
run;