使用 Java POI 导入 Excel 数据时,经常会发现一些隐藏的脏数据,例如全角空格、编码转换后产生的乱码等。这类数据用 trim() 等函数不容易清理干净,因此我整理了需要清除的项目,并写成工具类,以便日后使用。
import java.util.regex.Matcher;
import java.util.regex.Pattern;import org.springframework.stereotype.Controller;
/**
 * Created by liuhh on 2018/1/1.
 * Text clean-up helpers: charset transcoding and full-width / half-width conversion.
 */
public class transitionChar {

    /** Matches one character in the range U+4E00..U+9FA5; compiled once and reused. */
    private static final Pattern CHINESE = Pattern.compile("[\u4e00-\u9fa5]");

    /** Charset names probed by {@link #getEncoding(String)}, in priority order. */
    private static final String[] CANDIDATE_ENCODINGS = {
        "ISO-8859-1", "GB2312", "UTF-8", "GBK", "ASCII", "GB18030", "Unicode"
    };

    /** Distance between a printable ASCII char and its full-width form (U+FF01 minus U+0021). */
    private static final int FULL_WIDTH_OFFSET = 65248;

    /**
     * Demo: transcodes a sample string and strips question marks and whitespace
     * (spaces, line breaks, tabs) from the result.
     *
     * @param ags unused command-line arguments
     */
    public static void main(String[] ags) {
        String str = " 。。。。。。gg g ggg gg gg ggg ?";
        // "\\s" already covers '\n' and '\t', so one character class removes everything.
        System.out.print(transition(ToDBC(str), "GB2312").replaceAll("[?\\s]", ""));
        System.out.print(transition(ToSBC(str), "GB2312").replaceAll("\\?", ""));
    }

    /**
     * Encodes {@code str} with the charset reported by {@link #getEncoding(String)} and
     * decodes the resulting bytes with {@code Encode}.
     *
     * <p>NOTE(review): when the detected charset differs from {@code Encode} the bytes are
     * reinterpreted rather than converted. That presumably targets text that was decoded
     * with the wrong charset upstream; for correctly decoded text it can garble the
     * content - confirm against callers.
     *
     * @param str    text to transcode; may be {@code null}
     * @param Encode name of the charset used to decode the bytes
     * @return the transcoded text, or {@code null} when {@code str} is {@code null}, no
     *         candidate charset matched, or {@code Encode} is not supported
     */
    public static String transition(String str, String Encode) {
        if (str == null) {
            return null;
        }
        String strEncode = getEncoding(str);
        if (strEncode.isEmpty()) {
            // No candidate charset round-trips the text; report failure like any other.
            return null;
        }
        try {
            return new String(str.getBytes(strEncode), Encode);
        } catch (java.io.UnsupportedEncodingException ex) {
            return null;
        }
    }

    /**
     * Returns the first candidate charset that can encode and decode {@code str} without
     * changing it. Candidates are tried in this order: ISO-8859-1, GB2312, UTF-8, GBK,
     * ASCII, GB18030, Unicode.
     *
     * @param str text to probe; may be {@code null}
     * @return the matching charset name, or an empty string when {@code str} is
     *         {@code null} or no candidate round-trips it
     */
    public static String getEncoding(String str) {
        if (str == null) {
            return "";
        }
        for (String encode : CANDIDATE_ENCODINGS) {
            try {
                if (str.equals(new String(str.getBytes(encode), encode))) {
                    return encode;
                }
            } catch (java.io.UnsupportedEncodingException ignored) {
                // This runtime does not provide the charset; try the next candidate.
            }
        }
        return "";
    }

    /**
     * Tells whether the text contains at least one character in U+4E00..U+9FA5.
     *
     * @param str text to inspect; may be {@code null}
     * @return {@code true} if such a character is present, {@code false} otherwise
     *         (including for {@code null})
     */
    public static boolean isContainChinese(String str) {
        return str != null && CHINESE.matcher(str).find();
    }

    /**
     * Converts half-width characters to full-width: a space becomes U+3000 and the
     * printable ASCII characters 0x21..0x7E are shifted into U+FF01..U+FF5E.
     * All other characters, including tabs and line breaks, are left unchanged.
     *
     * @param input text to convert; may be {@code null}
     * @return the full-width text, or {@code null} when {@code input} is {@code null}
     */
    public static String ToSBC(String input) {
        if (input == null) {
            return null;
        }
        char[] c = input.toCharArray();
        for (int i = 0; i < c.length; i++) {
            if (c[i] == ' ') {
                c[i] = '\u3000';
            } else if (c[i] > ' ' && c[i] < '\177') {
                // Only 0x21..0x7E have full-width forms; shifting control characters
                // would produce unrelated code points that ToDBC cannot map back.
                c[i] = (char) (c[i] + FULL_WIDTH_OFFSET);
            }
        }
        return new String(c);
    }

    /**
     * Converts full-width characters to half-width: U+3000 becomes a space and
     * U+FF01..U+FF5E are shifted back to 0x21..0x7E. All other characters are left
     * unchanged.
     *
     * @param input text to convert; may be {@code null}
     * @return the half-width text, or {@code null} when {@code input} is {@code null}
     */
    public static String ToDBC(String input) {
        if (input == null) {
            return null;
        }
        char[] c = input.toCharArray();
        for (int i = 0; i < c.length; i++) {
            if (c[i] == '\u3000') {
                c[i] = ' ';
            } else if (c[i] > '\uFF00' && c[i] < '\uFF5F') {
                c[i] = (char) (c[i] - FULL_WIDTH_OFFSET);
            }
        }
        return new String(c);
    }
}