import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers that normalise short pieces of text (addresses, share
 * descriptions and real-estate certificate headers).
 *
 * <p>All methods are stateless; the regular expressions they use are compiled
 * once and shared.
 */
public class TextUtils {

    /** Inputs longer than this are discarded by {@link #clearFormat(String)}. */
    private static final int MAX_CLEAR_FORMAT_LENGTH = 120;

    /** Characters stripped by {@link #clearFormat(String)}: space, pipe, curly quotes, newline. */
    private static final Pattern CLEAR_FORMAT_NOISE = Pattern.compile("[ |”“\n]");

    /** Two or more ASCII letters, digits or CJK ideographs (U+4E00..U+9FA5). */
    private static final Pattern SAFE_TEXT = Pattern.compile("^[a-zA-Z0-9\\u4E00-\\u9FA5]{2,}");

    /**
     * Runs of two or more characters that are not digits, CJK ideographs or
     * ASCII parentheses, or any single whitespace character.
     */
    private static final Pattern ADDRESS_NOISE =
            Pattern.compile("([^(0-9\\u4E00-\\u9FA5)]{2,})|(\\s)");

    private static final Pattern CHINESE_CHAR = Pattern.compile("[\\u4E00-\\u9FA5]");

    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    /** Anything that is not a word character, a CJK ideograph or an ASCII parenthesis. */
    private static final Pattern SHARE_DISALLOWED = Pattern.compile("[^(\\w\\u4E00-\\u9FA5)]");

    /** Every single character that is not a digit, CJK ideograph or ASCII parenthesis. */
    private static final Pattern HEADER_NOISE =
            Pattern.compile("([^(0-9\\u4E00-\\u9FA5)])|(\\s)");

    private static final Pattern UP_TO_OPEN_PAREN = Pattern.compile("(.*)\\(");
    private static final Pattern UP_TO_CLOSE_PAREN = Pattern.compile("(.*)\\)");

    /** Single-character abbreviations of provinces / regions. */
    private static final Pattern PROVINCE = Pattern.compile(
            "(京|津|冀|晋|蒙|辽|吉|黑|沪|苏|浙|皖|闽|赣|鲁|豫|鄂|湘|粤|桂|琼|渝|川|蜀|黔|贵|滇|云|藏|陕|秦|甘|陇|青|宁|新|港|澳|台)");

    private static final Pattern YEAR_BOTH_PARENS = Pattern.compile("\\([0-9]+\\)");
    private static final Pattern YEAR_OPEN_PAREN_ONLY = Pattern.compile("\\([0-9]+");
    private static final Pattern YEAR_CLOSE_PAREN_ONLY = Pattern.compile("[0-9]+\\)");
    private static final Pattern UP_TO_RIGHTS_KEYWORD = Pattern.compile(".*(不|动|产|权|第)");
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");

    /** Shortest run of CJK/parenthesis characters that ends in one of the keyword characters. */
    private static final Pattern RIGHTS_PREFIX =
            Pattern.compile("[(\\u4E00-\\u9FA5)]+?(不|动|产|权|第)");

    /** A keyword character immediately followed by the certificate digits (group 2). */
    private static final Pattern CERTIFICATE_NUMBER = Pattern.compile("(不|动|产|权|第)([0-9]+)");

    /**
     * Removes spaces, pipes, curly double quotes and newlines from a string.
     *
     * @param str the text to clean; may be {@code null}
     * @return the cleaned text, or an empty string when {@code str} is
     *         {@code null} or its original length exceeds 120 characters
     */
    public static String clearFormat(String str) {
        // The length limit is applied to the raw input, before anything is stripped.
        if (str == null || str.length() > MAX_CLEAR_FORMAT_LENGTH) {
            return "";
        }
        return CLEAR_FORMAT_NOISE.matcher(str).replaceAll("");
    }

    /**
     * Checks whether the input contains anything other than Chinese
     * characters, ASCII letters and digits.
     *
     * <p>The input is trimmed first. It is considered safe only when the whole
     * trimmed value consists of at least two characters from
     * {@code [a-zA-Z0-9\u4E00-\u9FA5]}.
     * NOTE(review): because of the two-character minimum, empty and
     * single-character inputs are reported as risky; confirm this is intended.
     *
     * @param qString the value to check; may be {@code null}
     * @return {@code true} if the trimmed value is not made up entirely of two
     *         or more allowed characters; {@code false} if it is, or if
     *         {@code qString} is {@code null}
     */
    public static boolean hasCrossScriptRisk(String qString) {
        if (qString == null) {
            return false;
        }
        return !SAFE_TEXT.matcher(qString.trim()).matches();
    }

    /**
     * Strips noise from an address.
     *
     * <p>Removes every whitespace character and every run of two or more
     * characters that are not digits, Chinese characters or ASCII parentheses.
     * An isolated single character of any other kind (for example one English
     * letter) is kept.
     * NOTE(review): ASCII parentheses survive because they are part of the
     * regex character class; confirm whether that is intended here.
     *
     * @param address the raw address; may be {@code null}
     * @return the cleaned address, or an empty string when the input is
     *         {@code null}, empty, or contains no Chinese character after
     *         cleaning
     */
    public static String formatAddress(String address) {
        if (address == null || address.isEmpty()) {
            return "";
        }
        String cleaned = ADDRESS_NOISE.matcher(address).replaceAll("");
        // An address without any Chinese character is treated as unusable.
        return CHINESE_CHAR.matcher(cleaned).find() ? cleaned : "";
    }

    /**
     * Normalises a share description, blanking it when it contains symbols.
     *
     * <p>Whitespace is removed first. If any remaining character is not a word
     * character ({@code \w}), a Chinese character or an ASCII parenthesis, the
     * result is an empty string.
     * NOTE(review): ASCII parentheses and underscores are accepted by the
     * regex even though they are punctuation; confirm whether that is intended.
     *
     * @param shareInfo the raw share text; may be {@code null}
     * @return the text without whitespace, or an empty string when the input is
     *         {@code null}, empty, or contains a disallowed character
     */
    public static String formatShare(String shareInfo) {
        if (shareInfo == null || shareInfo.isEmpty()) {
            return "";
        }
        String compact = WHITESPACE.matcher(shareInfo).replaceAll("");
        return SHARE_DISALLOWED.matcher(compact).find() ? "" : compact;
    }

    /**
     * Rebuilds a real-estate ownership certificate number from header text.
     *
     * <p>The header is first normalised: {@code O}/{@code o} become {@code 0},
     * {@code l} becomes {@code 1}, {@code s}/{@code S} become {@code 3}
     * (presumably to undo OCR misreads), the characters 一 and 二 are dropped,
     * and everything except digits, Chinese characters and ASCII parentheses
     * is removed. The result is the concatenation of four parts, each of which
     * may be empty: province abbreviation, year in parentheses, locality
     * followed by "不动产权第", and the certificate digits followed by "号".
     *
     * @param header the raw header text; may be {@code null}
     * @return the reconstructed certificate number, or an empty string when
     *         {@code header} is {@code null} or empty
     */
    public static String formatHeader(String header) {
        if (header == null || header.isEmpty()) {
            return "";
        }
        String normalized = header
                .replace('O', '0')
                .replace('o', '0')
                .replace('l', '1')
                .replace('s', '3')
                .replace('S', '3')
                .replace("一", "")
                .replace("二", "");
        // Parentheses are kept on purpose: the year extraction relies on them.
        normalized = HEADER_NOISE.matcher(normalized).replaceAll("");

        return extractProvince(normalized)
                + extractYear(normalized)
                + extractRightsPrefix(normalized)
                + extractCertificateNumber(normalized);
    }

    /** Finds the province abbreviation, preferring the text up to the last parenthesis. */
    private static String extractProvince(String header) {
        String scope = "";
        Matcher matcher = UP_TO_OPEN_PAREN.matcher(header);
        if (matcher.find()) {
            scope = matcher.group();
        } else {
            matcher = UP_TO_CLOSE_PAREN.matcher(header);
            if (matcher.find()) {
                scope = matcher.group();
            }
        }
        // Without any parenthesis the whole header is searched instead.
        Matcher province = PROVINCE.matcher(scope.isEmpty() ? header : scope);
        return province.find() ? province.group() : "";
    }

    /** Returns the year wrapped in parentheses, repairing a missing parenthesis if needed. */
    private static String extractYear(String header) {
        Matcher matcher = YEAR_BOTH_PARENS.matcher(header);
        if (matcher.find()) {
            return matcher.group();
        }
        matcher = YEAR_OPEN_PAREN_ONLY.matcher(header);
        if (matcher.find()) {
            return matcher.group() + ")";
        }
        matcher = YEAR_CLOSE_PAREN_ONLY.matcher(header);
        if (matcher.find()) {
            return "(" + matcher.group();
        }
        // No parenthesis at all: use the first number that appears before the
        // last keyword character.
        matcher = UP_TO_RIGHTS_KEYWORD.matcher(header);
        if (matcher.find()) {
            Matcher digits = DIGITS.matcher(matcher.group());
            if (digits.find()) {
                return "(" + digits.group() + ")";
            }
        }
        return "";
    }

    /** Returns the locality text followed by the fixed suffix "不动产权第", or "" if absent. */
    private static String extractRightsPrefix(String header) {
        Matcher matcher = RIGHTS_PREFIX.matcher(header);
        if (!matcher.find()) {
            return "";
        }
        String matched = matcher.group();
        // Drop the trailing keyword character and any parentheses that were swept in.
        String locality = matched.substring(0, matched.length() - 1)
                .replace("(", "")
                .replace(")", "");
        return locality + "不动产权第";
    }

    /** Returns the digits that directly follow a keyword character, suffixed with "号". */
    private static String extractCertificateNumber(String header) {
        Matcher matcher = CERTIFICATE_NUMBER.matcher(header);
        return matcher.find() ? matcher.group(2) + "号" : "";
    }

    /** Small manual demo of {@link #formatHeader(String)}. */
    public static void main(String args[]) {
        String str = "一伟(iegyPRieee4";
        System.out.println(str);
        System.out.println(formatHeader(str));
    }
}