import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers that clean up and normalize individual text fields of a
 * real-estate ownership certificate (header/certificate number, owner name,
 * address, unit number, right type, usage, area, term, ...).
 *
 * <p>The inputs are presumably OCR output (the substitutions such as
 * {@code O -> 0} and {@code 屎 -> 层} suggest so) — TODO confirm with callers.
 *
 * <p>Unless stated otherwise, the {@code format*} methods and
 * {@link #clearFormat(String)} return an empty string for {@code null} input.
 */
public class TextUtils {

    /** Canonical right-type text that {@link #formatType(String)} snaps to. */
    private static final String CANONICAL_TYPE = "国有建设用地使用权/房屋(构筑物)所有权";

    // ---- formatHeader patterns -------------------------------------------
    /** Everything except digits, CJK ideographs and ASCII parentheses (plus whitespace). */
    private static final Pattern HEADER_NOISE =
            Pattern.compile("([^(0-9\\u4E00-\\u9FA5)])|(\\s)");
    private static final Pattern UP_TO_OPEN_PAREN = Pattern.compile("(.*)\\(");
    private static final Pattern UP_TO_CLOSE_PAREN = Pattern.compile("(.*)\\)");
    private static final Pattern PROVINCE = Pattern.compile(
            "(京|津|冀|晋|蒙|辽|吉|黑|沪|苏|浙|皖|闽|赣|鲁|豫|鄂|湘|粤|桂|琼|渝|川|蜀|黔|贵|滇|云|藏|陕|秦|甘|陇|青|宁|新|港|澳|台)");
    private static final Pattern YEAR_BOTH_PARENS = Pattern.compile("\\([0-9]+\\)");
    private static final Pattern YEAR_OPEN_PAREN = Pattern.compile("\\([0-9]+");
    private static final Pattern YEAR_CLOSE_PAREN = Pattern.compile("[0-9]+\\)");
    private static final Pattern UP_TO_KEYWORD = Pattern.compile(".*(不|动|产|权|第)");
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");
    private static final Pattern TEXT_BEFORE_KEYWORD =
            Pattern.compile("[(\\u4E00-\\u9FA5)]+?(不|动|产|权|第)");
    private static final Pattern NUMBER_AFTER_KEYWORD =
            Pattern.compile("(不|动|产|权|第)([0-9]+)");

    // ---- other field patterns --------------------------------------------
    private static final Pattern NAME_DIGIT_THEN_WORD = Pattern.compile("([\\d]+)([\\w]+)");
    private static final Pattern CJK_RUN = Pattern.compile("([\\u4E00-\\u9FA5]+)");
    private static final Pattern CJK_CHAR = Pattern.compile("[\\u4E00-\\u9FA5]");
    private static final Pattern WHITESPACE = Pattern.compile("\\s");
    private static final Pattern SHARE_FORBIDDEN = Pattern.compile("[^(\\w\\u4E00-\\u9FA5)]");
    private static final Pattern ADDRESS_NOISE =
            Pattern.compile("([^(0-9\\u4E00-\\u9FA5)]{2,})|(\\s)");
    private static final Pattern NUM_NOISE = Pattern.compile("([^0-9GBF])");
    /** The hyphen is escaped on purpose: unescaped, ".-;" is a range that also eats '/' and ':'. */
    private static final Pattern TYPE_NOISE = Pattern.compile("([\\w,.\\-;\\s])");
    private static final Pattern HOUSE_NOISE = Pattern.compile("([^\\u4E00-\\u9FA5/])");
    private static final Pattern USE_NOISE =
            Pattern.compile("([^城镇单一住宅用地产权调换般改房成套/()])");
    private static final Pattern AREA_NOISE = Pattern.compile("[^0-9m\\u4E00-\\u9FA5/:.]");
    private static final Pattern AREA_PARCEL = Pattern.compile("(宗.)|(.地)");
    private static final Pattern AREA_AREA = Pattern.compile("(面.)|(.积)");
    private static final Pattern AREA_BUILDING = Pattern.compile("(建.)|(.筑)");
    private static final Pattern AREA_UNIT_CHAR = Pattern.compile("平|方|米");
    private static final Pattern AREA_UNIT_REPEATED = Pattern.compile("(平方米)\\1+");
    private static final Pattern LIMIT_KEYWORD = Pattern.compile("[年月日起止]");
    private static final Pattern LIMIT_NOISE = Pattern.compile("[^0-9年月日起止]");
    private static final Pattern OTHERS_DIGIT_WU = Pattern.compile("([0-9])屋");
    private static final Pattern OTHERS_BUYER = Pattern.compile("(买.|.受)");
    private static final Pattern OTHERS_NOISE = Pattern.compile(
            "[^0-9A-Zm分摊土地面积平方米房屋结构钢筋混凝合专有建筑总层数所在丘权号来源买受市场化商品经济适用住征拆迁安置其它/:.-]");

    /**
     * Strips spaces, vertical bars, curly double quotes and line feeds from a string.
     *
     * @param str the raw text; may be {@code null}
     * @return the stripped text, or an empty string when {@code str} is {@code null}
     *         or longer than 120 characters
     */
    public static String clearFormat(String str) {
        if (str == null || str.length() > 120) {
            return "";
        }
        return str.replace(" ", "")
                .replace("|", "")
                .replace("”", "")
                .replace("“", "")
                .replace("\n", "");
    }

    /**
     * Chooses between two strings based on how many characters they share.
     *
     * <p>Every (char of {@code str1}, char of {@code str2}) pair that is equal
     * counts once, so repeated characters are counted multiple times.
     *
     * @param str1    the candidate text; must not be {@code null}
     * @param str2    the reference text; must not be {@code null}
     * @param sameNum minimum number of equal pairs required to pick {@code str2}
     * @return {@code str2} when the pair count reaches {@code sameNum}, otherwise {@code str1}
     */
    public static String compareToSame(String str1, String str2, int sameNum) {
        int equalPairs = 0;
        for (int i = 0; i < str1.length(); i++) {
            char left = str1.charAt(i);
            for (int j = 0; j < str2.length(); j++) {
                if (left == str2.charAt(j)) {
                    equalPairs++;
                }
            }
        }
        return equalPairs >= sameNum ? str2 : str1;
    }

    /**
     * Rebuilds the certificate header as
     * {@code <province char><(year)><locality>不动产权第<number>号}.
     *
     * <p>Before extraction, look-alike characters are mapped to digits
     * ({@code O/o -> 0}, {@code l -> 1}, {@code s/S -> 3}), the characters
     * {@code 一} and {@code 二} are dropped, and everything other than digits,
     * CJK ideographs and ASCII parentheses is removed. Parts that cannot be
     * found are left out of the result.
     *
     * @param header the raw header text; may be {@code null}
     * @return the reassembled header; the input itself when it is empty,
     *         or an empty string when it is {@code null}
     */
    public static String formatHeader(String header) {
        if (header == null) {
            return "";
        }
        if (header.isEmpty()) {
            return header;
        }
        String cleaned = header
                .replace('O', '0')
                .replace('o', '0')
                .replace('l', '1')
                .replace('s', '3')
                .replace('S', '3')
                .replace("一", "")
                .replace("二", "");
        cleaned = HEADER_NOISE.matcher(cleaned).replaceAll("");

        return extractProvince(cleaned)
                + extractYear(cleaned)
                + extractRightPrefix(cleaned)
                + extractRightNumber(cleaned);
    }

    /** Finds the first province abbreviation, preferring the text up to the last parenthesis. */
    private static String extractProvince(String header) {
        String scope = "";
        Matcher m = UP_TO_OPEN_PAREN.matcher(header);
        if (m.find()) {
            scope = m.group();
        } else {
            m = UP_TO_CLOSE_PAREN.matcher(header);
            if (m.find()) {
                scope = m.group();
            }
        }
        m = PROVINCE.matcher(scope.isEmpty() ? header : scope);
        return m.find() ? m.group() : "";
    }

    /** Extracts the year as "(digits)", repairing a missing parenthesis when needed. */
    private static String extractYear(String header) {
        Matcher m = YEAR_BOTH_PARENS.matcher(header);
        if (m.find()) {
            return m.group();
        }
        m = YEAR_OPEN_PAREN.matcher(header);
        if (m.find()) {
            return m.group() + ")";
        }
        m = YEAR_CLOSE_PAREN.matcher(header);
        if (m.find()) {
            return "(" + m.group();
        }
        // No parenthesis at all: use the first digit run in front of the last keyword character.
        m = UP_TO_KEYWORD.matcher(header);
        if (m.find()) {
            Matcher digits = DIGITS.matcher(m.group());
            if (digits.find()) {
                return "(" + digits.group() + ")";
            }
        }
        return "";
    }

    /** Extracts the text preceding the first keyword character and appends "不动产权第". */
    private static String extractRightPrefix(String header) {
        Matcher m = TEXT_BEFORE_KEYWORD.matcher(header);
        if (!m.find()) {
            return "";
        }
        String matched = m.group();
        // Drop the keyword character that terminated the match, then any parentheses.
        String prefix = matched.substring(0, matched.length() - 1)
                .replace("(", "")
                .replace(")", "");
        return prefix + "不动产权第";
    }

    /** Extracts the first digit run that directly follows a keyword character, plus "号". */
    private static String extractRightNumber(String header) {
        Matcher m = NUMBER_AFTER_KEYWORD.matcher(header);
        return m.find() ? m.group(2) + "号" : "";
    }

    /**
     * Normalizes the right holder's name.
     *
     * <p>The name is rejected when it contains a digit immediately followed by
     * a word character ({@code [A-Za-z0-9_]}); note that this also rejects any
     * run of two or more digits. Otherwise all CJK ideographs are concatenated.
     *
     * @param name the raw name; may be {@code null}
     * @return the CJK characters of the name when there are between 1 and 5 of them,
     *         otherwise an empty string
     */
    public static String formatName(String name) {
        if (name == null) {
            return "";
        }
        if (name.isEmpty()) {
            return name;
        }
        if (NAME_DIGIT_THEN_WORD.matcher(name).find()) {
            return "";
        }
        StringBuilder cjk = new StringBuilder();
        Matcher m = CJK_RUN.matcher(name);
        while (m.find()) {
            cjk.append(m.group(0));
        }
        boolean acceptable = cjk.length() > 0 && cjk.length() <= 5;
        return acceptable ? cjk.toString() : "";
    }

    /**
     * Normalizes the co-ownership field: whitespace is removed and the value is
     * blanked if any disallowed character remains.
     *
     * <p>Allowed characters are word characters ({@code [A-Za-z0-9_]}), CJK
     * ideographs and — because they appear literally inside the character
     * class — ASCII parentheses.
     * NOTE(review): the parentheses may be unintended; confirm before changing.
     *
     * @param shareInfo the raw text; may be {@code null}
     * @return the whitespace-free text, or an empty string when a disallowed
     *         character is present or the input is {@code null}
     */
    public static String formatShare(String shareInfo) {
        if (shareInfo == null) {
            return "";
        }
        if (shareInfo.isEmpty()) {
            return shareInfo;
        }
        String compact = WHITESPACE.matcher(shareInfo).replaceAll("");
        return SHARE_FORBIDDEN.matcher(compact).find() ? "" : compact;
    }

    /**
     * Normalizes an address: removes whitespace and every run of two or more
     * characters that are not digits, CJK ideographs or ASCII parentheses
     * (a single such character is kept).
     *
     * @param address the raw address; may be {@code null}
     * @return the cleaned address, or an empty string when it contains no CJK ideograph
     */
    public static String formatAddress(String address) {
        if (address == null || address.isEmpty()) {
            return "";
        }
        String cleaned = ADDRESS_NOISE.matcher(address).replaceAll("");
        return CJK_CHAR.matcher(cleaned).find() ? cleaned : "";
    }

    /**
     * Normalizes the real-estate unit number by keeping only digits and the
     * letters {@code G}, {@code B} and {@code F}.
     *
     * @param num the raw unit number; may be {@code null}
     * @return the filtered text, or an empty string for {@code null}
     */
    public static String formatNum(String num) {
        if (num == null) {
            return "";
        }
        if (num.isEmpty()) {
            return num;
        }
        return NUM_NOISE.matcher(num).replaceAll("");
    }

    /**
     * Normalizes the right type.
     *
     * <p>Word characters, whitespace and the punctuation {@code , . - ;} are
     * removed; the result is then replaced by the canonical text
     * {@code 国有建设用地使用权/房屋(构筑物)所有权} when
     * {@link #compareToSame(String, String, int)} counts at least 5 equal pairs.
     *
     * @param type the raw right type; may be {@code null}
     * @return the canonical text or the cleaned input; empty string for {@code null}
     */
    public static String formatType(String type) {
        if (type == null) {
            return "";
        }
        if (type.isEmpty()) {
            return type;
        }
        String cleaned = TYPE_NOISE.matcher(type).replaceAll("");
        return compareToSame(cleaned, CANONICAL_TYPE, 5);
    }

    /**
     * Normalizes the right nature into {@code <acquisition>/<housing kind>}.
     *
     * <p>Only CJK ideographs and {@code /} are kept. The left part snaps to
     * {@code 出让} or {@code 划拨}; the right part snaps to one of
     * {@code 市场化商品房}, {@code 商品房}, {@code 经济适用住房},
     * {@code 征地拆迁安置房} or {@code 其它}, each decided through
     * {@link #compareToSame(String, String, int)}. A part that stays equal to
     * the cleaned input is treated as "not recognized" and dropped.
     *
     * @param house the raw text; may be {@code null}
     * @return the normalized text; empty string for {@code null}
     */
    public static String formatHouse(String house) {
        if (house == null) {
            return "";
        }
        if (house.isEmpty()) {
            return house;
        }
        String cleaned = HOUSE_NOISE.matcher(house).replaceAll("");

        String acquisition = compareToSame(cleaned, "出让", 1);
        acquisition = compareToSame(acquisition, "划拨", 1);

        String kind = compareToSame(cleaned, "市场化", 1);
        if (kind.equals("市场化")) {
            kind = "市场化商品房";
        } else {
            kind = compareToSame(kind, "商品", 1);
            if (kind.equals("商品")) {
                kind = "商品房";
            }
        }
        // Order matters: each later candidate may override the earlier result.
        kind = compareToSame(kind, "经济适用住房", 2);
        kind = compareToSame(kind, "征地拆迁安置房", 2);
        kind = compareToSame(kind, "其它", 1);

        String left = cleaned.equals(acquisition) ? "" : acquisition + "/";
        String right = cleaned.equals(kind) ? "" : kind;
        return left + right;
    }

    /**
     * Normalizes the usage field.
     *
     * <p>Only the characters of the known vocabulary plus {@code /}, {@code (}
     * and {@code )} are kept. When the result has exactly two {@code /}-separated
     * parts, each side is snapped to known phrases via
     * {@link #compareToSame(String, String, int)}; otherwise the filtered text
     * is returned as is.
     *
     * <p>NOTE(review): when the left part matches neither {@code 产权调换} nor
     * {@code 改}, the left part itself is emitted in that slot. This keeps an
     * exact {@code 城镇单一住宅用地} intact but can duplicate text for inexact
     * input — behaviour preserved from the original, confirm intent.
     *
     * @param use the raw usage text; may be {@code null}
     * @return the normalized usage; empty string for {@code null}
     */
    public static String formatUse(String use) {
        if (use == null) {
            return "";
        }
        if (use.isEmpty()) {
            return use;
        }
        String cleaned = USE_NOISE.matcher(use).replaceAll("");
        if (!cleaned.contains("/")) {
            return cleaned;
        }
        String[] parts = cleaned.split("/");
        if (parts.length != 2) {
            return cleaned;
        }
        String left = parts[0];
        String right = parts[1];

        // Left of the slash: land use, blank when compareToSame returned the input unchanged.
        String landUse = compareToSame(left, "城镇单一住宅用地", 3);
        if (landUse.equals(left)) {
            landUse = "";
        }

        // Left of the slash: how the housing was obtained.
        String origin = compareToSame(left, "产权调换", 1);
        if (origin.equals(left)) {
            origin = compareToSame(left, "改", 1);
            if (!origin.equals(left)) {
                origin = "(房改房)";
            }
        } else {
            origin = "(产权调换房)";
        }

        // Right of the slash: housing category.
        String category = compareToSame(right, "一般", 1);
        if (category.equals(right)) {
            category = compareToSame(right, "成套", 1);
            if (category.equals(right)) {
                category = "";
            } else {
                category += "住宅";
            }
        } else {
            category += "住宅";
        }
        return landUse + origin + "/" + category;
    }

    /**
     * Normalizes the area field.
     *
     * <p>Keeps digits, {@code m}, CJK ideographs and {@code / : .}; repairs the
     * two-character words {@code 宗地}, {@code 面积} and {@code 建筑} when one
     * of their characters is present; expands each of {@code 平}, {@code 方},
     * {@code 米} to {@code 平方米} and collapses directly repeated
     * {@code 平方米} into one.
     *
     * @param area the raw area text; may be {@code null}
     * @return the normalized text; empty string for {@code null}
     */
    public static String formatArea(String area) {
        if (area == null) {
            return "";
        }
        if (area.isEmpty()) {
            return area;
        }
        String result = AREA_NOISE.matcher(area).replaceAll("");
        result = AREA_PARCEL.matcher(result).replaceAll("宗地");
        result = AREA_AREA.matcher(result).replaceAll("面积");
        result = AREA_BUILDING.matcher(result).replaceAll("建筑");
        result = AREA_UNIT_CHAR.matcher(result).replaceAll("平方米");
        return AREA_UNIT_REPEATED.matcher(result).replaceAll("$1");
    }

    /**
     * Normalizes the term-of-use field.
     *
     * @param limit the raw text; may be {@code null}
     * @return the input reduced to digits and the characters {@code 年月日起止},
     *         or an empty string when none of those five characters is present
     */
    public static String formatLimit(String limit) {
        if (limit == null) {
            return "";
        }
        if (limit.isEmpty()) {
            return limit;
        }
        if (!LIMIT_KEYWORD.matcher(limit).find()) {
            return "";
        }
        return LIMIT_NOISE.matcher(limit).replaceAll("");
    }

    /**
     * Normalizes the "other conditions of the right" field.
     *
     * <p>Applies a few character repairs ({@code <digit>屋 -> <digit>层},
     * {@code 买?/?受 -> 买受}, {@code 士 -> 土}, {@code 屎 -> 层}) and then
     * removes every character outside a fixed whitelist of digits, uppercase
     * letters, {@code m}, known vocabulary characters and {@code / : . -}.
     *
     * @param others the raw text; may be {@code null}
     * @return the normalized text; empty string for {@code null}
     */
    public static String formatOthers(String others) {
        if (others == null) {
            return "";
        }
        if (others.isEmpty()) {
            return others;
        }
        String result = OTHERS_DIGIT_WU.matcher(others).replaceAll("$1层");
        result = OTHERS_BUYER.matcher(result).replaceAll("买受");
        result = result.replace("士", "土").replace("屎", "层");
        return OTHERS_NOISE.matcher(result).replaceAll("");
    }

    /**
     * Manual smoke test: prints a noisy sample and the result of
     * {@link #formatShare(String)} on it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = ": - “ 厂\n" +
                "丨 一\n" +
                "\n" +
                "AY.\n" +
                "\n" +
                "~ 力 发 棣 。 4 一 东 一 志";
        System.out.println(str);
        System.out.println(formatShare(str));
    }
}