package utils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers that clean up OCR-recognised fields of a real-property certificate.
 *
 * <p>Every {@code formatXxx} method first prints its raw argument to standard output
 * (prefixed with a field index) as a debugging trace, and returns an empty string for
 * {@code null} or empty input.
 */
public class TextUtils {

    /** Inputs longer than this many characters are rejected by {@link #clearFormat(String)}. */
    private static final int MAX_CLEAR_LENGTH = 120;

    /** Canonical text that {@link #formatType(String)} snaps similar inputs to. */
    private static final String EXPECTED_RIGHT_TYPE = "国有建设用地使用权/房屋(构筑物)所有权";

    private static final Pattern WHITESPACE = Pattern.compile("\\s");
    private static final Pattern CJK_CHAR = Pattern.compile("[\\u4E00-\\u9FA5]");
    private static final Pattern DIGIT_RUN = Pattern.compile("[0-9]+");

    // --- formatHeader ---
    // Note: '(' and ')' inside a character class are literal, so these classes also accept parentheses.
    private static final Pattern HEADER_NOISE = Pattern.compile("([^(0-9\\u4E00-\\u9FA5)])|(\\s)");
    private static final Pattern UP_TO_OPEN_PAREN = Pattern.compile("(.*)\\(");
    private static final Pattern UP_TO_CLOSE_PAREN = Pattern.compile("(.*)\\)");
    private static final Pattern PROVINCE = Pattern.compile(
            "(京|津|冀|晋|蒙|辽|吉|黑|沪|苏|浙|皖|闽|赣|鲁|豫|鄂|湘|粤|桂|琼|渝|川|蜀|黔|贵|滇|云|藏|陕|秦|甘|陇|青|宁|新|港|澳|台)");
    private static final Pattern YEAR_BOTH_PARENS = Pattern.compile("\\([0-9]+\\)");
    private static final Pattern YEAR_OPEN_PAREN = Pattern.compile("\\([0-9]+");
    private static final Pattern YEAR_CLOSE_PAREN = Pattern.compile("[0-9]+\\)");
    private static final Pattern UP_TO_TITLE_CHAR = Pattern.compile(".*(不|动|产|权|第)");
    private static final Pattern TITLE_PREFIX = Pattern.compile("[(\\u4E00-\\u9FA5)]+?(不|动|产|权|第)");
    private static final Pattern SERIAL_NUMBER = Pattern.compile("(不|动|产|权|第)([0-9]+)");

    // --- other fields ---
    private static final Pattern NAME_NOISE = Pattern.compile("[^\\dXx:()\\u4E00-\\u9FA5]");
    private static final Pattern SHARE_FORBIDDEN = Pattern.compile("[^(\\w\\u4E00-\\u9FA5)]");
    private static final Pattern ADDRESS_NOISE = Pattern.compile("([^(0-9\\u4E00-\\u9FA5)]{2,})|(\\s)");
    private static final Pattern UNIT_NUMBER_NOISE = Pattern.compile("([^0-9GBF])");
    // The hyphen is escaped on purpose: an unescaped ".-;" would be a range that also eats '/' and ':'.
    private static final Pattern TYPE_NOISE = Pattern.compile("[\\w,.;\\s\\-]");
    private static final Pattern HOUSE_NOISE = Pattern.compile("([^\\u4E00-\\u9FA5/])");
    private static final Pattern USE_NOISE = Pattern.compile("([^城镇单一住宅用地产权调换般改房成套土屋:/()])");
    private static final Pattern REPEATED_HOUSE_LABEL = Pattern.compile("(房屋:)\\1+");
    private static final Pattern LIMIT_KEYWORD = Pattern.compile("[年月日起止(详见附记)]");
    private static final Pattern LIMIT_NOISE = Pattern.compile("[^0-9年月日起止(详见附记)]");

    /**
     * Strips ASCII spaces, {@code |}, curly double quotes and line feeds from a string.
     *
     * @param str raw text; may be {@code null}
     * @return the stripped text, or an empty string when {@code str} is {@code null} or its
     *         original length exceeds 120 characters
     */
    public static String clearFormat(String str) {
        if (str == null || str.length() > MAX_CLEAR_LENGTH) {
            return "";
        }
        return str.replace(" ", "")
                .replace("|", "")
                .replace("”", "")
                .replace("“", "")
                .replace("\n", "");
    }

    /**
     * Counts every (i, j) pair where {@code a.charAt(i) == b.charAt(j)}.
     * Repeated characters are therefore counted once per pairing.
     */
    private static int countMatchingPairs(String a, String b) {
        int pairs = 0;
        for (int i = 0; i < a.length(); i++) {
            char current = a.charAt(i);
            for (int j = 0; j < b.length(); j++) {
                if (current == b.charAt(j)) {
                    pairs++;
                }
            }
        }
        return pairs;
    }

    /** Returns whether {@code text} shares at least {@code threshold} character pairs with {@code keyword}. */
    private static boolean isSimilar(String text, String keyword, int threshold) {
        return countMatchingPairs(text, keyword) >= threshold;
    }

    /**
     * Chooses between two strings based on how many characters they share.
     *
     * <p>The score is the number of index pairs whose characters are equal, so a character
     * that occurs several times contributes several times.
     *
     * @param str1    the candidate text; must not be {@code null}
     * @param str2    the reference text; must not be {@code null}
     * @param sameNum minimum score required to prefer {@code str2}
     * @return {@code str2} when the score is at least {@code sameNum}, otherwise {@code str1}
     */
    public static String compareToSame(String str1, String str2, int sameNum) {
        return isSimilar(str1, str2, sameNum) ? str2 : str1;
    }

    /**
     * Rebuilds the certificate number found in the document header as
     * province + "(year)" + place + "不动产权第" + serial + "号".
     *
     * <p>Before parsing, look-alike letters are mapped to digits (O/o to 0, l to 1, s/S to 3),
     * the characters 一 and 二 are dropped, and everything except digits, CJK characters and
     * ASCII parentheses is removed. Parts that cannot be found are left out of the result.
     *
     * @param header raw header text; may be {@code null}
     * @return the rebuilt number, or an empty string for {@code null}/empty input
     */
    public static String formatHeader(String header) {
        System.out.println("0: " + header);
        if (header == null || header.isEmpty()) {
            return "";
        }
        String text = header
                .replace('O', '0')
                .replace('o', '0')
                .replace('l', '1')
                .replace('s', '3')
                .replace('S', '3')
                .replace("一", "")
                .replace("二", "");
        text = HEADER_NOISE.matcher(text).replaceAll("");

        return extractProvince(text) + extractYear(text) + extractTitle(text) + extractSerial(text);
    }

    /** Finds the province abbreviation, preferring the text that precedes a parenthesis. */
    private static String extractProvince(String text) {
        String scope = "";
        Matcher matcher = UP_TO_OPEN_PAREN.matcher(text);
        if (matcher.find()) {
            scope = matcher.group();
        } else {
            matcher = UP_TO_CLOSE_PAREN.matcher(text);
            if (matcher.find()) {
                scope = matcher.group();
            }
        }
        // Without any parenthesis, search the whole header instead.
        matcher = PROVINCE.matcher(scope.isEmpty() ? text : scope);
        return matcher.find() ? matcher.group() : "";
    }

    /** Finds the year and returns it wrapped in parentheses, repairing a missing parenthesis. */
    private static String extractYear(String text) {
        Matcher matcher = YEAR_BOTH_PARENS.matcher(text);
        if (matcher.find()) {
            return matcher.group();
        }
        matcher = YEAR_OPEN_PAREN.matcher(text);
        if (matcher.find()) {
            return matcher.group() + ")";
        }
        matcher = YEAR_CLOSE_PAREN.matcher(text);
        if (matcher.find()) {
            return "(" + matcher.group();
        }
        // No parenthesis at all: take the first digit run that precedes the title characters.
        matcher = UP_TO_TITLE_CHAR.matcher(text);
        if (matcher.find()) {
            Matcher digits = DIGIT_RUN.matcher(matcher.group());
            if (digits.find()) {
                return "(" + digits.group() + ")";
            }
        }
        return "";
    }

    /** Finds the text leading up to the first title character and appends the canonical title. */
    private static String extractTitle(String text) {
        Matcher matcher = TITLE_PREFIX.matcher(text);
        if (!matcher.find()) {
            return "";
        }
        String matched = matcher.group();
        // Drop the title character that terminated the match, then any parentheses.
        String place = matched.substring(0, matched.length() - 1)
                .replace("(", "")
                .replace(")", "");
        return place + "不动产权第";
    }

    /** Finds the digit run that directly follows a title character and appends "号". */
    private static String extractSerial(String text) {
        Matcher matcher = SERIAL_NUMBER.matcher(text);
        return matcher.find() ? matcher.group(2) + "号" : "";
    }

    /**
     * Cleans the right-holder name field.
     *
     * @param name raw text; may be {@code null}
     * @return the text reduced to digits, {@code X}, {@code x}, {@code :}, ASCII parentheses
     *         and CJK characters; an empty string for {@code null}/empty input
     */
    public static String formatName(String name) {
        System.out.println("1: " + name);
        if (name == null || name.isEmpty()) {
            return "";
        }
        return NAME_NOISE.matcher(name).replaceAll("");
    }

    /**
     * Validates the co-ownership field: whitespace is removed, and the value is discarded
     * entirely when any other unexpected character remains.
     *
     * <p>Accepted characters are word characters ({@code \w}: ASCII letters, digits and
     * underscore), CJK characters and ASCII parentheses (the parentheses are literal members
     * of the character class).
     *
     * @param shareInfo raw text; may be {@code null}
     * @return the whitespace-free text, or an empty string when it is {@code null}, empty or
     *         contains a character outside the accepted set
     */
    public static String formatShare(String shareInfo) {
        System.out.println("2: " + shareInfo);
        if (shareInfo == null || shareInfo.isEmpty()) {
            return "";
        }
        String compact = WHITESPACE.matcher(shareInfo).replaceAll("");
        return SHARE_FORBIDDEN.matcher(compact).find() ? "" : compact;
    }

    /**
     * Cleans the address field.
     *
     * <p>Removes whitespace and every run of two or more characters that are not digits,
     * CJK characters or ASCII parentheses; an isolated single character of that kind
     * (for example one Latin letter) is kept.
     *
     * @param address raw text; may be {@code null}
     * @return the cleaned address, or an empty string when the input is {@code null}/empty or
     *         the cleaned text contains no CJK character
     */
    public static String formatAddress(String address) {
        System.out.println("3: " + address);
        if (address == null || address.isEmpty()) {
            return "";
        }
        String cleaned = ADDRESS_NOISE.matcher(address).replaceAll("");
        return CJK_CHAR.matcher(cleaned).find() ? cleaned : "";
    }

    /**
     * Cleans the real-property unit number.
     *
     * @param num raw text; may be {@code null}
     * @return the text reduced to digits and the letters {@code G}, {@code B}, {@code F};
     *         an empty string for {@code null}/empty input
     */
    public static String formatNum(String num) {
        System.out.println("4: " + num);
        if (num == null || num.isEmpty()) {
            return "";
        }
        return UNIT_NUMBER_NOISE.matcher(num).replaceAll("");
    }

    /**
     * Cleans the right-type field and snaps it to the canonical value when similar enough.
     *
     * <p>Word characters, whitespace and the punctuation {@code , . ; -} are removed; the
     * separators {@code /} and {@code :} are preserved. If the remainder scores at least 5
     * against the canonical text (see {@link #compareToSame(String, String, int)}), the
     * canonical text is returned.
     *
     * @param type raw text; may be {@code null}
     * @return the canonical right type, the cleaned text, or an empty string for
     *         {@code null}/empty input
     */
    public static String formatType(String type) {
        System.out.println("5: " + type);
        if (type == null || type.isEmpty()) {
            return "";
        }
        String cleaned = TYPE_NOISE.matcher(type).replaceAll("");
        return compareToSame(cleaned, EXPECTED_RIGHT_TYPE, 5);
    }

    /**
     * Rebuilds the right-nature field as {@code [土地:]<出让|划拨>/[房屋:]<house kind>}.
     *
     * <p>Only CJK characters and {@code /} of the input are considered. Each part is
     * recognised by character overlap with a keyword; a part whose keywords are not
     * recognised is omitted. House kinds are tried in this order: 市场化商品房, 商品房,
     * 经济适用住房, 征地拆迁安置房, 其它.
     *
     * @param house raw text; may be {@code null}
     * @return the rebuilt value (possibly empty), or an empty string for {@code null}/empty input
     */
    public static String formatHouse(String house) {
        System.out.println("6: " + house);
        if (house == null || house.isEmpty()) {
            return "";
        }
        String text = HOUSE_NOISE.matcher(house).replaceAll("");

        // Left part: how the land was acquired; 出让 wins over 划拨.
        String landLabel = isSimilar(text, "土地", 1) ? "土地:" : "";
        String landMode = null;
        if (isSimilar(text, "出让", 1)) {
            landMode = "出让";
        } else if (isSimilar(text, "划拨", 1)) {
            landMode = "划拨";
        }
        String left = landMode == null ? "" : landLabel + landMode + "/";

        // Right part: the kind of house.
        String houseLabel = isSimilar(text, "房屋", 2) ? "房屋:" : "";
        String houseKind = null;
        if (isSimilar(text, "市场化", 1)) {
            houseKind = "市场化商品房";
        } else if (isSimilar(text, "商品", 1)) {
            houseKind = "商品房";
        } else if (isSimilar(text, "经济适用住房", 2)) {
            houseKind = "经济适用住房";
        } else if (isSimilar(text, "征地拆迁安置房", 2)) {
            houseKind = "征地拆迁安置房";
        } else if (isSimilar(text, "其它", 1)) {
            houseKind = "其它";
        }
        String right = houseKind == null ? "" : houseLabel + houseKind;

        return left + right;
    }

    /**
     * Rebuilds the land/house use field.
     *
     * <p>Characters outside a fixed whitelist are removed first. When the remainder splits
     * on {@code /} into exactly two parts, the result is
     * {@code [城镇单一住宅用地][(产权调换房)|(房改房)]/[房屋:]<一般住宅|成套住宅|right part as-is>};
     * otherwise the filtered text is returned unchanged.
     *
     * @param use raw text; may be {@code null}
     * @return the rebuilt value, or an empty string for {@code null}/empty input
     */
    public static String formatUse(String use) {
        System.out.println("7: " + use);
        if (use == null || use.isEmpty()) {
            return "";
        }
        String text = USE_NOISE.matcher(use).replaceAll("");
        if (!text.contains("/")) {
            return text;
        }
        String[] parts = text.split("/");
        if (parts.length != 2) {
            return text;
        }
        String left = parts[0];
        String right = parts[1];

        // Left of the slash: land use plus an optional origin tag.
        String landUse = isSimilar(left, "城镇单一住宅用地", 3) ? "城镇单一住宅用地" : "";
        String originTag = "";
        if (isSimilar(left, "产权调换", 1)) {
            originTag = "(产权调换房)";
        } else if (isSimilar(left, "改", 1)) {
            originTag = "(房改房)";
        }

        // Right of the slash: house label plus dwelling kind; unrecognised text is kept as-is.
        String houseLabel = isSimilar(right, "房屋", 1) ? "房屋:" : "";
        String dwelling = right;
        if (isSimilar(right, "一般", 1)) {
            dwelling = "一般住宅";
        } else if (isSimilar(right, "成套", 1)) {
            dwelling = "成套住宅";
        }

        String rebuilt = landUse + originTag + "/" + houseLabel + dwelling;
        // The kept right part may itself start with the label; collapse the duplicate.
        return REPEATED_HOUSE_LABEL.matcher(rebuilt).replaceAll("$1");
    }

    /**
     * Cleans the area field and repairs common OCR damage in its keywords.
     *
     * <p>Keeps digits, {@code m}, CJK characters and {@code / : .}; then rewrites any
     * two-character sequence containing half of 宗地, 面积 or 建筑 to the full word, expands
     * any of 平, 方, 米 to 平方米 and collapses consecutive repeats of 平方米 to one.
     *
     * @param area raw text; may be {@code null}
     * @return the cleaned text, or an empty string for {@code null}/empty input
     */
    public static String formatArea(String area) {
        System.out.println("8: " + area);
        if (area == null || area.isEmpty()) {
            return "";
        }
        String text = area.replaceAll("[^0-9m\\u4E00-\\u9FA5/:.]", "");
        text = text.replaceAll("(宗.)|(.地)", "宗地");
        text = text.replaceAll("(面.)|(.积)", "面积");
        text = text.replaceAll("(建.)|(.筑)", "建筑");
        text = text.replaceAll("平|方|米", "平方米");
        return text.replaceAll("(平方米)\\1+", "$1");
    }

    /**
     * Cleans the term-of-use field.
     *
     * @param limit raw text; may be {@code null}
     * @return the text reduced to digits and the characters of 年月日起止(详见附记) when at least
     *         one of those characters is present; otherwise an empty string
     */
    public static String formatLimit(String limit) {
        System.out.println("9: " + limit);
        if (limit == null || limit.isEmpty()) {
            return "";
        }
        if (!LIMIT_KEYWORD.matcher(limit).find()) {
            return "";
        }
        return LIMIT_NOISE.matcher(limit).replaceAll("");
    }

    /**
     * Cleans the "other circumstances" field and repairs common OCR misreadings.
     *
     * <p>Whitespace is removed, look-alike characters are corrected (士 to 土, 屎 to 层,
     * 屈 and 尿 to 屋, a digit followed by 屋 to that digit followed by 层), damaged keywords
     * (房屋结构, 买受, 分摊土, 面积) are restored, and finally only digits, the letters
     * {@code X I x i m}, CJK characters and {@code / : . -} are kept.
     *
     * @param others raw text; may be {@code null}
     * @return the cleaned text, or an empty string for {@code null}/empty input
     */
    public static String formatOthers(String others) {
        System.out.println("11: " + others);
        if (others == null || others.isEmpty()) {
            return "";
        }
        String text = WHITESPACE.matcher(others).replaceAll("");
        text = text.replace("士", "土")
                .replace("屎", "层")
                .replace("屈", "屋")
                .replace("尿", "屋");
        text = text.replaceAll("([0-9])屋", "$1层");
        text = text.replaceAll("(房..构)", "房屋结构");
        text = text.replaceAll("(买.|.受)", "买受");
        text = text.replaceAll("(分.土)", "分摊土");
        text = text.replaceAll("(面.|.积)", "面积");
        return text.replaceAll("[^0-9XIxim\\u4E00-\\u9FA5/:.-]", "");
    }

    /** Manual smoke test: prints a garbled sample and the result of {@link #formatUse(String)}. */
    public static void main(String[] args) {
        String str = "地 铭 鞍 一 住 守 用 坝 / 一 航 住 守";
        System.out.println(str);
        System.out.println(formatUse(str));
    }
}