import com.alibaba.fastjson.JSON;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import utils.ImageUtils;
import utils.TextUtils;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
 * Command-line tool that runs Tesseract OCR over fixed regions of a scanned
 * form page and prints the recognised fields as a JSON array of
 * {@code "label= value"} strings.
 *
 * <p>The page is cropped into a header strip, nine equally tall table rows and
 * one large trailing block. Every crop rectangle is expressed in the pixel
 * coordinates of a {@value #PAGE_WIDTH} x {@value #PAGE_HEIGHT} page, so any
 * image of a different size is rescaled to that size first.
 *
 * <p>NOTE(review): judging by the field labels, the input is presumably a
 * Chinese real-estate title certificate; confirm against sample images.
 */
public class Main_1 {

    /** Image that is processed when no path is passed on the command line. */
    private static final String DEFAULT_IMAGE_PATH = "/home/ubt/ocr/bdc60.jpg";

    /** Page width, in pixels, that the crop coordinates are calibrated for. */
    private static final int PAGE_WIDTH = 2590;

    /** Page height, in pixels, that the crop coordinates are calibrated for. */
    private static final int PAGE_HEIGHT = 3903;

    /** Number of equally tall table rows that are cropped and recognised. */
    private static final int ROW_COUNT = 9;

    /**
     * Runs the OCR pipeline and prints the result list and the elapsed time
     * (milliseconds) to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the image to
     *             process. When absent, {@link #DEFAULT_IMAGE_PATH} is used.
     * @throws Exception if reading, resizing or cropping the image fails;
     *                   {@link TesseractException} is reported on standard
     *                   error instead of being propagated
     */
    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();

        // Recognised fields, in page order.
        List<String> resultList = new ArrayList<>();

        int areaWidth = 1978;  // width of each cropped table row
        int areaHeight = 245;  // height of each cropped table row
        int fromX = 476;       // X origin of the table crop
        int fromY = 285;       // Y origin of the table crop

        String imagePath = (args != null && args.length > 0) ? args[0] : DEFAULT_IMAGE_PATH;
        File imageFile = new File(imagePath);
        // isFile() rather than exists(): a directory exists but cannot be decoded.
        if (!imageFile.isFile()) {
            System.out.println("======== don't find this file! ========");
            return;
        }

        BufferedImage image = ImageIO.read(imageFile);
        // ImageIO.read returns null (it does not throw) when no registered
        // reader understands the file format.
        if (image == null) {
            System.out.println("======== can't read this file as an image! ========");
            return;
        }

        // All crop rectangles below assume the calibrated page size, so rescale
        // whenever either dimension differs from it.
        if (image.getWidth() != PAGE_WIDTH || image.getHeight() != PAGE_HEIGHT) {
            imageFile = ImageUtils.zoomImage(imageFile, PAGE_WIDTH, PAGE_HEIGHT);
        }

        ITesseract iTesseract = new Tesseract();
        iTesseract.setDatapath("./tessdata/");
        iTesseract.setLanguage("eng+chi_sim");

        try {
            // Header strip at the top of the page.
            String header = iTesseract.doOCR(
                    ImageUtils.cutAreaImage(imageFile, 50, 40, 2425, areaHeight));
            resultList.add("0-不动产权= " + TextUtils.formatHeader(header));

            // Table rows. Each crop is inset by 10 px at the top and bottom
            // (y + 10, height - 20) relative to the nominal row rectangle.
            for (int i = 0; i < ROW_COUNT; i++) {
                String rowText = iTesseract.doOCR(ImageUtils.cutAreaImage(
                        imageFile, fromX, fromY + (i * areaHeight) + 10, areaWidth, areaHeight - 20));
                resultList.add(formatRow(i, rowText));
            }

            // Large trailing block below the table rows.
            String others = iTesseract.doOCR(
                    ImageUtils.cutAreaImage(imageFile, fromX, 2490, areaWidth, 1300));
            resultList.add("10-权利其他状况= " + TextUtils.formatOthers(others));

            System.out.println(JSON.toJSONString(resultList));
            long end = System.currentTimeMillis();
            System.out.println("用时: " + (end - start));
        } catch (TesseractException e) {
            // Best effort: report the failure and exit normally, as before.
            System.err.println("OCR failed for " + imageFile.getPath() + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Builds the labelled result entry for one table row by applying the
     * {@code TextUtils} formatter that belongs to that row.
     *
     * @param rowIndex zero-based row index, {@code 0} to {@code 8}
     * @param text     raw OCR text recognised for the row
     * @return the row label followed by the formatted text
     * @throws IllegalArgumentException if {@code rowIndex} is outside 0..8
     */
    private static String formatRow(int rowIndex, String text) {
        switch (rowIndex) {
            case 0:
                return "1-权利人= " + TextUtils.formatName(text);
            case 1:
                return "2-共有情况= " + TextUtils.formatShare(text);
            case 2:
                return "3-坐落= " + TextUtils.formatAddress(text);
            case 3:
                return "4-不动产单元号= " + TextUtils.formatNum(text);
            case 4:
                return "5-权利类型= " + TextUtils.formatType(text);
            case 5:
                return "6-权利性质= " + TextUtils.formatHouse(text);
            case 6:
                return "7-用途= " + TextUtils.formatUse(text);
            case 7:
                return "8-面积= " + TextUtils.formatArea(text);
            case 8:
                return "9-使用期限= " + TextUtils.formatLimit(text);
            default:
                throw new IllegalArgumentException("unexpected row index: " + rowIndex);
        }
    }
}