import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Command-line entry point that runs Tesseract OCR over fixed rectangular regions of a
 * scanned image and prints the recognised text per field.
 *
 * <p>The crop coordinates are hard-coded. Before OCR the image is rescaled with
 * {@code ImageUtils.zoomImage} whenever its width differs from {@link #TARGET_WIDTH};
 * presumably the coordinates were measured on an image of that size (TODO confirm).
 */
public class Main {

    /** Path of the image that is processed. */
    private static final String IMAGE_PATH = "/home/ubt/ocr/bdc2.jpg";

    /** Width the image is rescaled to when its own width differs. */
    private static final int TARGET_WIDTH = 2590;

    /** Height passed to the rescale call together with {@link #TARGET_WIDTH}. */
    private static final int TARGET_HEIGHT = 3903;

    /** Width of each table-row crop. */
    private static final int AREA_WIDTH = 1978;

    /** Height of each table-row crop; also the vertical step between consecutive rows. */
    private static final int AREA_HEIGHT = 245;

    /** X origin of the table crop. */
    private static final int FROM_X = 476;

    /** Y origin of the table crop. */
    private static final int FROM_Y = 285;

    /** Result keys for the table rows, in top-to-bottom order; one OCR call is made per entry. */
    private static final String[] ROW_KEYS = {
        "1-权利人",
        "2-共有情况",
        "3-坐落",
        "4-不动产单元号",
        "5-权利类型",
        "6-权利性质",
        "7-用途",
        "8-面积",
        "9-使用期限"
    };

    /**
     * Runs OCR on the header, on each table row and on the trailing block of the image,
     * then prints the collected results and the elapsed time in milliseconds.
     *
     * <p>Returns early with a message on standard output when the image file does not
     * exist or cannot be decoded. A {@link TesseractException} raised during OCR is
     * caught and its stack trace printed.
     *
     * @param args ignored
     * @throws Exception if reading or rescaling the image fails
     */
    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();

        // Insertion-ordered so the printed map follows the numeric prefixes of the keys.
        Map<String, String> resultMap = new LinkedHashMap<>();

        File imageFile = new File(IMAGE_PATH);
        if (!imageFile.exists()) {
            System.out.println("======== don't find this file! ========");
            return;
        }

        // ImageIO.read returns null (rather than throwing) when no registered reader
        // can decode the file, so guard before dereferencing.
        BufferedImage image = ImageIO.read(imageFile);
        if (image == null) {
            System.out.println("======== can't decode this image! ========");
            return;
        }
        if (image.getWidth() != TARGET_WIDTH) {
            imageFile = ImageUtils.zoomImage(imageFile, TARGET_WIDTH, TARGET_HEIGHT);
        }

        ITesseract iTesseract = new Tesseract();
        iTesseract.setDatapath("./tessdata/");
        iTesseract.setLanguage("eng+chi_sim");

        try {
            // Header strip above the table.
            String header = iTesseract.doOCR(imageFile, new Rectangle(50, 40, 2425, AREA_HEIGHT));
            resultMap.put("0-不动产权", TextUtils.formatHeader(header));
            System.out.println(TextUtils.clearFormat(header));

            // One crop per table row, each AREA_HEIGHT below the previous one.
            for (int row = 0; row < ROW_KEYS.length; row++) {
                Rectangle area =
                    new Rectangle(FROM_X, FROM_Y + (row * AREA_HEIGHT), AREA_WIDTH, AREA_HEIGHT);
                String text = iTesseract.doOCR(imageFile, area);
                resultMap.put(ROW_KEYS[row], formatRow(row, text));
            }

            // Taller block following the table rows.
            String remarks = iTesseract.doOCR(imageFile, new Rectangle(476, 2490, AREA_WIDTH, 1300));
            resultMap.put("10-权利其他状况", TextUtils.clearFormat(remarks));

            System.out.println(resultMap.toString());
            long end = System.currentTimeMillis();
            System.out.println("用时: " + (end - start));
        } catch (TesseractException e) {
            e.printStackTrace();
        }
    }

    /**
     * Applies the row-specific formatter to the raw OCR text of one table row.
     *
     * @param row zero-based row index into {@link #ROW_KEYS}
     * @param raw text returned by the OCR call for that row
     * @return the formatted text: row 1 goes through {@code TextUtils.formatShare},
     *     row 2 through {@code TextUtils.formatAddress}, every other row through
     *     {@code TextUtils.clearFormat}
     */
    private static String formatRow(int row, String raw) {
        switch (row) {
            case 1:
                return TextUtils.formatShare(raw);
            case 2:
                return TextUtils.formatAddress(raw);
            default:
                return TextUtils.clearFormat(raw);
        }
    }
}