Java从PDF中根据关键字实现截图
package com.utils;
import java.awt.Color;
import java.awt.Image;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import javax.imageio.ImageIO;
import org.apache.poi.xssf.usermodel.XSSFClientAnchor;
import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import com.entity.Estatement;
import com.entity.Ref;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.PdfPageBase;
import com.spire.pdf.general.find.PdfTextFind;
import com.sun.image.codec.jpeg.JPEGCodec;
import com.sun.image.codec.jpeg.JPEGEncodeParam;
import com.sun.image.codec.jpeg.JPEGImageEncoder;
import com.sun.pdfview.PDFFile;
import com.sun.pdfview.PDFPage;
import temp.CustomRenderListener;
/**
 * Finds a keyword inside a PDF, crops a thin horizontal strip of the page
 * around the hit into a JPEG file, and embeds such images into sheets of an
 * Excel workbook.
 */
public class PictureShot {

    /**
     * No longer written by this class: the keyword search now uses a local
     * loop counter instead of this shared static. The field is retained only
     * so that existing references to it keep compiling.
     */
    static int page = 0;

    /**
     * Demo entry point: searches one hard-coded PDF for one hard-coded
     * reference and writes the resulting strip next to the PDF in an
     * {@code Image} sub-directory.
     *
     * @param args ignored
     * @throws Exception declared for compatibility with the original signature
     */
    public static void main(String[] args) throws Exception {
        PictureShot p = new PictureShot();
        String sourcefile = "D:\\Project\\e-Statement\\estatement_pdf\\2021_05_10_estatement\\8000024987_est_6bdda4bdddf8a800355a924157b5408e.pdf";
        String flag = "FT21098B376L";

        // Split the path into directory and file name; the first 10
        // characters of the file name are used as the customer id.
        int index = sourcefile.lastIndexOf("\\");
        String Dir = sourcefile.substring(0, index);
        String filename = sourcefile.substring(index + 1);
        String Customer_ID = filename.substring(0, 10);

        String target_path = Dir + "//Image";
        File f = new File(target_path);
        if (!f.exists()) {
            f.mkdirs();
        }
        String targetfile = target_path + "//" + Customer_ID + "_" + flag + ".jpg";

        Ref r = p.getKeyWordsAttribute(sourcefile, flag);
        if (r == null) {
            // getKeyWordsAttribute returns null when the PDF could not be read.
            System.err.println("Unable to read PDF: " + sourcefile);
            return;
        }
        p.createScreenShootbyLocation(sourcefile, targetfile, r.getY(), r.getPage());
    }

    /**
     * Generates the strip images for every statement, then inserts each
     * image whose file name starts with a failed customer id into the
     * workbook sheet named after that id.
     *
     * @param Estatement_List        statements to generate images for
     * @param fail_Customer_id_list  customer ids; each is used both as a file-name
     *                               prefix and as a sheet name
     * @param wk                     workbook that receives the pictures
     * @param folder_path            directory containing the {@code Image} sub-directory
     * @return the same workbook instance that was passed in
     */
    public XSSFWorkbook Generate_picture(ArrayList<Estatement> Estatement_List, ArrayList<String> fail_Customer_id_list, XSSFWorkbook wk, String folder_path) {
        for (Estatement estatement : Estatement_List) {
            Generate_picture_for_file(estatement);
        }

        // The directory content does not change while pictures are inserted,
        // so it is listed once. listFiles() returns null when the path is not
        // a readable directory.
        File[] images = new File(folder_path + "//Image").listFiles();
        if (images == null) {
            return wk;
        }

        for (String id : fail_Customer_id_list) {
            XSSFSheet sheet = wk.getSheet(id);
            if (sheet == null) {
                System.err.println("No sheet named [" + id + "] in workbook; skipping its pictures");
                continue;
            }
            // count is the 0-based position of the picture within this sheet.
            int count = 0;
            for (File image : images) {
                String name = image.getName();
                if (name.startsWith(id) && name.contains(".jpg")) {
                    PictureWriteToExcel(image.getAbsolutePath().trim(), wk, sheet, count);
                    count++;
                }
            }
        }
        return wk;
    }

    /**
     * Reads an image file and anchors it on the given sheet.
     *
     * <p>The picture spans columns 0 to 10 and rows {@code count*4} to
     * {@code count*4+3}, so successive pictures are placed four rows apart.
     *
     * @param picture_path path of the image file to insert
     * @param wb           workbook the picture data is added to
     * @param sheet        sheet the picture is anchored on
     * @param count        0-based position of this picture on the sheet
     */
    public void PictureWriteToExcel(String picture_path, XSSFWorkbook wb, XSSFSheet sheet, int count) {
        try {
            BufferedImage bufferImg = ImageIO.read(new File(picture_path));
            if (bufferImg == null) {
                // ImageIO.read returns null when no registered reader can decode the file.
                System.err.println("Unsupported or unreadable image: " + picture_path);
                return;
            }

            // Re-encode the image into memory so its bytes can be handed to the workbook.
            ByteArrayOutputStream byteArrayOut = new ByteArrayOutputStream();
            ImageIO.write(bufferImg, "png", byteArrayOut);

            XSSFDrawing patriarch = sheet.createDrawingPatriarch();
            // First four arguments: offsets inside the anchor cells.
            // Last four: top-left column/row and bottom-right column/row (0-based).
            XSSFClientAnchor anchor = new XSSFClientAnchor(0, 0, 0, 0,
                    (short) 0, count * 4, (short) 10, count * 4 + 3);

            // The bytes were written as PNG above, so they are registered as PNG.
            int pictureIndex = wb.addPicture(byteArrayOut.toByteArray(), XSSFWorkbook.PICTURE_TYPE_PNG);
            patriarch.createPicture(anchor, pictureIndex);
        } catch (IOException io) {
            io.printStackTrace();
            System.out.println("io erorr : " + io.getMessage());
        }
    }

    /**
     * Creates one JPEG strip per failed reference of the given statement,
     * named {@code <customerId>_<reference>.jpg} inside the statement's
     * {@code Image} sub-directory.
     *
     * @param e statement whose failed references are rendered
     */
    public void Generate_picture_for_file(Estatement e) {
        ArrayList<String> all_fail_ref_list = e.getAll_fail_Ref_list();
        String target_path = e.getAbsolute_path() + "//Image";
        File f = new File(target_path);
        if (!f.exists()) {
            f.mkdirs();
        }
        for (String ref_str : all_fail_ref_list) {
            Ref r = getKeyWordsAttribute(e.getAbsolute_filename(), ref_str);
            if (r == null) {
                // The PDF could not be opened; the cause was already reported.
                System.err.println("Skipping reference [" + ref_str + "]: unable to read " + e.getAbsolute_filename());
                continue;
            }
            String targetfile = target_path + "/" + e.getCustomer_id() + "_" + ref_str + ".jpg";
            createScreenShootbyLocation(e.getAbsolute_filename(), targetfile, r.getY(), r.getPage());
        }
    }

    /**
     * Loads a PDF and highlights every occurrence of {@code flag} on the page
     * indicated by {@code ref} in yellow.
     *
     * @param sourcefile path of the PDF to load
     * @param flag       text to highlight
     * @param ref        supplies the page index to search
     * @return the loaded document with highlights applied
     */
    public PdfDocument High_Light_Ref(String sourcefile, String flag, Ref ref) {
        PdfDocument pdf = new PdfDocument();
        pdf.loadFromFile(sourcefile);
        // NOTE(review): Ref pages produced by getKeyWords start at 1; confirm
        // whether getPages().get(...) expects a 0-based index here.
        PdfPageBase targetPage = pdf.getPages().get(ref.getPage());
        PdfTextFind[] result = targetPage.findText(flag).getFinds();
        for (PdfTextFind find : result) {
            find.applyHighLight(Color.yellow);
        }
        return pdf;
    }

    /**
     * Opens the PDF at {@code filepath} and looks up {@code keyWords} in it.
     *
     * @param filepath path of the PDF to search
     * @param keyWords text to look for
     * @return the lookup result, or {@code null} when the PDF could not be opened
     */
    public Ref getKeyWordsAttribute(String filepath, String keyWords) {
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(filepath);
            return getKeyWords(pdfReader, keyWords);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
    }

    /**
     * Scans the pages in order and records the first page on which the render
     * listener reports a coordinate for the keyword.
     *
     * @param pdfReader open reader for the PDF
     * @param keyWords  text to look for
     * @return a {@code Ref} with page, value and y set from the first hit;
     *         a freshly constructed {@code Ref} when no page produced a coordinate
     */
    private Ref getKeyWords(PdfReader pdfReader, String keyWords) {
        Ref ref = new Ref();
        try {
            int pageNum = pdfReader.getNumberOfPages();
            PdfReaderContentParser pdfReaderContentParser = new PdfReaderContentParser(pdfReader);
            CustomRenderListener renderListener = new CustomRenderListener();
            renderListener.setKeyWord(keyWords);
            for (int pageNo = 1; pageNo <= pageNum; pageNo++) {
                renderListener.setPage(pageNo);
                pdfReaderContentParser.processContent(pageNo, renderListener);
                float[] coordinate = renderListener.getPcoordinate();
                if (coordinate != null) {
                    ref.setPage(pageNo);
                    ref.setValue(keyWords);
                    // Index 1 of the coordinate array is stored as the y position.
                    ref.setY((int) coordinate[1]);
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return ref;
    }

    /**
     * Renders a 30-unit-high, full-width strip of one PDF page, starting at
     * {@code y - 7}, and writes it to {@code target} as a JPEG.
     *
     * @param source     path of the PDF file
     * @param target     path of the JPEG file to create
     * @param y          y coordinate of the keyword on the page
     * @param page_index page number passed to the PDF renderer
     * @return {@code true} when the image was written; {@code false} when the
     *         source file does not exist or rendering/encoding failed
     */
    public boolean createScreenShootbyLocation(String source, String target, int y, int page_index) {
        File file = new File(source);
        if (!file.exists()) {
            System.err.println("路径[" + source + "]对应的pdf文件不存在!");
            return false;
        }
        RandomAccessFile raf = null;
        FileOutputStream out = null;
        try {
            raf = new RandomAccessFile(file, "r");
            FileChannel channel = raf.getChannel();
            ByteBuffer buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
            PDFFile pdffile = new PDFFile(buf);
            PDFPage pdfPage = pdffile.getPage(page_index);

            // Start 7 units before the keyword's y coordinate and use a fixed height of 30.
            Rectangle rect = new Rectangle(0, y - 7, (int) pdfPage.getBBox().getWidth(), 30);
            Image img = pdfPage.getImage(rect.width, rect.height, // output size
                    rect, // clip rect
                    null, // no ImageObserver
                    true, // fill background with white
                    true  // block until drawing is done
            );

            BufferedImage tag = new BufferedImage(rect.width, rect.height, BufferedImage.TYPE_INT_RGB);
            java.awt.Graphics g = tag.getGraphics();
            try {
                g.drawImage(img, 0, 0, rect.width, rect.height, null);
            } finally {
                g.dispose();
            }

            out = new FileOutputStream(target);
            JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out);
            JPEGEncodeParam jep = JPEGCodec.getDefaultJPEGEncodeParam(tag);
            // Compression quality. The original note states that 1 is the highest value.
            // NOTE(review): 10f exceeds that; confirm the intended setting.
            jep.setQuality(10f, true);
            encoder.encode(tag, jep);
            // Close explicitly on the success path so a failing close is reported as a failure.
            out.close();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            closeQuietly(out);
            closeQuietly(raf);
        }
    }

    /** Closes a resource during cleanup, reporting but not propagating a close failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
相关文章
- Java使用itextpdf生成PDF文件
- Word处理控件Aspose.Words功能演示:使用 Java 将 RTF 转换为 PDF
- 【java细节】Java代码忽略https证书:No subject alternative names present
- 备战金九银十,Java 研发面试题整理 PDF,走到哪刷
- Java 内存溢出(java.lang.OutOfMemoryError)的常见情况和处理方式总结
- 【Java】java数据库连接池配置的几种方法
- 【Java】整理关于java的String类,equals函数和比较操作符的区别
- 【Java】Hibernate4实战 之 第一部分Hibernate入门
- 第58节:Java中的图形界面编程-GUI
- 第七节:详细讲解Java中的日期,java.util.date
- django生成文件txt、pdf(在生成 PDF 文件之前,需要安装 ReportLab 库)
- Java实现QQ邮件发送
- Tomcat 崩溃:java.lang.OutOfMemoryError: Java heap space
- java提取字符串数字,Java获取字符串中的数字
- Java编程常用数据转换:String与int互转、Date与String互转、BigDecimal与int比较(报错operator > cannot be applied to java.math.BigDecimal,int)
- 『Java练习生的自我修养』java-se进阶⁵ • 常用IO流
- Java 实现 HTML 转 PDF 文件
- Java连接MySQL8.0以上版本数据库方式(将驱动改成新版本的解决办法)
- JAVA学习(三):Java基础语法(变量、常量、数据类型、运算符与数据类型转换)
- 蓝桥杯2016年java C组真题及答案
- How to improve Java's I/O performance( 提升 java i/o 性能)
- Java中Integer类的方法
- 2013编程之美资格赛之树上的三角形(Java实现)
- A large-scale study on the usage of Java’s concurrent programming constructs
- Java实现给PDF文件加文字水印和图片水印(可以自定义水印格式)
- java 6大设计原则 一:观察者模式