zl程序教程

您现在的位置是:首页 >  后端

当前栏目

Java从PDF中根据关键字实现截图

JAVA PDF 实现 关键字 根据 截图
2023-09-11 14:22:10 时间

package com.utils;

import java.awt.Color;

import java.awt.Image;

import java.awt.Rectangle;

import java.awt.image.BufferedImage;

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.RandomAccessFile;

import java.nio.ByteBuffer;

import java.nio.channels.FileChannel;

import java.util.ArrayList;

import javax.imageio.ImageIO;

import org.apache.poi.xssf.usermodel.XSSFClientAnchor;

import org.apache.poi.xssf.usermodel.XSSFDrawing;

import org.apache.poi.xssf.usermodel.XSSFSheet;

import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import com.entity.Estatement;

import com.entity.Ref;

import com.itextpdf.text.pdf.PdfReader;

import com.itextpdf.text.pdf.parser.PdfReaderContentParser;

import com.spire.pdf.PdfDocument;

import com.spire.pdf.PdfPageBase;

import com.spire.pdf.general.find.PdfTextFind;

import com.sun.image.codec.jpeg.JPEGCodec;

import com.sun.image.codec.jpeg.JPEGEncodeParam;

import com.sun.image.codec.jpeg.JPEGImageEncoder;

import com.sun.pdfview.PDFFile;

import com.sun.pdfview.PDFPage;

import temp.CustomRenderListener;

public class PictureShot {

static int page = 0;

public static void main(String[] args) throws Exception {

PictureShot p = new PictureShot();

//String sourcefile="D:/Project/e-Statement/estatement_pdf/2021_05_10_estatement/8000024987_est_6bdda4bdddf8a800355a924157b5408e.pdf";

String sourcefile="D:\\Project\\e-Statement\\estatement_pdf\\2021_05_10_estatement\\8000024987_est_6bdda4bdddf8a800355a924157b5408e.pdf";

//String targetfile="D:\\Project\\e-Statement\\estatement_pdf\\2021_05_10_estatement\\out88.jpg";

String flag = "FT21098B376L";

int index= sourcefile.lastIndexOf("\\");

String Dir = sourcefile.substring(0, index);

String filename=sourcefile.substring(index+1);

String Customer_ID = filename.substring(0,10);

String target_path = Dir+"//Image";

File f = new File(target_path);

if(!f.exists()) {

f.mkdirs();

}

String targetfile=target_path+"//"+Customer_ID+"_"+flag+".jpg";

Ref r= p.getKeyWordsAttribute(sourcefile,flag);

p.createScreenShootbyLocation(sourcefile, targetfile, r.getY(),r.getPage());

}

public XSSFWorkbook Generate_picture(ArrayList<Estatement> Estatement_List,ArrayList<String> fail_Customer_id_list,XSSFWorkbook wk,String folder_path) {

for(int i=0;i<Estatement_List.size();i++) {

Generate_picture_for_file(Estatement_List.get(i));

}

String picture_path = folder_path+"//Image";

for(int i=0;i<fail_Customer_id_list.size();i++) {

String ID = fail_Customer_id_list.get(i);

File file = new File(picture_path);

File[] fs = file.listFiles();

int count=0;

for(File f:fs) {

String file_name = f.getName();

if(file_name.startsWith(ID)&&f.getName().contains(".jpg")) {

PictureWriteToExcel(f.getAbsolutePath().trim(),wk,wk.getSheet(ID),count);

count++;

}

}

}

return wk;

}

public void PictureWriteToExcel(String picture_path,XSSFWorkbook wb,XSSFSheet sheet,int count) {

FileOutputStream fileOut = null;

BufferedImage bufferImg = null;//图片

try {

// 先把读进来的图片放到一个ByteArrayOutputStream中,以便产生ByteArray

ByteArrayOutputStream byteArrayOut = new ByteArrayOutputStream();

//将图片读到BufferedImage

bufferImg = ImageIO.read(new File(picture_path));

// 将图片写入流中

ImageIO.write(bufferImg, "png", byteArrayOut);

// 创建一个工作薄

//XSSFWorkbook wb = new XSSFWorkbook();

//创建一个sheet

//XSSFSheet sheet = wb.createSheet("out put excel");

// 利用HSSFPatriarch将图片写入EXCEL

//HSSFPatriarch patriarch = sheet.createDrawingPatriarch();

XSSFDrawing patriarch = sheet.createDrawingPatriarch();

/**

* 该构造函数有8个参数

* 前四个参数是控制图片在单元格的位置,分别是图片距离单元格left,top,right,bottom的像素距离

* 后四个参数,前两个表示图片左上角所在的cellNum和 rowNum,后两个参数对应的表示图片右下角所在的cellNum和 rowNum,

* excel中的cellNum和rowNum的index都是从0开始的

*

*/

//图片一导出到单元格中

//XSSFClientAnchor anchor = new XSSFClientAnchor(0, 0, 0, 0,

//(short) 0, 0, (short) 6, 12);

XSSFClientAnchor anchor = new XSSFClientAnchor(0, 0, 0, 0,

(short) 0, count*4, (short) 10, count*4+3);

// 插入图片

patriarch.createPicture(anchor, wb.addPicture(byteArrayOut

.toByteArray(), XSSFWorkbook.PICTURE_TYPE_JPEG));

//生成的excel文件地址

//fileOut = new FileOutputStream("D:\\123.xlsx");

// 写入excel文件

//wb.write(fileOut);

} catch (IOException io) {

io.printStackTrace();

System.out.println("io erorr : " + io.getMessage());

} finally {

if (fileOut != null) {

try {

fileOut.close();

} catch (IOException e) {

e.printStackTrace();

}

}

}

}

public void Generate_picture_for_file(Estatement e) {

ArrayList<String> all_fail_ref_list = e.getAll_fail_Ref_list();

PictureShot p = new PictureShot();

String target_path = e.getAbsolute_path()+"//Image";

File f = new File(target_path);

if(!f.exists()) {

f.mkdirs();

}

for(int i=0;i<all_fail_ref_list.size();i++) {

String ref_str = all_fail_ref_list.get(i);

Ref r= getKeyWordsAttribute(e.getAbsolute_filename(),ref_str);

String targetfile = target_path+"/"+e.getCustomer_id()+"_"+ref_str+".jpg";

p.createScreenShootbyLocation(e.getAbsolute_filename(), targetfile, r.getY(),r.getPage());

}

}

public PdfDocument High_Light_Ref(String sourcefile,String flag,Ref ref) {

PdfDocument pdf = new PdfDocument();

pdf.loadFromFile(sourcefile);

PdfTextFind[] result = null;

//获取特定页

PdfPageBase page = pdf.getPages().get(ref.getPage());

result = page.findText(flag).getFinds();

for (PdfTextFind find : result) {

//高亮显示查找结果

find.applyHighLight(Color.yellow);

}

return pdf;

}

public Ref getKeyWordsAttribute( String filepath, String keyWords) {

Ref ref = null;

try{

PdfReader pdfReader = new PdfReader(filepath);

ref = getKeyWords(pdfReader, keyWords);

} catch (IOException e) {

e.printStackTrace();

}

return ref;

}

private Ref getKeyWords(PdfReader pdfReader, String keyWords) {

Ref ref= new Ref();

float[] coordinate = null;

try{

int pageNum = pdfReader.getNumberOfPages();

PdfReaderContentParser pdfReaderContentParser = new PdfReaderContentParser(pdfReader);

CustomRenderListener renderListener = new CustomRenderListener();

renderListener.setKeyWord(keyWords);

for (page = 1; page <= pageNum; page++) {

renderListener.setPage(page);

pdfReaderContentParser.processContent(page, renderListener);

coordinate = renderListener.getPcoordinate();

//System.out.println("Page: "+ page+" coordinate: "+coordinate);

if (coordinate != null) {

ref.setPage(page);

ref.setValue(keyWords);

ref.setY((int)coordinate[1]);

break;

}

}

} catch (IOException e) {

e.printStackTrace();

}

return ref;

}

public boolean createScreenShootbyLocation(String source, String target,int y,int page_index) {

File file = new File(source);

if (!file.exists()) {

System.err.println("路径[" + source + "]对应的pdf文件不存在!");

return false;

}

try{

@SuppressWarnings("resource")

RandomAccessFile raf = new RandomAccessFile(file, "r");

FileChannel channel = raf.getChannel();

ByteBuffer buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());

PDFFile pdffile = new PDFFile(buf);

//int num = pdffile.getNumPages();

PDFPage page = pdffile.getPage(page_index);

//获得y轴的坐标后减去7,高度设置为30

Rectangle rect = new Rectangle(0, y-7, (int) page.getBBox().getWidth(), 30);

// generate the image

//PDFPage pa= generate_new_page(page, pdffile);

//page.addFillAlpha(1.5f);

Image img = page.getImage(rect.width, rect.height, // width &

rect, // clip rect

null, // null for the ImageObserver

true, // fill background with white

true // block until drawing is done

);

BufferedImage tag = new BufferedImage(rect.width, rect.height, BufferedImage.TYPE_INT_RGB);

tag.getGraphics().drawImage(img, 0, 0, rect.width, rect.height,null);

FileOutputStream out = new FileOutputStream(target);

JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out);

JPEGEncodeParam jep = JPEGCodec.getDefaultJPEGEncodeParam(tag);

jep.setQuality(10f, true); //压缩质量, 1 是最高值

//encoder.setJPEGEncodeParam(jep);

encoder.encode(tag, jep);

//encoder.encode(tag); // JPEG编码

out.close();

return true;

}catch(Exception e){

e.printStackTrace();

return true;

}

}

}