下面列出了java.awt.DefaultFocusTraversalPolicy#org.apache.pdfbox.pdmodel.PDDocument 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Builds a one-page PDF that shows {@code content}.
 * <p>
 * Two fonts are loaded from the resources "NotoSans-Regular.ttf" and
 * "NotoSansCJKtc-Regular.ttf" (resolved relative to {@code AddTextWithDynamicFonts});
 * {@code fontify} turns these fonts and the content into a list of {@code TextWithFont}
 * items, which are shown one after another inside a single text object.
 * </p>
 *
 * @param content the text to put on the page
 * @return the stream the document has been saved to
 * @throws IOException if loading a font, writing the page content, or saving the document fails
 * @see #testAddLikeCccompanyImproved()
 */
private static ByteArrayOutputStream generatePdfFromStringImproved(String content) throws IOException {
    try (PDDocument pdf = new PDDocument();
            InputStream latinFontData = AddTextWithDynamicFonts.class.getResourceAsStream("NotoSans-Regular.ttf");
            InputStream cjkFontData = AddTextWithDynamicFonts.class.getResourceAsStream("NotoSansCJKtc-Regular.ttf")) {
        PDType0Font latinFont = PDType0Font.load(pdf, latinFontData);
        PDType0Font cjkFont = PDType0Font.load(pdf, cjkFontData);
        List<PDFont> candidateFonts = Arrays.asList(latinFont, cjkFont);
        List<TextWithFont> pieces = fontify(candidateFonts, content);

        PDPage singlePage = new PDPage();
        pdf.addPage(singlePage);

        // The content stream has to be closed before the document is saved.
        try (PDPageContentStream canvas = new PDPageContentStream(pdf, singlePage)) {
            canvas.beginText();
            for (TextWithFont piece : pieces) {
                // 12 is presumably the font size -- confirm against TextWithFont.show
                piece.show(canvas, 12);
            }
            canvas.endText();
        }

        ByteArrayOutputStream result = new ByteArrayOutputStream();
        pdf.save(result);
        return result;
    }
}
/**
 * Creates a DCT-encoded image XObject from an AWT image.
 * <p>
 * The color image obtained from {@code getColorImage} is encoded through
 * {@code encodeImageToJPEGStream} with the given quality and dpi. If {@code getAlphaImage}
 * returns an image, it is converted with {@code JPEGFactory.createFromImage} and stored
 * under the {@code SMask} key of the resulting XObject.
 * </p>
 *
 * @param document the document the image XObject is created for
 * @param image    the AWT image to convert
 * @param quality  quality handed to the JPEG encoder and to the soft mask conversion
 * @param dpi      resolution handed to the JPEG encoder
 * @return the image XObject, carrying a soft mask when an alpha image was found
 * @throws IOException if encoding or XObject creation fails
 */
private static PDImageXObject createJPEG(PDDocument document, BufferedImage image,
        float quality, int dpi) throws IOException
{
    // separate the color data from the alpha channel (if any)
    BufferedImage colorPart = getColorImage(image);
    BufferedImage alphaPart = getAlphaImage(image);

    // encode the color data in memory
    ByteArrayOutputStream encoded = new ByteArrayOutputStream();
    encodeImageToJPEGStream(colorPart, quality, dpi, encoded);

    // wrap the encoded bytes in an image XObject
    int imageWidth = colorPart.getWidth();
    int imageHeight = colorPart.getHeight();
    int bitsPerComponent = colorPart.getColorModel().getComponentSize(0);
    PDImageXObject result = new PDImageXObject(document,
            new ByteArrayInputStream(encoded.toByteArray()),
            COSName.DCT_DECODE, imageWidth, imageHeight, bitsPerComponent,
            getColorSpaceFromAWT(colorPart));

    // alpha -> soft mask
    if (alphaPart != null)
    {
        PDImage softMask = JPEGFactory.createFromImage(document, alphaPart, quality);
        result.getCOSObject().setItem(COSName.SMASK, softMask);
    }
    return result;
}
/**
 * Merges all given documents and saves the result to {@code outputStream}.
 * <p>
 * The target document is opened via {@code openDocument()}, every input is passed to
 * {@code merge(PDDocument)}, a still open {@code currentContents} is closed and reset,
 * and the document is saved. {@code closeDocument()} is always called at the end,
 * whether or not an exception occurred.
 * </p>
 *
 * @param outputStream the stream the merged document is saved to
 * @param inputs       the documents to merge
 * @throws IOException if merging, closing the contents, or saving fails
 */
public void merge(OutputStream outputStream, Iterable<PDDocument> inputs) throws IOException {
    try {
        openDocument();
        for (PDDocument source : inputs) {
            merge(source);
        }
        // close pending page contents before saving
        if (currentContents != null) {
            currentContents.close();
            currentContents = null;
        }
        document.save(outputStream);
    } finally {
        closeDocument();
    }
}
/**
 * <a href="https://stackoverflow.com/questions/59031734/get-text-color-in-pdfbox">
 * Get text color in PDFBox
 * </a>
 * <p>
 * Extracts the text of "furzo Sample.pdf" with a {@link ColorTextStripper}, writes it
 * UTF-8 encoded to "furzo Sample.txt" in the result folder, and prints it to standard out.
 * The same test has already been run for the original color text stripper class from the
 * answer to
 * <a href="https://stackoverflow.com/questions/21430341/identifying-the-text-based-on-the-output-in-pdf-using-pdfbox">
 * Identifying the text based on the output in PDF using PDFBOX
 * </a>
 * </p>
 *
 * @throws IOException if loading the PDF, extracting text, or writing the result fails
 */
@Test
public void testExtractFromFurzoSample() throws IOException {
    try (InputStream pdfStream = getClass().getResourceAsStream("furzo Sample.pdf");
            PDDocument pdf = Loader.loadPDF(pdfStream)) {
        PDFTextStripper colorStripper = new ColorTextStripper();
        String extracted = colorStripper.getText(pdf);

        File target = new File(RESULT_FOLDER, "furzo Sample.txt");
        Files.write(target.toPath(), extracted.getBytes("UTF-8"));

        System.out.println("/// furzo Sample.pdf ///");
        System.out.println("Stripped text with color:");
        System.out.println(">>>");
        System.out.println(extracted);
        System.out.println("<<<");
    }
}
/**
 * Adds this shape via {@code add(...)}, fills it, and notifies the listener.
 *
 * @param pdDocument    the document, passed on to {@code add}
 * @param contentStream the content stream to draw to
 * @param upperLeft     the upper left position, passed on to {@code add} and the listener
 * @param width         the width, passed on to {@code add} and the listener
 * @param height        the height, passed on to {@code add} and the listener
 * @param color         the non-stroking color to set before filling; left unchanged if {@code null}
 * @param drawListener  notified after filling; may be {@code null}
 * @throws IOException if writing to the content stream fails
 */
@Override
public void fill(PDDocument pdDocument, PDPageContentStream contentStream,
        Position upperLeft, float width, float height, Color color,
        DrawListener drawListener) throws IOException {

    add(pdDocument, contentStream, upperLeft, width, height);

    if (color != null) {
        contentStream.setNonStrokingColor(color);
    }
    CompatibilityHelper.fillNonZero(contentStream);

    if (drawListener == null) {
        return;
    }
    drawListener.drawn(this, upperLeft, width, height);
}
/**
 * Creates a PDF consisting of one blank page and saves it to
 * "/home/lili/data/BlankPage.pdf".
 *
 * @param args ignored
 * @throws IOException if saving or closing the document fails
 */
public static void main(String[] args) throws IOException {
    // try-with-resources closes the document even when save() throws
    try (PDDocument document = new PDDocument()) {
        // Create a new blank page and add it to the document
        PDPage blankPage = new PDPage();
        document.addPage(blankPage);
        // Save the newly created document
        document.save("/home/lili/data/BlankPage.pdf");
    }
}
/**
 * Loads "/home/lili/data/test.pdf" and, unless it is encrypted, prints the text of all
 * its pages line by line to standard out.
 *
 * @param args ignored
 * @throws InvalidPasswordException if the document cannot be opened without a password
 * @throws IOException if loading the document or extracting text fails
 */
public static void main(String[] args) throws InvalidPasswordException, IOException {
    try (PDDocument document = PDDocument.load(new File("/home/lili/data/test.pdf"))) {
        if (document.isEncrypted()) {
            return;
        }
        PDFTextStripper tStripper = new PDFTextStripper();
        // To extract a single page or a range of pages, restrict the range with the
        // setters below. Page numbers are 1-based; currently all pages are extracted.
        tStripper.setStartPage(1);
        tStripper.setEndPage(document.getNumberOfPages());
        String pdfFileInText = tStripper.getText(document);
        String[] lines = pdfFileInText.split("\\r?\\n");
        for (String line : lines) {
            System.out.println(line);
        }
    }
}
/**
 * Creates a COS stream from raw (encoded) data.
 * <p>
 * The input is copied unchanged into the raw output stream of a new COS stream created
 * by the document. The input stream itself is not closed here.
 * </p>
 *
 * @param document the document the new stream is created for
 * @param rawInput the already encoded data to copy
 * @return the new COS stream holding the copied data
 * @throws IOException if copying the data or closing the output fails
 */
private static COSStream createRawStream(PDDocument document, InputStream rawInput)
        throws IOException
{
    COSStream stream = document.getDocument().createCOSStream();
    // try-with-resources keeps a copy failure as the primary exception even if close() fails too
    try (OutputStream output = stream.createRawOutputStream())
    {
        IOUtils.copy(rawInput, output);
    }
    return stream;
}
/**
 * <a href="https://stackoverflow.com/questions/56938135/pdfbox-inconsistent-pdtextfield-autosize-behavior-after-setvalue">
 * PDFBox Inconsistent PDTextField Autosize Behavior after setValue
 * </a>
 * <br/>
 * <a href="http://www.filedropper.com/0postfontload">
 * 0.pdf
 * </a>
 * <p>
 * Indeed, some fields look weird after fill-in; for some fields
 * this is due to weird pre-existing appearance streams. These can
 * be fixed as in {@link #testFill0DropOldAppearance()}.
 * </p>
 * <p>
 * The test adds the Lato font to the form's default resources, sets an auto-size
 * default appearance using that font on every text field, fills in "Test", and saves
 * the result as "0-filledLikeXenyal.pdf" in the result folder.
 * </p>
 *
 * @throws IOException if loading, filling in, or saving the document fails
 * @see #testFill0DropOldAppearance()
 * @see #testFill0DropOldAppearanceNoCombNoMax()
 * @see #testFill0DropOldAppearanceNoCombNoMaxNoMultiLine()
 */
@Test
public void testFill0LikeXenyal() throws IOException {
    // the document is a managed resource too, so it is closed even if a step below throws
    try (InputStream originalStream = getClass().getResourceAsStream("0.pdf");
            InputStream fontStream = getClass().getResourceAsStream("Lato-Regular.ttf");
            PDDocument doc = Loader.loadPDF(originalStream))
    {
        PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm();
        PDType0Font font = PDType0Font.load(doc, fontStream, false);
        String fontName = acroForm.getDefaultResources().add(font).getName();

        for (PDField field : acroForm.getFieldTree()) {
            if (field instanceof PDTextField) {
                PDTextField textField = (PDTextField) field;
                // font size 0 in the default appearance requests auto-sizing
                textField.setDefaultAppearance(String.format("/%s 0 Tf 0 g", fontName));
                textField.setValue("Test");
            }
        }

        doc.save(new File(RESULT_FOLDER, "0-filledLikeXenyal.pdf"));
    }
}
/**
 * Checks the "mediaType" field of the indexed document: it is "application/pdf" while
 * {@code file2Index} carries no media type of its own, and it is the media type set on
 * {@code file2Index} afterwards. For the second call the mocked parser hands out a COS
 * document whose {@code close()} throws an {@link IOException}.
 *
 * @throws IOException declared for the stubbed calls
 */
@Test
public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws IOException {
    final String mediaTypeField = "mediaType";
    final String customMediaType = "application/pdf+test";

    PDFParser parserMock = Mockito.mock(PDFParser.class);
    COSDocument failingCosDocument = Mockito.mock(COSDocument.class);
    PDFTextStripper stripperMock = Mockito.mock(PDFTextStripper.class);

    Mockito.doThrow(IOException.class).when(failingCosDocument).close();
    // first call: a plain COS document, second call: the one failing on close()
    Mockito.when(parserMock.getDocument()).thenReturn(new COSDocument()).thenReturn(failingCosDocument);
    Mockito.when(stripperMock.getText(new PDDocument())).thenReturn("");

    PDFIndexer indexer = new PDFIndexerWrapper(parserMock, stripperMock);

    // should return the default media type when media type is not defined in file2Index
    IndexDocument indexed = indexer.getIndexedDocument(file2Index);
    boolean hasDefaultMediaType = "application/pdf".equals(indexed.getFields().get(mediaTypeField).get(0));
    if (!hasDefaultMediaType) {
        Assert.fail();
    }

    // should return the media type we have set in the file2Index even if error occurs in finally block
    file2Index.mediaType = customMediaType;
    indexed = indexer.getIndexedDocument(file2Index);
    boolean hasCustomMediaType = customMediaType.equals(indexed.getFields().get(mediaTypeField).get(0));
    if (!hasCustomMediaType) {
        Assert.fail();
    }
}
/**
 * <a href="http://stackoverflow.com/questions/41071142/pdfbox-remove-a-single-field-from-pdf">
 * PDFBox: Remove a single field from PDF
 * </a>
 * <br/>
 * <a href="https://www.dropbox.com/s/oyv1vjyhkmao1t1/input.pdf?dl=0">
 * input.pdf
 * </a>
 * <p>
 * Runs the {@link HelloSignManipulator} on the sample document, clears the field
 * <code>var1001</code> (<i>address1</i>), and saves the result as
 * "input-clear-address1.pdf" in the result folder.
 * </p>
 *
 * @throws IOException if loading, manipulating, or saving the document fails
 */
@Test
public void testClearAddress1Input() throws IOException
{
    try (InputStream pdfStream = getClass().getResourceAsStream("input.pdf");
            PDDocument samplePdf = Loader.loadPDF(pdfStream))
    {
        HelloSignManipulator manipulator = new HelloSignManipulator(new HelloSignAnalyzer(samplePdf));
        manipulator.clearFields(Collections.singleton("var1001"));

        File target = new File(RESULT_FOLDER, "input-clear-address1.pdf");
        samplePdf.save(target);
    }
}
/**
 * Prints all lines of {@code elements} and places borders after each line.
 * <p>
 * For every line the height (plus the style sheet's line distance) is determined first,
 * then the line is printed via {@code printLine}, and {@code placeBorders} is called
 * with {@code placeFirstBorder} set only for the first and {@code placeLastBorder} set
 * only for the last line. The accumulated heights of the preceding lines are handed to
 * {@code printLine} as its last argument.
 * </p>
 *
 * @param document     the document printed to
 * @param stream       the content stream printed to
 * @param pageNumber   the page number, passed on to {@code printLine}
 * @param startX       the x start position
 * @param startY       the y start position
 * @param allowedWidth the width available for printing
 * @return the y value returned by {@code printLine} for the last line, or
 *         {@code startY} if there are no lines
 * @throws IOException if printing fails
 * @throws IllegalStateException if a title is set, as titles are not implemented
 */
@Override
public float print(PDDocument document, PDPageContentStream stream, int pageNumber, float startX, float startY,
        float allowedWidth) throws IOException {
    if (title != null) {
        throw new IllegalStateException("title not implemented!");
    }

    float currentY = startY;
    float consumedHeight = 0;
    int lineIndex = 0;
    for (ReportElement[] row : elements) {
        // the height is determined before printing, matching the established call order
        float rowHeight = getLineHeight(row, allowedWidth) + pdfStyleSheet.getLineDistance();
        currentY = printLine(document, stream, pageNumber, startX, currentY, allowedWidth, row, consumedHeight);

        placeFirstBorder = (lineIndex == 0);
        placeLastBorder = (lineIndex == elements.length - 1);
        placeBorders(stream, startY, currentY, startX, allowedWidth);

        consumedHeight += rowHeight;
        lineIndex++;
    }
    return currentY;
}
/**
 * <a href="http://stackoverflow.com/questions/41767351/create-pkcs7-signature-from-file-digest">
 * Create pkcs7 signature from file digest
 * </a>
 * <p>
 * A minimal signing framework that only needs a {@link SignatureInterface} instance:
 * a signature dictionary with fixed example values is added to the document, the
 * document is saved incrementally for external signing, and the bytes returned by the
 * signature interface are set as the signature.
 * </p>
 *
 * @param document           the document to sign
 * @param output             the stream the incrementally saved document is written to
 * @param signatureInterface creates the CMS signature container for the content to sign
 * @throws IOException if saving or signing fails
 */
void sign(PDDocument document, OutputStream output, SignatureInterface signatureInterface) throws IOException
{
    PDSignature signatureDictionary = new PDSignature();
    signatureDictionary.setFilter(PDSignature.FILTER_ADOBE_PPKLITE);
    signatureDictionary.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED);
    signatureDictionary.setName("Example User");
    signatureDictionary.setLocation("Los Angeles, CA");
    signatureDictionary.setReason("Testing");
    signatureDictionary.setSignDate(Calendar.getInstance());
    document.addSignature(signatureDictionary);

    ExternalSigningSupport signingSupport = document.saveIncrementalForExternalSigning(output);

    // let the signature interface sign the content, then inject the returned bytes
    byte[] signatureBytes = signatureInterface.sign(signingSupport.getContent());
    signingSupport.setSignature(signatureBytes);
}
/**
 * Writes every image found in the page resources of "pdf/1.pdf" to the "target"
 * directory, naming the files by a running number plus the image's suffix, and prints
 * the COS stream of each image to standard out.
 *
 * @throws IOException if loading the document or writing an image fails
 */
@Test
public void testPdfBox() throws IOException {
    File pdfFile = new File(PdfHelper.PDF_TEST_RESOURCES + "pdf/1.pdf");
    File outDir = new File("target");
    PDDocument document = PDDocument.load(pdfFile);
    try {
        @SuppressWarnings("unchecked")
        List<PDPage> pages = document.getDocumentCatalog().getAllPages();
        int imageId = 0;
        for (PDPage page : pages) {
            for (PDXObjectImage img : page.getResources().getImages().values()) {
                System.out.println(img.getCOSStream().toString());
                img.write2file(new File(outDir, imageId++ + "."
                        + img.getSuffix()));
            }
        }
    } finally {
        // release the document even when extracting an image fails
        document.close();
    }
}
/**
 * Gets PDF Page Count.
 * <p>
 * A failure to load the file is logged and reported as {@code -1}. Problems while
 * closing the document are ignored.
 * </p>
 *
 * @param inputPdfFile input file
 * @return number of pages, or {@code -1} if the file could not be loaded
 */
public static int getPdfPageCount(File inputPdfFile) {
    int pageCount;
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(inputPdfFile);
        pageCount = pdf.getNumberOfPages();
    } catch (IOException ioe) {
        logger.error("Error counting PDF pages => " + ioe);
        pageCount = -1;
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (Exception ignored) {
                // best effort: the result has already been determined at this point
            }
        }
    }
    return pageCount;
}
/**
 * Prints every hit {@code findSubwords} reports for the search term on each page of
 * the document, together with the hit's position, its width, and the position of its
 * last letter. A hit whose text differs from the search term is additionally marked
 * as invalid.
 *
 * @param document   the document to search
 * @param searchTerm the term to look for
 * @throws IOException if searching a page fails
 */
void printSubwords(PDDocument document, String searchTerm) throws IOException
{
    System.out.printf("* Looking for '%s'\n", searchTerm);
    // pages are numbered starting at 1 here
    for (int pageNo = 1; pageNo <= document.getNumberOfPages(); pageNo++)
    {
        for (TextPositionSequence match : findSubwords(document, pageNo, searchTerm))
        {
            if (!searchTerm.equals(match.toString()))
            {
                System.out.printf(" Invalid (%s) ", match.toString());
            }
            TextPosition finalLetter = match.textPositionAt(match.length() - 1);
            System.out.printf(" Page %s at %s, %s with width %s and last letter '%s' at %s, %s\n",
                    pageNo, match.getX(), match.getY(), match.getWidth(),
                    finalLetter.getUnicode(), finalLetter.getXDirAdj(), finalLetter.getYDirAdj());
        }
    }
}
/**
 * Splits the page range given by {@code currentParameters} into {@code filesNumber}
 * parts and writes the parts via {@code writeFiles}.
 * <p>
 * The number of pages per part is the page count of the range divided by
 * {@code filesNumber}, rounded up when the division leaves a remainder.
 * </p>
 *
 * @param source the document to split
 * @return the value returned by {@code writeFiles}, or {@code 0} if any exception occurred
 */
private int splitByFilesNumber(PDDocument source) {
    try {
        int pageTotal = currentParameters.toPage - currentParameters.fromPage + 1;
        // round up so that no more than filesNumber parts are needed
        int pagesPerFile = pageTotal / filesNumber;
        if (pageTotal % filesNumber != 0) {
            pagesPerFile++;
        }

        Splitter pdfSplitter = new Splitter();
        pdfSplitter.setStartPage(currentParameters.fromPage); // 1-based
        pdfSplitter.setEndPage(currentParameters.toPage); // 1-based
        pdfSplitter.setMemoryUsageSetting(AppVariables.pdfMemUsage);
        pdfSplitter.setSplitAtPage(pagesPerFile);

        List<PDDocument> parts = pdfSplitter.split(source);
        return writeFiles(parts);
    } catch (Exception e) {
        logger.error(e.toString());
        return 0;
    }
}
/**
 * <a href="https://stackoverflow.com/questions/54956720/how-to-replace-a-space-with-a-word-while-extract-the-data-from-pdf-using-pdfbox">
 * How to replace a space with a word while extract the data from PDF using PDFBox
 * </a>
 * <br/>
 * <a href="https://drive.google.com/open?id=10ZkdPlGWzMJeahwnQPzE6V7s09d1nvwq">
 * test.pdf
 * </a> as "testWPhromma.pdf"
 * <p>
 * This test shows how to, in principle, extract tagged text: the marked contents of
 * every page are collected by MCID, and the structure tree is then shown together
 * with them via {@code showStructure}.
 * </p>
 *
 * @throws IOException if loading the document or processing a page fails
 */
@Test
public void testExtractTestWPhromma() throws IOException {
    System.out.printf("\n\n===\n%s\n===\n", "testWPhromma.pdf");
    // the document is a managed resource too, so it is closed when the test ends
    try (InputStream resource = getClass().getResourceAsStream("testWPhromma.pdf");
            PDDocument document = Loader.loadPDF(resource)) {
        Map<PDPage, Map<Integer, PDMarkedContent>> markedContents = new HashMap<>();

        for (PDPage page : document.getPages()) {
            PDFMarkedContentExtractor extractor = new PDFMarkedContentExtractor();
            extractor.processPage(page);

            Map<Integer, PDMarkedContent> theseMarkedContents = new HashMap<>();
            markedContents.put(page, theseMarkedContents);
            for (PDMarkedContent markedContent : extractor.getMarkedContents()) {
                theseMarkedContents.put(markedContent.getMCID(), markedContent);
            }
        }

        PDStructureNode root = document.getDocumentCatalog().getStructureTreeRoot();
        showStructure(root, markedContents);
    }
}
/**
 * Tries to open the source with each of the {@code COMMON_PASSWORDS} in turn.
 * <p>
 * For every attempt a fresh scratch file is created. When {@code testPassword} returns
 * a document, that document is protected again with the guessed password and returned;
 * the scratch file is left open in that case. In every other case, including an
 * exception thrown by {@code testPassword}, the scratch file is closed.
 * </p>
 *
 * @param source the PDF data to open
 * @return the document opened with one of the common passwords
 * @throws IOException if creating or closing a scratch file, or handling the document, fails
 * @throws BleachException if none of the common passwords opens the document
 */
private PDDocument getDocument(RandomAccessRead source) throws IOException, BleachException {
    for (String pwd : COMMON_PASSWORDS) {
        ScratchFile scratchFile = new ScratchFile(MEMORY_USAGE_SETTING);
        boolean keepScratchFile = false;
        try {
            PDDocument doc = testPassword(scratchFile, source, pwd);
            if (doc != null) {
                // the scratch file stays open for the returned document
                keepScratchFile = true;
                LOGGER.debug("Password was guessed: '{}'", pwd);
                doc.protect(new StandardProtectionPolicy(pwd, pwd, doc.getCurrentAccessPermission()));
                return doc;
            }
        } finally {
            // also reached when testPassword throws, so failed attempts do not leak the scratch file
            if (!keepScratchFile) {
                scratchFile.close();
            }
        }
    }
    // @TODO: fetch password from config?
    throw new BleachException("PDF is protected with an unknown password");
}
/**
 * Creates a form backed by a new dictionary whose {@code Fields} entry is an empty array.
 *
 * @param doc The document that this form is part of.
 */
public PDAcroForm(PDDocument doc)
{
    COSDictionary formDictionary = new COSDictionary();
    formDictionary.setItem(COSName.FIELDS, new COSArray());
    this.dictionary = formDictionary;
    this.document = doc;
}
/**
 * <a href="http://stackoverflow.com/questions/39720305/ufffd-is-not-available-in-this-fonts-encoding-winansiencoding">
 * U+FFFD is not available in this font's encoding: WinAnsiEncoding
 * </a>
 * <p>
 * The issue cannot be reproduced.
 * </p>
 * <p>
 * The test sets a value on the field named "Title", makes it read-only, flattens all
 * top-level fields, and saves the result as "FillFormFieldStDdt.pdf" in the result folder.
 * </p>
 *
 * @throws IOException if loading, filling in, flattening, or saving the document fails
 */
@Test
public void testFillLikeStDdt() throws IOException
{
    // the document is a managed resource too, so it is closed even if a step below throws
    try (InputStream originalStream = getClass().getResourceAsStream("FillFormField.pdf");
            PDDocument pdfDocument = Loader.loadPDF(originalStream))
    {
        PDAcroForm acroForm = pdfDocument.getDocumentCatalog().getAcroForm();
        if (acroForm != null)
        {
            List<PDField> fields = acroForm.getFields();
            for (PDField field : fields) {
                switch (field.getPartialName()) {
                case "Title" /*"devices"*/:
                    // NOTE(review): the literal contains U+FFFD; given the linked question this may
                    // be a mis-encoded umlaut kept on purpose -- confirm before changing it
                    field.setValue("Ger�t");
                    field.setReadOnly(true);
                    break;
                }
            }
            acroForm.flatten(fields, true);
        }

        pdfDocument.save(new File(RESULT_FOLDER, "FillFormFieldStDdt.pdf"));
    }
}
/**
 * Creates a form backed by a new dictionary whose {@code Fields} entry is an empty array.
 *
 * @param doc The document that this form is part of.
 */
public PDAcroForm(PDDocument doc)
{
    COSDictionary formDictionary = new COSDictionary();
    formDictionary.setItem(COSName.FIELDS, new COSArray());
    this.dictionary = formDictionary;
    this.document = doc;
}
/**
 * Overlays a generated PDF document with another PDF (containing the company
 * stationery, for example) and stores the result back into the generated document.
 *
 * @param context the context used to read and store the file document contents
 * @param generatedDocumentMendixObject The document to overlay; it receives the result
 * @param overlayMendixObject The document containing the overlay
 * @param onTopOfContent if true, the overlay is put in the foreground, otherwise in the background
 * @return always {@code true}; failures are reported by exception
 * @throws IOException if loading, overlaying, or saving a document fails
 */
public static boolean overlayPdf(IContext context, IMendixObject generatedDocumentMendixObject, IMendixObject overlayMendixObject, boolean onTopOfContent) throws IOException {
    LOG.trace("Retrieve generated document");
    try (
            PDDocument generatedPdf = PDDocument.load(Core.getFileDocumentContent(context, generatedDocumentMendixObject));
            PDDocument stationeryPdf = PDDocument.load(Core.getFileDocumentContent(context, overlayMendixObject));
            ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        LOG.trace("Overlay PDF start, retrieve overlay PDF");
        LOG.trace("Perform overlay");

        Overlay overlay = new Overlay();
        overlay.setInputPDF(generatedPdf);
        overlay.setDefaultOverlayPDF(stationeryPdf);
        overlay.setOverlayPosition(onTopOfContent
                ? Overlay.Position.FOREGROUND
                : Overlay.Position.BACKGROUND);

        LOG.trace("Save result in output stream");
        // an empty map: no page-specific overlays, only the default overlay is configured
        overlay.overlay(new HashMap<>()).save(buffer);

        LOG.trace("Duplicate result in input stream");
        try (InputStream mergedContent = new ByteArrayInputStream(buffer.toByteArray())) {
            LOG.trace("Store result in original document");
            Core.storeFileDocumentContent(context, generatedDocumentMendixObject, mergedContent);
        }
    }
    LOG.trace("Overlay PDF end");
    return true;
}
/**
 * Adds page numbering and front page details to an existing PDF and saves the result
 * back to the same path.
 * <p>
 * Each of the two parts is written only when the corresponding getter of
 * {@code details} returns a non-null value.
 * </p>
 * <p>
 * NOTE(review): the document is saved to the file it was loaded from while it is still
 * open -- confirm that the PDFBox version in use supports this.
 * </p>
 *
 * @param input
 *            The PDF path to add the information to.
 * @param details
 *            The details to be added.
 *
 * @throws IOException
 *             If there is an error loading the document or writing the data.
 */
public static void addInformation(Path input, ExportDetails details) throws IOException {
    try (PDDocument document = PDDocument.load(input.toFile())) {
        // page numbers, only when requested
        if (details.getPageNumbering() != null) {
            writePageNumbering(document, PDType1Font.HELVETICA_BOLD, 16.0f, details.getPageNumbering());
        }

        // front page details, only when requested
        if (details.getFrontpageDetails() != null) {
            writeFrontpageDetails(document, PDType1Font.HELVETICA_BOLD, 18.0f, details.getFrontpageDetails());
        }

        document.save(input.toFile());
    }
}
/**
 * Checks that the upper-case hex SHA-1 digest of the first signature's contents equals
 * the expected VRI value.
 *
 * @throws Exception if loading the document or reading the signature contents fails
 */
@Test
public void vri() throws Exception {
    String path = "/validation/Signature-P-HU_MIC-3.pdf";
    String vriValue = "C41B1DBFE0E816D8A6F99A9DB98FD43960A5CF45";

    // the document and both resource streams are closed when the test ends;
    // fully qualified names avoid relying on an import this method did not need before
    try (java.io.InputStream documentStream = getClass().getResourceAsStream(path);
            PDDocument pdDoc = PDDocument.load(documentStream);
            java.io.InputStream contentStream = getClass().getResourceAsStream(path)) {
        List<PDSignature> signatureDictionaries = pdDoc.getSignatureDictionaries();
        assertTrue(Utils.isCollectionNotEmpty(signatureDictionaries));
        PDSignature pdSignature = signatureDictionaries.get(0);

        byte[] contents = pdSignature.getContents(contentStream);
        byte[] digest = DSSUtils.digest(DigestAlgorithm.SHA1, contents);
        assertEquals(vriValue, Utils.upperCase(Utils.toHex(digest)));
        // We can't use CMSSignedData, the pdSignature content is trimmed (000000)
    }
}
/**
 * Draws the image with the given size so that its upper left corner is at
 * {@code upperLeft}. The XObject for the image is obtained from {@code getCachedImage}.
 *
 * @param image         the image to draw
 * @param document      the document, passed on to {@code getCachedImage}
 * @param contentStream the content stream to draw to
 * @param upperLeft     the position of the upper left corner
 * @param width         the width to draw the image with
 * @param height        the height to draw the image with
 * @throws IOException if creating or drawing the image fails
 */
public static void drawImage(final BufferedImage image,
        final PDDocument document, final PDPageContentStream contentStream,
        Position upperLeft, final float width, final float height)
        throws IOException {
    PDXObjectImage imageObject = getCachedImage(document, image);
    // drawXObject gets the lower left corner, hence the height is subtracted from y
    float left = upperLeft.getX();
    float bottom = upperLeft.getY() - height;
    contentStream.drawXObject(imageObject, left, bottom, width, height);
}
/**
 * Creates a new single-page document from the given page with the text showing
 * operators {@code TJ} and {@code Tj} removed from its content.
 * <p>
 * The page content is parsed into tokens; for each {@code TJ} or {@code Tj} operator
 * the operator itself and the token right before it (its one argument) are dropped.
 * The remaining tokens are written as Flate-encoded content of the page imported
 * into the new document, which shares the resources of the original page.
 * </p>
 *
 * @param page the page to copy without text
 * @return a new document containing the text-free copy of the page
 * @throws IOException if parsing the page or writing the new content fails
 */
private PDDocument removeText(PDPage page) throws IOException {
    PDFStreamParser parser = new PDFStreamParser(page);
    parser.parse();
    List<Object> tokens = parser.getTokens();
    List<Object> newTokens = new ArrayList<>();
    for (Object token : tokens) {
        if (token instanceof Operator) {
            String operatorName = ((Operator) token).getName();
            if ("TJ".equals(operatorName) || "Tj".equals(operatorName)) {
                // remove the one argument to this operator; the guard keeps a stray
                // operator at the very start from failing with an index error
                if (!newTokens.isEmpty()) {
                    newTokens.remove(newTokens.size() - 1);
                }
                continue;
            }
        }
        newTokens.add(token);
    }

    PDDocument document = new PDDocument();
    PDPage newPage = document.importPage(page);
    newPage.setResources(page.getResources());

    PDStream newContents = new PDStream(document);
    // try-with-resources closes the stream even when writing the tokens fails
    try (OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE)) {
        ContentStreamWriter writer = new ContentStreamWriter(out);
        writer.writeTokens(newTokens);
    }
    newPage.setContents(newContents);
    return document;
}
/**
 * Looks up the first object of the document with the given object number.
 *
 * @param pdDocument   the document whose objects are searched
 * @param objectNumber the object number to look for
 * @return the first matching object, or {@code null} if there is none
 */
private COSObject getByObjectNumber(PDDocument pdDocument, Long objectNumber) {
    COSObject match = null;
    for (COSObject candidate : pdDocument.getDocument().getObjects()) {
        // NOTE(review): relies on getObjectNumber() being a primitive so that == compares
        // numerically (unboxing objectNumber) -- confirm for the PDFBox version in use
        if (candidate.getObjectNumber() == objectNumber) {
            match = candidate;
            break;
        }
    }
    return match;
}
/**
 * Creates a title block for the document iteration and checks that the text extracted
 * from it is not empty and contains the user login, the iteration id, and the
 * description of the document revision.
 *
 * @throws Exception if creating, loading, or reading the title block fails
 */
@Test
public void createTitleBlockForDocumentIterationTest() throws Exception {
    DocumentTitleBlockData documentTitleData = new DocumentTitleBlockData(documentIteration, new Locale("en"));
    byte[] titleBlock = new TitleBlockWriter(documentTitleData).createTitleBlock();

    String text;
    // try-with-resources closes the document even when text extraction fails
    try (PDDocument loadedDocument = PDDocument.load(titleBlock)) {
        Assert.assertNotNull(loadedDocument);
        text = new PDFTextStripper().getText(loadedDocument);
    }

    Assert.assertFalse(text.isEmpty());
    Assert.assertTrue(text.contains(user.getLogin()));
    Assert.assertTrue(text.contains(documentIteration.getId()));
    Assert.assertTrue(text.contains(documentIteration.getDocumentRevision().getDescription()));
}
/**
 * Creates a new {@link PDDocument}, adds the given page to it, and stores the document
 * as the template of the PDF structure.
 *
 * @param page the page to add to the template document
 * @throws IOException declared by the overridden method
 */
@Override
public void createTemplate(PDPage page) throws IOException
{
    PDDocument templateDocument = new PDDocument();
    templateDocument.addPage(page);
    // the document is handed over to the structure and therefore not closed here
    pdfStructure.setTemplate(templateDocument);
}