//========================================================================
//
// PDFDoc.h
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005, 2006, 2008 Brad Hards
// Copyright (C) 2005, 2009, 2014, 2015, 2017-2020 Albert Astals Cid
// Copyright (C) 2008 Julien Rebetez
// Copyright (C) 2008 Pino Toscano
// Copyright (C) 2008 Carlos Garcia Campos
// Copyright (C) 2009 Eric Toombs
// Copyright (C) 2009 Kovid Goyal
// Copyright (C) 2010, 2014 Hib Eris
// Copyright (C) 2010 Srinivas Adicherla
// Copyright (C) 2011, 2013, 2014, 2016 Thomas Freitag
// Copyright (C) 2012 Fabio D'Urso
// Copyright (C) 2013, 2017 Adrian Johnson
// Copyright (C) 2013, 2018 Adam Reichold
// Copyright (C) 2013 Adrian Perez de Castro
// Copyright (C) 2015 André Guerreiro
// Copyright (C) 2015 André Esser
// Copyright (C) 2016 Jakub Alba
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>.
Work sponsored by the LiMux project of the city of Munich // Copyright (C) 2018 Evangelos Rigas // Copyright (C) 2020 Oliver Sander // Copyright (C) 2020 Nelson Benítez León // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git // //======================================================================== #ifndef PDFDOC_H #define PDFDOC_H #include #include "poppler-config.h" #include #include "XRef.h" #include "Catalog.h" #include "Page.h" #include "Annot.h" #include "Form.h" #include "OptionalContent.h" #include "Stream.h" class GooString; class GooFile; class BaseStream; class OutputDev; class Links; class LinkAction; class LinkDest; class Outline; class Linearization; class SecurityHandler; class Hints; class StructTreeRoot; enum PDFWriteMode { writeStandard, writeForceRewrite, writeForceIncremental }; enum PDFSubtype { subtypeNull, subtypePDFA, subtypePDFE, subtypePDFUA, subtypePDFVT, subtypePDFX, subtypeNone }; enum PDFSubtypePart { subtypePartNull, subtypePart1, subtypePart2, subtypePart3, subtypePart4, subtypePart5, subtypePart6, subtypePart7, subtypePart8, subtypePartNone }; enum PDFSubtypeConformance { subtypeConfNull, subtypeConfA, subtypeConfB, subtypeConfG, subtypeConfN, subtypeConfP, subtypeConfPG, subtypeConfU, subtypeConfNone }; //------------------------------------------------------------------------ // PDFDoc //------------------------------------------------------------------------ class PDFDoc { public: PDFDoc(const GooString *fileNameA, const GooString *ownerPassword = nullptr, const GooString *userPassword = nullptr, void *guiDataA = nullptr); #ifdef _WIN32 PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword = nullptr, GooString *userPassword = nullptr, void *guiDataA = nullptr); #endif PDFDoc(BaseStream *strA, const GooString *ownerPassword = nullptr, const GooString *userPassword = nullptr, void *guiDataA = nullptr); 
~PDFDoc(); PDFDoc(const PDFDoc &) = delete; PDFDoc &operator=(const PDFDoc &) = delete; static PDFDoc *ErrorPDFDoc(int errorCode, const GooString *fileNameA = nullptr); // Was PDF document successfully opened? bool isOk() const { return ok; } // Get the error code (if isOk() returns false). int getErrorCode() const { return errCode; } // Get the error code returned by fopen() (if getErrorCode() == // errOpenFile). int getFopenErrno() const { return fopenErrno; } // Get file name. const GooString *getFileName() const { return fileName; } #ifdef _WIN32 wchar_t *getFileNameU() { return fileNameU; } #endif // Get the linearization table. Linearization *getLinearization(); bool checkLinearization(); // Get the xref table. XRef *getXRef() const { return xref; } // Get catalog. Catalog *getCatalog() const { return catalog; } // Get optional content configuration OCGs *getOptContentConfig() const { return catalog->getOptContentConfig(); } // Get base stream. BaseStream *getBaseStream() const { return str; } // Get page parameters. double getPageMediaWidth(int page) { return getPage(page) ? getPage(page)->getMediaWidth() : 0.0; } double getPageMediaHeight(int page) { return getPage(page) ? getPage(page)->getMediaHeight() : 0.0; } double getPageCropWidth(int page) { return getPage(page) ? getPage(page)->getCropWidth() : 0.0; } double getPageCropHeight(int page) { return getPage(page) ? getPage(page)->getCropHeight() : 0.0; } int getPageRotate(int page) { return getPage(page) ? getPage(page)->getRotate() : 0; } // Get number of pages. int getNumPages(); // Return the contents of the metadata stream, or nullptr if there is // no metadata. const GooString *readMetadata() const { return catalog->readMetadata(); } // Return the structure tree root object. const StructTreeRoot *getStructTreeRoot() const { return catalog->getStructTreeRoot(); } // Get page. Page *getPage(int page); // Display a page. 
void displayPage(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data) = nullptr, void *abortCheckCbkData = nullptr, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr, bool copyXRef = false); // Display a range of pages. void displayPages(OutputDev *out, int firstPage, int lastPage, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data) = nullptr, void *abortCheckCbkData = nullptr, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr); // Display part of a page. void displayPageSlice(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, int sliceX, int sliceY, int sliceW, int sliceH, bool (*abortCheckCbk)(void *data) = nullptr, void *abortCheckCbkData = nullptr, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr, bool copyXRef = false); // Find a page, given its object ID. Returns page number, or 0 if // not found. int findPage(const Ref ref) { return catalog->findPage(ref); } // Returns the links for the current page, transferring ownership to // the caller. Links *getLinks(int page); // Find a named destination. Returns the link destination, or // nullptr if is not a destination. std::unique_ptr findDest(const GooString *name) { return catalog->findDest(name); } // Process the links for a page. void processLinks(OutputDev *out, int page); // Return the outline object. Outline *getOutline(); // Is the file encrypted? bool isEncrypted() { return xref->isEncrypted(); } std::vector getSignatureFields(); // Check various permissions. 
bool okToPrint(bool ignoreOwnerPW = false) { return xref->okToPrint(ignoreOwnerPW); } bool okToPrintHighRes(bool ignoreOwnerPW = false) { return xref->okToPrintHighRes(ignoreOwnerPW); } bool okToChange(bool ignoreOwnerPW = false) { return xref->okToChange(ignoreOwnerPW); } bool okToCopy(bool ignoreOwnerPW = false) { return xref->okToCopy(ignoreOwnerPW); } bool okToAddNotes(bool ignoreOwnerPW = false) { return xref->okToAddNotes(ignoreOwnerPW); } bool okToFillForm(bool ignoreOwnerPW = false) { return xref->okToFillForm(ignoreOwnerPW); } bool okToAccessibility(bool ignoreOwnerPW = false) { return xref->okToAccessibility(ignoreOwnerPW); } bool okToAssemble(bool ignoreOwnerPW = false) { return xref->okToAssemble(ignoreOwnerPW); } // Is this document linearized? bool isLinearized(bool tryingToReconstruct = false); // Return the document's Info dictionary (if any). Object getDocInfo() { return xref->getDocInfo(); } Object getDocInfoNF() { return xref->getDocInfoNF(); } // Remove the document's Info dictionary and update the trailer dictionary. void removeDocInfo() { xref->removeDocInfo(); } // Set doc info string entry. nullptr or empty value will cause a removal. // Takes ownership of value. void setDocInfoStringEntry(const char *key, GooString *value); // Set document's properties in document's Info dictionary. // nullptr or empty value will cause a removal. // Takes ownership of value. 
void setDocInfoTitle(GooString *title) { setDocInfoStringEntry("Title", title); } void setDocInfoAuthor(GooString *author) { setDocInfoStringEntry("Author", author); } void setDocInfoSubject(GooString *subject) { setDocInfoStringEntry("Subject", subject); } void setDocInfoKeywords(GooString *keywords) { setDocInfoStringEntry("Keywords", keywords); } void setDocInfoCreator(GooString *creator) { setDocInfoStringEntry("Creator", creator); } void setDocInfoProducer(GooString *producer) { setDocInfoStringEntry("Producer", producer); } void setDocInfoCreatDate(GooString *creatDate) { setDocInfoStringEntry("CreationDate", creatDate); } void setDocInfoModDate(GooString *modDate) { setDocInfoStringEntry("ModDate", modDate); } // Get document's properties from document's Info dictionary. // Returns nullptr on fail. // Returned GooStrings should be freed by the caller. GooString *getDocInfoStringEntry(const char *key); GooString *getDocInfoTitle() { return getDocInfoStringEntry("Title"); } GooString *getDocInfoAuthor() { return getDocInfoStringEntry("Author"); } GooString *getDocInfoSubject() { return getDocInfoStringEntry("Subject"); } GooString *getDocInfoKeywords() { return getDocInfoStringEntry("Keywords"); } GooString *getDocInfoCreator() { return getDocInfoStringEntry("Creator"); } GooString *getDocInfoProducer() { return getDocInfoStringEntry("Producer"); } GooString *getDocInfoCreatDate() { return getDocInfoStringEntry("CreationDate"); } GooString *getDocInfoModDate() { return getDocInfoStringEntry("ModDate"); } // Return the PDF subtype, part, and conformance PDFSubtype getPDFSubtype() const { return pdfSubtype; } PDFSubtypePart getPDFSubtypePart() const { return pdfPart; } PDFSubtypeConformance getPDFSubtypeConformance() const { return pdfConformance; } // Return the PDF version specified by the file. 
int getPDFMajorVersion() const { return pdfMajorVersion; } int getPDFMinorVersion() const { return pdfMinorVersion; } // Return the PDF ID in the trailer dictionary (if any). bool getID(GooString *permanent_id, GooString *update_id) const; // Save one page with another name. int savePageAs(const GooString *name, int pageNo); // Save this file with another name. int saveAs(const GooString *name, PDFWriteMode mode = writeStandard); // Save this file in the given output stream. int saveAs(OutStream *outStr, PDFWriteMode mode = writeStandard); // Save this file with another name without saving changes int saveWithoutChangesAs(const GooString *name); // Save this file in the given output stream without saving changes int saveWithoutChangesAs(OutStream *outStr); // Return a pointer to the GUI (XPDFCore or WinPDFCore object). void *getGUIData() { return guiData; } // rewrite pageDict with MediaBox, CropBox and new page CTM void replacePageDict(int pageNo, int rotate, const PDFRectangle *mediaBox, const PDFRectangle *cropBox); void markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set *alreadyMarkedDicts = nullptr); bool markAnnotations(Object *annots, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldPageNum, int newPageNum, std::set *alreadyMarkedDicts = nullptr); void markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum); // write all objects used by pageDict to outStr unsigned int writePageObjects(OutStream *outStr, XRef *xRef, unsigned int numOffset, bool combine = false); static void writeObject(Object *obj, OutStream *outStr, XRef *xref, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set *alreadyWrittenDicts = nullptr); static void writeObject(Object *obj, OutStream *outStr, XRef *xref, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, 
int keyLength, Ref ref, std::set *alreadyWrittenDicts = nullptr); static void writeHeader(OutStream *outStr, int major, int minor); static Object createTrailerDict(int uxrefSize, bool incrUpdate, Goffset startxRef, Ref *root, XRef *xRef, const char *fileName, Goffset fileSize); static void writeXRefTableTrailer(Object &&trailerDict, XRef *uxref, bool writeAllEntries, Goffset uxrefOffset, OutStream *outStr, XRef *xRef); static void writeXRefStreamTrailer(Object &&trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream *outStr, XRef *xRef); // scans the PDF and returns whether it contains any javascript bool hasJavascript(); private: // insert referenced objects in XRef void markDictionnary(Dict *dict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set *alreadyMarkedDicts); void markObject(Object *obj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set *alreadyMarkedDicts = nullptr); static void writeDictionnary(Dict *dict, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set *alreadyWrittenDicts); // Write object header to current file stream and return its offset static Goffset writeObjectHeader(Ref *ref, OutStream *outStr); static void writeObjectFooter(OutStream *outStr); inline void writeObject(Object *obj, OutStream *outStr, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen) { writeObject(obj, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, { objNum, objGen }); } inline void writeObject(Object *obj, OutStream *outStr, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref) { writeObject(obj, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, ref); } static void writeStream(Stream *str, OutStream *outStr); static void writeRawStream(Stream *str, OutStream *outStr); void writeXRefTableTrailer(Goffset 
uxrefOffset, XRef *uxref, bool writeAllEntries, int uxrefSize, OutStream *outStr, bool incrUpdate); static void writeString(const GooString *s, OutStream *outStr, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref); void saveIncrementalUpdate(OutStream *outStr); void saveCompleteRewrite(OutStream *outStr); Page *parsePage(int page); // Get hints. Hints *getHints(); PDFDoc(); void init(); bool setup(const GooString *ownerPassword, const GooString *userPassword); bool checkFooter(); void checkHeader(); bool checkEncryption(const GooString *ownerPassword, const GooString *userPassword); void extractPDFSubtype(); // Get the offset of the start xref table. Goffset getStartXRef(bool tryingToReconstruct = false); // Get the offset of the entries in the main XRef table of a // linearized document (0 for non linearized documents). Goffset getMainXRefEntriesOffset(bool tryingToReconstruct = false); long long strToLongLong(const char *s); const GooString *fileName; #ifdef _WIN32 wchar_t *fileNameU; #endif GooFile *file; BaseStream *str; void *guiData; int pdfMajorVersion; int pdfMinorVersion; PDFSubtype pdfSubtype; PDFSubtypePart pdfPart; PDFSubtypeConformance pdfConformance; Linearization *linearization; // linearizationState = 0: unchecked // linearizationState = 1: checked and valid // linearizationState = 2: checked and invalid int linearizationState; XRef *xref; SecurityHandler *secHdlr; Catalog *catalog; Hints *hints; Outline *outline; Page **pageCache; bool ok; int errCode; // If there is an error opening the PDF file with fopen() in the constructor, // then the POSIX errno will be here. int fopenErrno; Goffset startXRefPos; // offset of last xref table mutable std::recursive_mutex mutex; }; #endif