PdfExtractor.h
1 #pragma once
2 // Copyright (c) 2001-2024 Aspose Pty Ltd. All Rights Reserved.
3 
4 // C# preprocessor directive: #if ASPOSE_PDF_DRAWING
5 
6 // C# INACTIVE CODE:
7 // using ImageFormat = Aspose.Pdf.Drawing.ImageFormat;
8 
9 // C# preprocessor directive: #else
10 
11 
12 // C# preprocessor directive: #endif
13 
14 
15 #include <system/array.h>
16 #include <cstdint>
17 
18 #include "Aspose.PDF.Cpp/Facades/Facade.h"
19 #include "Aspose.PDF.Cpp/aspose_pdf_api_defs.h"
20 
// Forward declarations of the Aspose.PDF types this header mentions only by
// pointer or by name, so their full headers need not be included here.
namespace Aspose
{
namespace Pdf
{
// Types declared directly in Aspose::Pdf.
class Document;
class FileSpecification;
class XForm;
class XImage;
enum class ExtractImageMode;

namespace Facades
{
class PdfContentEditor;
} // namespace Facades

namespace LicenseManagement
{
class VentureLicense;
class VentureLicenseDecoder;
} // namespace LicenseManagement

namespace Text
{
class TextSearchOptions;
} // namespace Text
} // namespace Pdf
} // namespace Aspose
// Forward declarations of the System (C#-port runtime) types this header
// mentions only by pointer or by name.
namespace System
{
namespace IO
{
class MemoryStream;
class Stream;
} // namespace IO

namespace Text
{
class Encoding;
} // namespace Text

namespace Drawing
{
namespace Imaging
{
class ImageFormat;
} // namespace Imaging
} // namespace Drawing

namespace Collections
{
namespace Generic
{
template <typename> class IList;
template <typename> class List;
template <typename, typename> class Dictionary;
} // namespace Generic
} // namespace Collections
} // namespace System
73 
74 namespace Aspose {
75 
76 namespace Pdf {
77 
78 namespace Facades {
79 
83 class ASPOSE_PDF_SHARED_CLASS PdfExtractor final : public Aspose::Pdf::Facades::Facade
84 {
85  typedef PdfExtractor ThisType;
87 
88  typedef ::System::BaseTypesInfo<BaseType> ThisTypeBaseTypesInfo;
89  ASPOSE_PDF_SHARED_RTTI_INFO_DECL();
90 
91  friend class Aspose::Pdf::LicenseManagement::VentureLicenseDecoder;
93 
94 public:
95 
101  ASPOSE_PDF_SHARED_API int32_t get_StartPage() const;
107  ASPOSE_PDF_SHARED_API void set_StartPage(int32_t value);
113  ASPOSE_PDF_SHARED_API int32_t get_EndPage() const;
119  ASPOSE_PDF_SHARED_API void set_EndPage(int32_t value);
126  ASPOSE_PDF_SHARED_API int32_t get_ExtractTextMode() const;
133  ASPOSE_PDF_SHARED_API void set_ExtractTextMode(int32_t value);
150  ASPOSE_PDF_SHARED_API Aspose::Pdf::ExtractImageMode get_ExtractImageMode() const;
159  ASPOSE_PDF_SHARED_API void set_ExtractImageMode(Aspose::Pdf::ExtractImageMode value);
165  ASPOSE_PDF_SHARED_API bool get_IsBidi();
173  ASPOSE_PDF_SHARED_API int32_t get_Resolution() const;
181  ASPOSE_PDF_SHARED_API void set_Resolution(int32_t value);
185  ASPOSE_PDF_SHARED_API System::String get_Password() const;
189  ASPOSE_PDF_SHARED_API void set_Password(System::String value);
190 
196  ASPOSE_PDF_SHARED_API void ExtractText();
203  //[Obfuscation(Feature = "virtualization", Exclude = false)]
204  ASPOSE_PDF_SHARED_API void ExtractText(System::SharedPtr<System::Text::Encoding> encoding);
209  ASPOSE_PDF_SHARED_API void GetText(System::String outputFile);
214  ASPOSE_PDF_SHARED_API void GetText(System::SharedPtr<System::IO::Stream> outputStream);
221  ASPOSE_PDF_SHARED_API void BindPdf(System::String inputFile) override;
228  ASPOSE_PDF_SHARED_API void BindPdf(System::SharedPtr<System::IO::Stream> inputStream) override;
234  ASPOSE_PDF_SHARED_API void ExtractImage();
241  ASPOSE_PDF_SHARED_API bool HasNextImage();
249  ASPOSE_PDF_SHARED_API bool GetNextImage(System::String outputFile);
256  ASPOSE_PDF_SHARED_API bool GetNextImage(System::String outputFile, System::SharedPtr<System::Drawing::Imaging::ImageFormat> format);
263  ASPOSE_PDF_SHARED_API bool GetNextImage(System::SharedPtr<System::IO::Stream> outputStream, System::SharedPtr<System::Drawing::Imaging::ImageFormat> format);
269  ASPOSE_PDF_SHARED_API bool GetNextImage(System::SharedPtr<System::IO::Stream> outputStream);
276  ASPOSE_PDF_SHARED_API System::SharedPtr<System::Collections::Generic::IList<System::String>> GetAttachNames();
280  ASPOSE_PDF_SHARED_API void ExtractAttachment();
285  ASPOSE_PDF_SHARED_API void ExtractAttachment(System::String attachmentFileName);
293  ASPOSE_PDF_SHARED_API void GetAttachment(System::String outputPath);
300  ASPOSE_PDF_SHARED_API bool HasNextPageText();
307  ASPOSE_PDF_SHARED_API void GetNextPageText(System::String outputFile);
314  ASPOSE_PDF_SHARED_API void GetNextPageText(System::SharedPtr<System::IO::Stream> outputStream);
315 
319  ASPOSE_PDF_SHARED_API PdfExtractor();
326  ASPOSE_PDF_SHARED_API PdfExtractor(System::SharedPtr<Aspose::Pdf::Document> document);
327 
333  ASPOSE_PDF_SHARED_API void GetText(System::SharedPtr<System::IO::Stream> outputStream, bool filterNotAscii);
340  ASPOSE_PDF_SHARED_API System::ArrayPtr<System::SharedPtr<System::IO::MemoryStream>> GetAttachment();
346 
347 protected:
348 
352  bool get__IsObjectLicensed();
353 
354  void SetVentureLicense(System::SharedPtr<Aspose::Pdf::LicenseManagement::VentureLicense> license) override;
357  void InitPageImages(System::SharedPtr<System::Collections::Generic::List<System::SharedPtr<XImage>>> images, System::SharedPtr<Aspose::Pdf::Document> document, int32_t page, int32_t endPage);
358  static void InitPageXFormImages_ActuallyUsed(System::SharedPtr<System::Collections::Generic::List<System::SharedPtr<XImage>>> images, System::SharedPtr<Aspose::Pdf::Document> document, int32_t page, int32_t endPage);
359  void InitPageXFormImages_DefinedInResources(System::SharedPtr<System::Collections::Generic::List<System::SharedPtr<XImage>>> images, System::SharedPtr<Aspose::Pdf::Document> document, int32_t page, int32_t endPage);
360 
361 private:
362 
363  int32_t _startPage;
364  int32_t _endPage;
365  int32_t _currentPage;
367  int32_t _currentImage;
368  int32_t _currentNoNameAttachment;
369  System::String _attachmentFileName;
371  int32_t _extractTextMode;
372  Aspose::Pdf::ExtractImageMode _extractImageMode;
374  int32_t _resolution;
375  System::String _password;
380 
381  void WriteAttachment(System::SharedPtr<FileSpecification> file, System::String outputPath);
382  System::ArrayPtr<System::SharedPtr<System::IO::MemoryStream>> GetAttachment(System::String outputPath, bool fileOut);
384  void InitPageBounds();
385  bool IsArabic(char16_t chr);
386  bool IsHebriew(char16_t chr);
387  void Reset();
388 
389 };
390 
391 } // namespace Facades
392 } // namespace Pdf
393 } // namespace Aspose
394 
395 
String class used across the library. Is a substitute for C# System.String when translating code...
Definition: string.h:121
class ASPOSECPP_SHARED_CLASS List
Definition: ienumerable.h:17
Definition: Artifact.h:71
Base facade class.
Definition: Facade.h:57
ExtractImageMode
Defines different modes which can be used while extracting images from documents. ...
Definition: ExtractImageMode.h:11
The type that supports reliable, two-way, connection-based byte streams without duplication of data a...
ASPOSE_PDF_SHARED_API void set_TextSearchOptions(System::SharedPtr< Aspose::Pdf::Text::TextSearchOptions > value)
Sets text search options.
Pointer class to wrap types being allocated on heap. Use it to manage memory for classes inheriting O...
Definition: SideBySidePdfComparer.h:24
ASPOSE_PDF_SHARED_API void BindPdf(System::String inputFile) override
Binds a PDF file for editing.
General facade interface that defines common facades methods.
Definition: IFacade.h:33
Definition: AFRelationship.h:4
ASPOSE_PDF_SHARED_API System::SharedPtr< Aspose::Pdf::Text::TextSearchOptions > get_TextSearchOptions() const
Gets text search options.
List forward declaration.
Definition: Artifact.h:78
Class for extracting images and text from PDF document.
Definition: PdfExtractor.h:83
Represents a class to edit PDF file's content.
Definition: PdfContentEditor.h:89
ImageFormat
This enum represents image formats.
Definition: ImageFormat.h:13
bool get__IsObjectLicensed()
Gets licensed state of the system. Returns true if system works in licensed mode and false otherwise...
Whole PDF file will be submitted.