TextAbsorber.h
1 #pragma once
2 // Copyright (c) 2001-2024 Aspose Pty Ltd. All Rights Reserved.
3 
4 #include <system/object.h>
5 #include <cstdint>
6 
7 #include "Aspose.PDF.Cpp/Text/TextOptions/TextExtractionOptions.h"
8 #include "Aspose.PDF.Cpp/aspose_pdf_api_defs.h"
9 
// Forward declarations of Aspose::Pdf types. Only names are introduced here;
// nothing is defined. In the TextAbsorber declaration further down this file,
// Document, Page and XForm appear as Visit() parameter types,
// Text::TextSearchOptions appears in accessor and constructor signatures, and
// GroupProcessor::TextExtractor is named in a friend declaration.
// NOTE(review): TextSegmenter and TextExtractionError are not referenced by any
// declaration visible in this listing — confirm whether they are still needed.
10 namespace Aspose
11 {
12 namespace Pdf
13 {
14 class Document;
15 namespace Engine
16 {
17 namespace CommonData
18 {
19 namespace Text
20 {
21 namespace Segmenting
22 {
23 class TextSegmenter;
24 } // namespace Segmenting
25 } // namespace Text
26 } // namespace CommonData
27 } // namespace Engine
28 namespace GroupProcessor
29 {
30 class TextExtractor;
31 } // namespace GroupProcessor
32 class Page;
33 namespace Text
34 {
35 class TextExtractionError;
36 class TextSearchOptions;
37 } // namespace Text
38 class XForm;
39 } // namespace Pdf
40 } // namespace Aspose
// Forward declarations of System-namespace types. Only names are introduced
// here; nothing is defined. System::String is used by value in the
// TextAbsorber declaration further down this file (get_Text, Deligaturize).
// NOTE(review): Collections::Generic::List and Text::StringBuilder are not
// referenced by any declaration visible in this listing — presumably used by
// members the listing does not show; confirm against the original header.
41 namespace System
42 {
43 namespace Collections
44 {
45 namespace Generic
46 {
47 template <typename> class List;
48 } // namespace Generic
49 } // namespace Collections
50 class String;
51 namespace Text
52 {
53 class StringBuilder;
54 } // namespace Text
55 } // namespace System
56 
57 namespace Aspose {
58 
59 namespace Pdf {
60 
61 namespace Text {
62 
/// Text absorber. Per the reference text that accompanies this listing, it
/// represents an absorber object of a text, performs text extraction and
/// provides access to the result.
///
/// This is a declaration only; no member is defined in this listing.
/// The ASPOSE_PDF_SHARED_* macros presumably come from aspose_pdf_api_defs.h
/// (included above) — TODO confirm.
72 class ASPOSE_PDF_SHARED_CLASS TextAbsorber : public System::Object
73 {
    // Aliases for this class and its base class.
    // NOTE(review): presumably consumed by the RTTI declaration macro below — confirm.
74  typedef TextAbsorber ThisType;
75  typedef System::Object BaseType;
76 
77  typedef ::System::BaseTypesInfo<BaseType> ThisTypeBaseTypesInfo;
78  ASPOSE_PDF_SHARED_RTTI_INFO_DECL();
79 
    // Grants GroupProcessor::TextExtractor access to the non-public members.
80  friend class Aspose::Pdf::GroupProcessor::TextExtractor;
81 
82 public:
83 
    /// Returns a System::String by value.
    /// NOTE(review): presumably the text gathered by the Visit() calls — confirm.
89  virtual ASPOSE_PDF_SHARED_API System::String get_Text();
    /// Const, non-virtual query returning bool.
    /// NOTE(review): presumably reports the private _hasErrors flag — confirm.
94  ASPOSE_PDF_SHARED_API bool get_HasErrors() const;
    /// Getter for the TextExtractionOptions object, returned as a System::SharedPtr.
109  virtual ASPOSE_PDF_SHARED_API System::SharedPtr<TextExtractionOptions> get_ExtractionOptions();
    /// Setter for the TextExtractionOptions object; the SharedPtr is taken by value.
119  virtual ASPOSE_PDF_SHARED_API void set_ExtractionOptions(System::SharedPtr<TextExtractionOptions> value);
    /// Getter for the TextSearchOptions object, returned as a System::SharedPtr.
127  virtual ASPOSE_PDF_SHARED_API System::SharedPtr<Aspose::Pdf::Text::TextSearchOptions> get_TextSearchOptions();
    /// Setter for the TextSearchOptions object; the SharedPtr is taken by value.
135  virtual ASPOSE_PDF_SHARED_API void set_TextSearchOptions(System::SharedPtr<Aspose::Pdf::Text::TextSearchOptions> value);
136 
    /// Three virtual Visit() overloads, accepting a Page, an XForm, or a Document.
    /// NOTE(review): presumably each extracts text from the given object — confirm.
143  virtual ASPOSE_PDF_SHARED_API void Visit(System::SharedPtr<Page> page);
150  virtual ASPOSE_PDF_SHARED_API void Visit(System::SharedPtr<XForm> form);
157  virtual ASPOSE_PDF_SHARED_API void Visit(System::SharedPtr<Document> pdf);
158 
    /// Default constructor, plus constructors taking extraction options or search options.
    /// NOTE(review): the single-argument constructors are not `explicit`, so a SharedPtr
    /// of either options type converts implicitly to a TextAbsorber.
167  ASPOSE_PDF_SHARED_API TextAbsorber();
177  ASPOSE_PDF_SHARED_API TextAbsorber(System::SharedPtr<TextExtractionOptions> extractionOptions);
194  ASPOSE_PDF_SHARED_API TextAbsorber(System::SharedPtr<Aspose::Pdf::Text::TextSearchOptions> textSearchOptions);
195 
196 protected:
    // NOTE(review): no members are visible in this section of the listing. The reference
    // text after the listing mentions `extractedText`, a
    // System::SharedPtr<System::Text::StringBuilder>, defined at TextAbsorber.h:198.
    // Verify against the original header.
197 
199 
201 
203 
204 private:
205 
    // Static constant, declared here without an initializer.
    // NOTE(review): the name suggests a character limit for evaluation mode — confirm.
206  static const int32_t EvaluationCharsCount;
    // Error flag. NOTE(review): presumably what get_HasErrors() returns — confirm.
209  bool _hasErrors;
212 
    // Private helper. NOTE(review): presumably shared constructor initialization — confirm.
213  void Init();
    // Takes a phrase by value and returns a System::String.
    // NOTE(review): presumably expands ligatures in the phrase — confirm.
215  System::String Deligaturize(System::String phrase);
216 
217 };
218 
219 } // namespace Text
220 } // namespace Pdf
221 } // namespace Aspose
222 
223 
String class used across the library. It is a substitute for C# System.String when translating code...
Definition: string.h:121
class ASPOSECPP_SHARED_CLASS List
Definition: ienumerable.h:17
Represents a text absorber object. Performs text extraction and provides access to the result v...
Definition: TextAbsorber.h:72
Base class that enables using methods available for System.Object class in C#. All non-trivial classe...
Definition: object.h:64
Definition: Artifact.h:67
TextFormattingMode
Defines the different modes that can be used when converting a PDF document into text. See the TextDevice class.
Definition: TextExtractionOptions.h:29
Pointer class to wrap types allocated on the heap. Use it to manage memory for classes inheriting O...
Definition: fwd.h:22
Definition: AFRelationship.h:4
System::SharedPtr< System::Text::StringBuilder > extractedText
Definition: TextAbsorber.h:198
The coordinates are in the page coordinate context.
Whole PDF file will be submitted.