TextAbsorber.h
1
#pragma once
2
// Copyright (c) 2001-2024 Aspose Pty Ltd. All Rights Reserved.
3
4
#include <system/object.h>
5
#include <cstdint>
6
7
#include "Aspose.PDF.Cpp/Text/TextOptions/TextExtractionOptions.h"
8
#include "Aspose.PDF.Cpp/aspose_pdf_api_defs.h"
9
10
// Forward declarations of Aspose.PDF types that this header mentions only as
// SharedPtr<> template arguments or as a friend-class target. Their full
// definitions are not required here, which keeps this header's include set small.
namespace Aspose
{
namespace Pdf
{
class Document; // argument type of TextAbsorber::Visit(SharedPtr<Document>)
namespace Engine
{
namespace CommonData
{
namespace Text
{
namespace Segmenting
{
class TextSegmenter; // argument type of GetTotalText() / GetExtractedText()
} // namespace Segmenting
} // namespace Text
} // namespace CommonData
} // namespace Engine
namespace GroupProcessor
{
class TextExtractor; // declared a friend of TextAbsorber
} // namespace GroupProcessor
class Page; // argument type of TextAbsorber::Visit(SharedPtr<Page>)
namespace Text
{
class TextExtractionError; // element type of the list returned by get_Errors()
class TextSearchOptions;   // used by get_/set_TextSearchOptions() and two constructors
} // namespace Text
class XForm; // argument type of TextAbsorber::Visit(SharedPtr<XForm>)
} // namespace Pdf
} // namespace Aspose
41
// Forward declarations of the System:: support-library types that this header
// names in signatures and member declarations without needing their definitions.
namespace System
{
namespace Collections
{
namespace Generic
{
// Generic list container; used below as List<int32_t> and
// List<SharedPtr<TextExtractionError>>.
template <typename> class List;
} // namespace Generic
} // namespace Collections
class String; // return/parameter type of the text accessors and helpers
namespace Text
{
class StringBuilder; // type behind the protected extractedText member
} // namespace Text
} // namespace System
56
57
namespace
Aspose
{
58
59
namespace
Pdf
{
60
61
namespace
Text
{
62
72
class
ASPOSE_PDF_SHARED_CLASS
TextAbsorber
:
public
System::Object
73
{
74
typedef
TextAbsorber
ThisType
;
75
typedef
System::Object
BaseType
;
76
77
typedef ::System::BaseTypesInfo<BaseType> ThisTypeBaseTypesInfo;
78
ASPOSE_PDF_SHARED_RTTI_INFO_DECL();
79
80
friend
class
Aspose::Pdf::GroupProcessor::TextExtractor;
81
82
public
:
83
89
virtual
ASPOSE_PDF_SHARED_API
System::String
get_Text();
94
ASPOSE_PDF_SHARED_API
bool
get_HasErrors()
const
;
99
ASPOSE_PDF_SHARED_API
System::SharedPtr<System::Collections::Generic::List<System::SharedPtr<TextExtractionError>
>> get_Errors()
const
;
109
virtual
ASPOSE_PDF_SHARED_API
System::SharedPtr<TextExtractionOptions>
get_ExtractionOptions();
119
virtual
ASPOSE_PDF_SHARED_API
void
set_ExtractionOptions(
System::SharedPtr<TextExtractionOptions>
value);
127
virtual
ASPOSE_PDF_SHARED_API
System::SharedPtr<Aspose::Pdf::Text::TextSearchOptions>
get_TextSearchOptions();
135
virtual
ASPOSE_PDF_SHARED_API
void
set_TextSearchOptions(
System::SharedPtr<Aspose::Pdf::Text::TextSearchOptions>
value);
136
143
virtual
ASPOSE_PDF_SHARED_API
void
Visit(
System::SharedPtr<Page>
page);
150
virtual
ASPOSE_PDF_SHARED_API
void
Visit(
System::SharedPtr<XForm>
form);
157
virtual
ASPOSE_PDF_SHARED_API
void
Visit(
System::SharedPtr<Document>
pdf);
158
167
ASPOSE_PDF_SHARED_API
TextAbsorber
();
177
ASPOSE_PDF_SHARED_API
TextAbsorber
(
System::SharedPtr<TextExtractionOptions>
extractionOptions);
186
ASPOSE_PDF_SHARED_API
TextAbsorber
(
System::SharedPtr<TextExtractionOptions>
extractionOptions,
System::SharedPtr<Aspose::Pdf::Text::TextSearchOptions>
textSearchOptions);
194
ASPOSE_PDF_SHARED_API
TextAbsorber
(
System::SharedPtr<Aspose::Pdf::Text::TextSearchOptions>
textSearchOptions);
195
196
protected
:
197
198
System::SharedPtr<System::Text::StringBuilder>
extractedText
;
199
200
System::SharedPtr<System::Collections::Generic::List<int32_t>
> get_PageTextLengthes()
const
;
201
202
System::String
GetTotalText(
System::SharedPtr<Aspose::Pdf::Engine::CommonData::Text::Segmenting::TextSegmenter>
segmenter,
TextExtractionOptions::TextFormattingMode
mode);
203
204
private
:
205
206
static
const
int32_t EvaluationCharsCount;
207
System::SharedPtr<TextExtractionOptions>
extractionOptions;
208
System::SharedPtr<Aspose::Pdf::Text::TextSearchOptions>
textSearchOptions;
209
bool
_hasErrors;
210
System::SharedPtr<System::Collections::Generic::List<System::SharedPtr<TextExtractionError>
>> _errors;
211
System::SharedPtr<System::Collections::Generic::List<int32_t>
> pageTextLengthes;
212
213
void
Init();
214
System::String
GetExtractedText(
System::SharedPtr<Aspose::Pdf::Engine::CommonData::Text::Segmenting::TextSegmenter>
segmenter,
TextExtractionOptions::TextFormattingMode
mode);
215
System::String
Deligaturize(
System::String
phrase);
216
217
};
218
219
}
// namespace Text
220
}
// namespace Pdf
221
}
// namespace Aspose
222
223
System::String
String class used across the library. Is a substitute for C# System.String when translating code...
Definition:
string.h:121
System::Collections::Generic::List
class ASPOSECPP_SHARED_CLASS List
Definition:
ienumerable.h:17
Aspose::Pdf::Text::TextAbsorber
Represents an absorber object of a text. Performs text extraction and provides access to the result v...
Definition:
TextAbsorber.h:72
System::Object
Base class that enables using methods available for System.Object class in C#. All non-trivial classe...
Definition:
object.h:64
System
Definition:
Artifact.h:67
System::Drawing::GraphicsUnit::Document
1/300 of an inch.
Aspose::Pdf::Text::TextExtractionOptions::TextFormattingMode
TextFormattingMode
Defines different modes which can be used while converting pdf document into text. See TextDevice class.
Definition:
TextExtractionOptions.h:29
System::SmartPtr
Pointer class to wrap types being allocated on heap. Use it to manage memory for classes inheriting O...
Definition:
fwd.h:22
Aspose
Definition:
AFRelationship.h:4
Aspose::Pdf::Text::TextAbsorber::extractedText
System::SharedPtr< System::Text::StringBuilder > extractedText
Definition:
TextAbsorber.h:198
System::Drawing::Drawing2D::CoordinateSpace::Page
The coordinates are in the page coordinate context.
Aspose::Pdf::Facades::FieldType::Text
Text field.
Aspose::Pdf::Facades::SubmitFormFlag::Pdf
Whole PDF file will be submitted.