ParagraphAbsorber.h
1 #pragma once
2 // Copyright (c) 2001-2024 Aspose Pty Ltd. All Rights Reserved.
3 
4 #include <system/details/array_view.h>
5 #include <system/array.h>
6 #include <cstdint>
7 
8 #include "Aspose.PDF.Cpp/aspose_pdf_api_defs.h"
9 
// Forward declarations of Aspose::Pdf types. In the declarations visible in
// this header, Document, Page, Rectangle, PageMarkup, ParagraphAbsorberOptions
// and TextReplaceOptions appear only as System::SharedPtr<> arguments, so an
// incomplete type is sufficient here.
// NOTE(review): MarkupSection is not referenced by any declaration visible in
// this listing -- presumably used by members not shown here; confirm.
10 namespace Aspose
11 {
12 namespace Pdf
13 {
14 class Document;
15 class Page;
16 class Rectangle;
17 namespace Text
18 {
19 class MarkupSection;
20 class PageMarkup;
21 class ParagraphAbsorberOptions;
22 class TextReplaceOptions;
23 } // namespace Text
24 } // namespace Pdf
25 } // namespace Aspose
// Forward declaration of the class template System::Collections::Generic::List
// (one type parameter). This header names it only as a System::SharedPtr<>
// argument in a function return type.
26 namespace System
27 {
28 namespace Collections
29 {
30 namespace Generic
31 {
32 template <typename> class List;
33 } // namespace Generic
34 } // namespace Collections
35 } // namespace System
36 
37 namespace Aspose {
38 
39 namespace Pdf {
40 
41 namespace Text {
42 
/// Represents an absorber object of page structure objects such as sections
/// and paragraphs. Derives publicly from System::Object and is exported via
/// ASPOSE_PDF_SHARED_CLASS.
///
/// Only declarations are present in this header; every member function is
/// defined elsewhere.
54 class ASPOSE_PDF_SHARED_CLASS ParagraphAbsorber : public System::Object
55 {
    // Alias for the direct base class.
57  typedef System::Object BaseType;
58 
    // BaseTypesInfo instantiated over the base class alias.
    // NOTE(review): presumably consumed by the RTTI macro below -- confirm.
59  typedef ::System::BaseTypesInfo<BaseType> ThisTypeBaseTypesInfo;
    // Project macro defined outside this header.
    // NOTE(review): presumably declares the RTTI support members -- confirm.
60  ASPOSE_PDF_SHARED_RTTI_INFO_DECL();
61 
64 
65 public:
66 
    /// Returns a shared pointer to a list of shared pointers to PageMarkup.
    /// NOTE(review): presumably filled by the Visit() overloads -- confirm.
70  ASPOSE_PDF_SHARED_API System::SharedPtr<System::Collections::Generic::List<System::SharedPtr<PageMarkup>>> get_PageMarkups() const;
    /// Returns the sections search depth as an int32_t.
    /// NOTE(review): presumably backed by _sectionsSearchDepth -- confirm.
81  ASPOSE_PDF_SHARED_API int32_t get_SectionsSearchDepth() const;
    /// Sets the sections search depth.
    /// @param value New depth. Valid range is not visible here -- TODO confirm.
92  ASPOSE_PDF_SHARED_API void set_SectionsSearchDepth(int32_t value);
    /// Gets the value that indicates whether starting text lines of a next
    /// section may be treated as a continuation.
    /// NOTE(review): continuation of what is not visible in this header --
    /// presumably the preceding section's paragraph; confirm.
96  ASPOSE_PDF_SHARED_API bool get_IsMulticolumnParagraphsAllowed() const;
    /// Sets the value that indicates whether starting text lines of a next
    /// section may be treated as a continuation (see the getter's caveat).
    /// @param value New flag value.
100  ASPOSE_PDF_SHARED_API void set_IsMulticolumnParagraphsAllowed(bool value);
    /// Returns a shared pointer to the ParagraphAbsorberOptions object.
104  ASPOSE_PDF_SHARED_API System::SharedPtr<Aspose::Pdf::Text::ParagraphAbsorberOptions> get_ParagraphAbsorberOptions() const;
    /// Replaces the ParagraphAbsorberOptions object.
    /// @param value Shared pointer to the new options, taken by value.
    ///        Whether a null pointer is accepted is not visible here.
108  ASPOSE_PDF_SHARED_API void set_ParagraphAbsorberOptions(System::SharedPtr<Aspose::Pdf::Text::ParagraphAbsorberOptions> value);
    /// Returns a shared pointer to the TextReplaceOptions object.
112  ASPOSE_PDF_SHARED_API System::SharedPtr<Aspose::Pdf::Text::TextReplaceOptions> get_TextReplaceOptions() const;
    /// Replaces the TextReplaceOptions object.
    /// @param value Shared pointer to the new options, taken by value.
    ///        Whether a null pointer is accepted is not visible here.
116  ASPOSE_PDF_SHARED_API void set_TextReplaceOptions(System::SharedPtr<Aspose::Pdf::Text::TextReplaceOptions> value);
117 
    /// Default constructor. Initial member values are set by the definition,
    /// which is not part of this header.
121  ASPOSE_PDF_SHARED_API ParagraphAbsorber();
    /// Constructs an absorber from a sections search depth.
    /// NOTE(review): not declared `explicit`, so an int32_t converts
    /// implicitly to ParagraphAbsorber. Adding `explicit` would change the
    /// public interface, so it is only flagged here.
    /// @param sectionsSearchDepth Depth value; valid range not visible here.
129  ASPOSE_PDF_SHARED_API ParagraphAbsorber(int32_t sectionsSearchDepth);
    /// Constructs an absorber from an options object.
    /// NOTE(review): not declared `explicit`, so a
    /// SharedPtr<ParagraphAbsorberOptions> converts implicitly.
    /// @param paragraphAbsorberOptions Shared pointer to the options to use.
135  ASPOSE_PDF_SHARED_API ParagraphAbsorber(System::SharedPtr<Aspose::Pdf::Text::ParagraphAbsorberOptions> paragraphAbsorberOptions);
    /// Constructs an absorber from both a sections search depth and options.
    /// @param sectionsSearchDepth Depth value; valid range not visible here.
    /// @param paragraphAbsorberOptions Shared pointer to the options to use.
142  ASPOSE_PDF_SHARED_API ParagraphAbsorber(int32_t sectionsSearchDepth, System::SharedPtr<Aspose::Pdf::Text::ParagraphAbsorberOptions> paragraphAbsorberOptions);
143 
    /// Visits a whole document.
    /// NOTE(review): presumably runs the section/paragraph search over every
    /// page and stores the results for get_PageMarkups() -- confirm.
    /// @param doc Shared pointer to the Document to process.
148  ASPOSE_PDF_SHARED_API void Visit(System::SharedPtr<Document> doc);
    /// Visits a single page.
    /// NOTE(review): presumably the per-page form of Visit(Document) --
    /// confirm.
    /// @param page Shared pointer to the Page to process.
153  ASPOSE_PDF_SHARED_API void Visit(System::SharedPtr<Page> page);
154 
155 protected:
156 
    // Static helpers. They are not marked ASPOSE_PDF_SHARED_API, unlike the
    // public members above. Units and semantics of scaleFactor and of the
    // index arrays are not visible in this header -- TODO confirm against the
    // implementation.

    // Takes a rectangle and a scale factor; returns an array of int32_t.
157  static System::ArrayPtr<int32_t> GetGridIndicesForRect(System::SharedPtr<Rectangle> rect, double scaleFactor);
    // Takes a view over int32_t indices and a scale factor; returns a
    // Rectangle. NOTE(review): by its name, the inverse of
    // GetGridIndicesForRect -- confirm.
158  static System::SharedPtr<Rectangle> GetRectFromGridByIndices(System::Details::ArrayView<int32_t> indices, double scaleFactor);
    // Takes an x value and a scale factor; returns a single int32_t index.
159  static int32_t GetGridIndexForX(double x, double scaleFactor);
    // Predicate over one UTF-16 code unit (char16_t). Which alphabets count as
    // "capital" is not visible here.
160  static bool IsCapitalLetter(char16_t c);
161 
162 private:
163 
    // Data members. None has an in-class initializer in this header.
    // NOTE(review): _sectionsSearchDepth and _isMulticolumnParagraphs
    // presumably back the SectionsSearchDepth and
    // IsMulticolumnParagraphsAllowed accessors -- confirm.
    // Members are paired as double "level/length" values and int32_t "OnGrid"
    // counterparts; how one is derived from the other is not visible here.
164  int32_t _sectionsSearchDepth;
165  double _minBreakingHorizontalFillingLevel;
166  double _minBreakingVerticalFillingLevel;
167  int32_t _minBreakingHorizontalFillingOnGrid;
168  int32_t _minBreakingVerticalFillingOnGrid;
169  double _maxUnbreakingVLength;
170  double _maxUnbreakingHLength;
171  int32_t _maxUnbreakingVLengthOnGrid;
172  int32_t _maxUnbreakingHLengthOnGrid;
174  System::String _errors;
175  bool _isMulticolumnParagraphs;
178 
    // Takes a shared pointer to one PageMarkup and returns nothing.
    // NOTE(review): presumably post-processes that page's markup sections --
    // confirm against the implementation.
179  void ProcessMarkupSections(System::SharedPtr<PageMarkup> markup);
185 
186 };
187 
188 } // namespace Text
189 } // namespace Pdf
190 } // namespace Aspose
191 
192 
String class used across the library. Is a substitute for C# System.String when translating code...
Definition: string.h:121
class ASPOSECPP_SHARED_CLASS List
Definition: ienumerable.h:17
Base class that enables using methods available for System.Object class in C#. All non-trivial classe...
Definition: object.h:64
Definition: Artifact.h:67
void ToIsolatedSectionParagraphs(System::SharedPtr< PageMarkup > previousePageMarkup)
Rebuilds paragraphs using the 'isolated section' rule.
Represents a markup section - the rectangular region of a page that contains text and can be visually...
Definition: MarkupSection.h:46
Pointer class to wrap types being allocated on heap. Use it to manage memory for classes inheriting O...
Definition: fwd.h:22
Represents a text type.
ASPOSE_PDF_SHARED_API bool get_IsMulticolumnParagraphsAllowed() const
Gets value that indicates whether starting text lines of a next section may be treated as continuatio...
Represents an absorber object of page structure objects such as sections and paragraphs. Performs search for sections and paragraphs of text and provides access to the rectangles and polygons that describe them in text coordinate space. Also performs text segment search and provides access to the search results via TextFragments collections grouped by structure elements.
Definition: ParagraphAbsorber.h:54
Definition: AFRelationship.h:4
ASPOSE_PDF_SHARED_API void set_IsMulticolumnParagraphsAllowed(bool value)
Sets value that indicates whether starting text lines of a next section may be treated as continuatio...
void ToCrossSectionParagraphs(System::SharedPtr< PageMarkup > previousePageMarkup)
Rebuilds paragraphs using the 'cross-section' rule.
List forward declaration.
Definition: Artifact.h:74
The coordinates are in the page coordinate context.
Page markup represented by collections of MarkupSection and MarkupParagraph.
Definition: PageMarkup.h:53
Whole PDF file will be submitted.