PageMarkup.h
1 #pragma once
2 // Copyright (c) 2001-2024 Aspose Pty Ltd. All Rights Reserved.
3 
4 #include <system/array.h>
5 #include <cstdint>
6 
7 #include "Aspose.PDF.Cpp/aspose_pdf_api_defs.h"
8 
// Forward declarations of the Aspose::Pdf types that are only named (as
// friend, pointer or template arguments) by the PageMarkup declaration
// further down, so their full headers are not included here.
// NOTE(review): Rectangle, MarkupParagraph and ParagraphAbsorber are declared
// but not referenced by any member visible in this listing except Rectangle;
// presumably the others are used by members/doc comments that the listing
// dropped -- confirm before removing anything.
9 namespace Aspose
10 {
11 namespace Pdf
12 {
13 class Matrix;
14 class Page;
15 namespace PdfToMarkdown
16 {
17 namespace Extractors
18 {
19 class ParagraphsExtractor;
20 } // namespace Extractors
21 } // namespace PdfToMarkdown
22 class Rectangle;
23 namespace Text
24 {
25 class MarkupParagraph;
26 class MarkupSection;
27 class ParagraphAbsorber;
28 class TextFragment;
29 } // namespace Text
30 } // namespace Pdf
31 } // namespace Aspose
// Forward declaration of the System::Collections::Generic::List class
// template (single type parameter), used below only as a template argument
// of System::SharedPtr in PageMarkup member signatures.
32 namespace System
33 {
34 namespace Collections
35 {
36 namespace Generic
37 {
38 template <typename> class List;
39 } // namespace Generic
40 } // namespace Collections
41 } // namespace System
42 
43 namespace Aspose {
44 
45 namespace Pdf {
46 
47 namespace Text {
48 
/// Page markup represented by collections of MarkupSection and MarkupParagraph.
///
/// The class is declared `final` and derives publicly from System::Object.
/// Only the getters/setter in the `public` section are part of the public
/// surface; construction and the processing methods are `protected`, and
/// Aspose::Pdf::PdfToMarkdown::Extractors::ParagraphsExtractor is granted
/// access to them through a friend declaration.
///
/// NOTE(review): the leading numbers on each line are listing line numbers
/// embedded in the text; the gaps between them suggest that the original doc
/// comments (and possibly some members) are not shown here -- confirm against
/// the real header before relying on this declaration being complete.
52 class ASPOSE_PDF_SHARED_CLASS PageMarkup final : public System::Object
53 {
    // Aliases for this class and its direct base, followed by the
    // project-specific RTTI declaration macro. Their expansion is defined
    // outside this file.
54  typedef PageMarkup ThisType;
55  typedef System::Object BaseType;
56 
57  typedef ::System::BaseTypesInfo<BaseType> ThisTypeBaseTypesInfo;
58  ASPOSE_PDF_SHARED_RTTI_INFO_DECL();
59 
    // Lets ParagraphsExtractor reach the protected/private members below.
61  friend class Aspose::Pdf::PdfToMarkdown::Extractors::ParagraphsExtractor;
62 
63 public:
64 
    /// Returns a 32-bit integer; presumably the number of the page this
    /// markup belongs to (backed by `_number`) -- TODO confirm.
68  ASPOSE_PDF_SHARED_API int32_t get_Number() const;
    /// Returns a shared pointer to an Aspose::Pdf::Rectangle.
    /// NOTE(review): presumably the page/search rectangle in page
    /// coordinates -- confirm against the implementation.
72  ASPOSE_PDF_SHARED_API System::SharedPtr<Aspose::Pdf::Rectangle> get_Rectangle() const;
    /// Returns a shared pointer to a List of shared pointers to TextFragment.
87  ASPOSE_PDF_SHARED_API System::SharedPtr<System::Collections::Generic::List<System::SharedPtr<TextFragment>>> get_TextFragments() const;
    /// Getter of the boolean IsMulticolumnParagraphsAllowed property.
91  ASPOSE_PDF_SHARED_API bool get_IsMulticolumnParagraphsAllowed() const;
    /// Setter of the boolean IsMulticolumnParagraphsAllowed property.
    /// @param value New value of the flag.
95  ASPOSE_PDF_SHARED_API void set_IsMulticolumnParagraphsAllowed(bool value);
96 
97 protected:
98 
    /// Accepts a shared pointer to a List of shared pointers to MarkupSection.
    /// No matching getter is visible in this listing.
    /// @param value The section list to assign.
102  ASPOSE_PDF_SHARED_API void set_Sections(System::SharedPtr<System::Collections::Generic::List<System::SharedPtr<MarkupSection>>> value);
    /// Returns a System::ArrayPtr whose elements are std::vector<uint8_t>.
    /// NOTE(review): uses std::vector, but no <vector> include is visible in
    /// this file -- presumably provided transitively by <system/array.h>;
    /// confirm.
107  System::ArrayPtr<std::vector<uint8_t>> get_Grid() const;
108 
    /// Default constructor; protected, so not callable by ordinary clients.
112  PageMarkup();
113 
    // Project macro declaring an object-creation helper for the
    // zero-argument constructor (expansion defined outside this file).
114  MEMBER_FUNCTION_MAKE_OBJECT_DECLARATION(PageMarkup, CODEPORTING_ARGS());
115 
121 
    // Same macro for a constructor taking a `searchRectangle` argument.
    // NOTE(review): the constructor declaration itself is not visible in this
    // listing; presumably it sits on one of the dropped lines -- confirm.
122  MEMBER_FUNCTION_MAKE_OBJECT_DECLARATION(PageMarkup, CODEPORTING_ARGS(System::SharedPtr<Aspose::Pdf::Rectangle> searchRectangle));
    /// @param page Shared pointer to the Page to attach.
127  void Attach(System::SharedPtr<Page> page);
    /// Takes no arguments and returns nothing; operates on object state only.
131  void DistributeFragmentsForSections();
    /// Returns a double; non-const, so it may modify object state.
136  double GetAverageFontSize();
    /// @param previousePageMarkup Shared pointer to another PageMarkup;
    ///        presumably the markup of the preceding page -- TODO confirm.
140  void ToCrossSectionParagraphs(System::SharedPtr<PageMarkup> previousePageMarkup);
    /// @param previousePageMarkup Shared pointer to another PageMarkup;
    ///        presumably the markup of the preceding page -- TODO confirm.
144  void ToIsolatedSectionParagraphs(System::SharedPtr<PageMarkup> previousePageMarkup);
145 
    /// Virtual destructor, exported through ASPOSE_PDF_SHARED_API.
146  virtual ASPOSE_PDF_SHARED_API ~PageMarkup();
147 
148 private:
149 
    // Presumably the backing field of get_Number() -- TODO confirm.
150  int32_t _number;
    // 32-bit integer scale factor; its use is not visible in this header.
153  int32_t _scaleFactor;
    // Presumably the backing field of the IsMulticolumnParagraphsAllowed
    // property -- TODO confirm.
154  bool _isMulticolumnParagraphs;
161 
    /// @param reversedMatrix Shared pointer to a Matrix.
162  void FillGrid(System::SharedPtr<Matrix> reversedMatrix);
169 
170 };
171 
172 } // namespace Text
173 } // namespace Pdf
174 } // namespace Aspose
175 
176 
class ASPOSECPP_SHARED_CLASS List
Definition: ienumerable.h:17
Base class that enables using methods available for System.Object class in C#. All non-trivial classe...
Definition: object.h:64
Definition: Artifact.h:67
Pointer class to wrap types being allocated on heap. Use it to manage memory for classes inheriting O...
Definition: fwd.h:22
Represents a text type.
Represents an absorber object of page structure objects such as sections and paragraphs. Performs search for sections and paragraphs of text and provides access for rectangles and polygons that describe it in text coordinate space. Also performs text segments search and provides access to search results via TextFragments collections grouped by structure elements.
Definition: ParagraphAbsorber.h:53
Definition: AFRelationship.h:4
List forward declaration.
Definition: Artifact.h:74
The coordinates are in the page coordinate context.
Page markup represented by collections of MarkupSection and MarkupParagraph.
Definition: PageMarkup.h:52
Whole PDF file will be submitted.