MarkupSection.h
1 #pragma once
2 // Copyright (c) 2001-2024 Aspose Pty Ltd. All Rights Reserved.
3 
4 #include <system/object.h>
5 #include <cstdint>
6 
7 #include "Aspose.PDF.Cpp/aspose_pdf_api_defs.h"
8 
9 namespace Aspose
10 {
11 namespace Pdf
12 {
13 class Matrix;
14 class Rectangle;
15 namespace Text
16 {
17 class MarkupParagraph;
18 class PageMarkup;
19 class ParagraphAbsorber;
20 class TextFragment;
21 } // namespace Text
22 } // namespace Pdf
23 } // namespace Aspose
24 namespace System
25 {
26 namespace Collections
27 {
28 namespace Generic
29 {
30 template <typename> class List;
31 } // namespace Generic
32 } // namespace Collections
33 class String;
34 } // namespace System
35 
36 namespace Aspose {
37 
38 namespace Pdf {
39 
40 namespace Text {
41 
/// Represents a markup section - the rectangular region of a page that contains text.
/// The class is final and derives from System::Object; no method bodies are visible in
/// this header, so behavioural notes below that go beyond the signatures are hedged.
45 class ASPOSE_PDF_SHARED_CLASS MarkupSection final : public System::Object
46 {
 // Aliases for this class and its direct base; BaseType feeds ThisTypeBaseTypesInfo below.
47  typedef MarkupSection ThisType;
48  typedef System::Object BaseType;
49 
 // Base-type list wrapped in System::BaseTypesInfo.
 // NOTE(review): presumably consumed by the RTTI declaration macro that follows - confirm.
50  typedef ::System::BaseTypesInfo<BaseType> ThisTypeBaseTypesInfo;
51  ASPOSE_PDF_SHARED_RTTI_INFO_DECL();
52 
55 
56 public:
57 
 /// Returns a shared pointer to an Aspose::Pdf::Rectangle; const, does not take arguments.
 /// NOTE(review): presumably the rectangle bounding this section - confirm against the
 /// implementation. This is the only member marked ASPOSE_PDF_SHARED_API in this class.
61  ASPOSE_PDF_SHARED_API System::SharedPtr<Aspose::Pdf::Rectangle> get_Rectangle() const;
73 
74 protected:
75 
 // Const accessors. NOTE(review): presumably return _scaleFactor and
 // _transformationMatrix respectively - the bodies are not visible here.
83  int32_t get_ScaleFactor() const;
84  System::SharedPtr<Matrix> get_TransformationMatrix() const;
85 
87 
 // Construction declaration taking a rectangle and a matrix.
 // NOTE(review): the macro is defined elsewhere; presumably it declares the hook used by
 // the library's object factory for a (rect, matrix) constructor - confirm.
88  MEMBER_FUNCTION_MAKE_OBJECT_DECLARATION(MarkupSection, CODEPORTING_ARGS(System::SharedPtr<Aspose::Pdf::Rectangle> rect, System::SharedPtr<Matrix> matrix));
89 
91 
 // Same as above with an additional int32_t scaleFactor argument.
92  MEMBER_FUNCTION_MAKE_OBJECT_DECLARATION(MarkupSection, CODEPORTING_ARGS(System::SharedPtr<Aspose::Pdf::Rectangle> rect, System::SharedPtr<Matrix> matrix, int32_t scaleFactor));
 // Paragraph-processing entry points and alignment predicates (non-const, no arguments).
 // NOTE(review): judging by the names, the AlignedLeft/AlignedRight variants handle
 // sections whose lines are left- or right-aligned - semantics not visible in this header.
102  void ProcessParagraphs();
107  bool IsSectionLinesAlignedToLeft();
112  bool IsSectionLinesAlignedToRight();
116  void ProcessParagraphsAlignedLeft();
120  void ProcessParagraphsAlignedRight();
 // Takes a matrix by shared pointer. NOTE(review): presumably applies it to the
 // section's points - confirm.
121  void TransformPoints(System::SharedPtr<Matrix> matrix);
 // Static character/line helpers; they take no instance state by construction.
 // GetFirstNotSpace receives a line as a list of TextFragment pointers and a fromEnd flag.
 // NOTE(review): fromEnd presumably selects scanning from the end of the line - confirm.
122  static bool IsSentenceEnding(char16_t c);
123  static char16_t GetFirstNotSpace(System::SharedPtr<System::Collections::Generic::List<System::SharedPtr<TextFragment>>> line, bool fromEnd);
124 
125 private:
126 
 // Instance state. No in-class initializers are given here.
 // NOTE(review): presumably populated by the constructors declared above - confirm.
130  int32_t _scaleFactor;
131  System::SharedPtr<Matrix> _transformationMatrix;
133  System::SharedPtr<Matrix> _normalizationMatrix;
134 
 // Private text heuristics. Note the mix of static and non-static declarations:
 // IsTextLineStartedFromCapitalLetter, IsHyphen, IsSuspiciousStarting and
 // GetIndexOfYearNumber are instance methods; the rest are static.
 // NOTE(review): meanings are inferred from names only; bodies are not in this header.
143  bool IsTextLineStartedFromCapitalLetter(System::SharedPtr<System::Collections::Generic::List<System::SharedPtr<TextFragment>>> line);
145  static bool LinesIdentical(System::SharedPtr<TextFragment> line, System::SharedPtr<TextFragment> prevLine);
146  static bool CharsAreSimilar(char16_t c1, char16_t c2);
148  bool IsHyphen(char16_t c);
149  bool IsSuspiciousStarting(char16_t c);
156  static int32_t GetIndexOfFirstLetter(System::SharedPtr<TextFragment> textFragment);
157  static bool IsValidLineStartSymbol(char16_t c);
158  int32_t GetIndexOfYearNumber(System::SharedPtr<TextFragment> textFragment);
159 
160 };
161 
162 } // namespace Text
163 } // namespace Pdf
164 } // namespace Aspose
165 
166 
String class used across the library. Is a substitute for C# System.String when translating code...
Definition: string.h:121
class ASPOSECPP_SHARED_CLASS List
Definition: ienumerable.h:17
Base class that enables using methods available for System.Object class in C#. All non-trivial classe...
Definition: object.h:64
Definition: Artifact.h:67
Represents a markup section - the rectangular region of a page that contains text and can be visually...
Definition: MarkupSection.h:45
Pointer class to wrap types being allocated on heap. Use it to manage memory for classes inheriting O...
Definition: fwd.h:22
Represents a text type.
Represents an absorber object of page structure objects such as sections and paragraphs. Performs search for sections and paragraphs of text and provides access to the rectangles and polygons that describe them in text coordinate space. Also performs text segments search and provides access to search results via TextFragments collections grouped by structure elements.
Definition: ParagraphAbsorber.h:53
Definition: AFRelationship.h:4
List forward declaration.
Definition: Artifact.h:74
Page markup represented by collections of MarkupSection and MarkupParagraph.
Definition: PageMarkup.h:52
Whole PDF file will be submitted.