// TextUtils.h
#ifndef __figureextractor__TextUtils__
#define __figureextractor__TextUtils__
#include <vector>
#include <unordered_map>
#include <string>
#include <TextOutputDev.h>
#include <Page.h>
#include "PDFUtils.h"
// Class to track document level statistics
//
// An instance is constructed from a vector of TextPage pointers plus the
// owning PDFDoc, and is then queried through the member functions below.
// Only declarations are visible in this header: the notes on individual
// members state what the signatures show, and anything inferred from a name
// is marked as an assumption to be confirmed against the implementation file.
class DocumentStatistics {
public:
// Constructor.
//   textPages - text pages to analyse, taken by non-const reference.
//               NOTE(review): whether the vector is modified is not visible
//               from this header - confirm.
//   doc       - the PDF document the pages belong to (non-owning as far as
//               this declaration shows; no destructor is declared here).
//   quiet     - presumably suppresses diagnostic output - confirm.
DocumentStatistics(std::vector<TextPage *> &textPages, PDFDoc *doc,
bool quiet);
// Presumably returns the `modeFont` member, i.e. the most frequent font size
// in the document - confirm.
double getModeFont();
// Presumably true when `word` is larger than the document's mode font size;
// the exact threshold lives in the implementation - confirm.
bool wordIsLarge(TextWord *word);
// Takes the left (x) and right (x2) extent of a line. Returns an int, not a
// bool: the meaning of the individual return values is defined in the
// implementation - TODO document the codes here.
int lineIsAligned(double x, double x2);
// Presumably true when `word` uses the document's most common font
// (see `modeFontName`) - confirm.
bool wordIsStandardFont(TextWord *word);
// Variant of lineIsAligned with explicit tolerances; l_tol and r_tol are
// presumably the allowed deviation from the left and right margins
// respectively - confirm. Return codes: see the implementation.
int lineIsAlignedToTol(double x, double x2, double l_tol, double r_tol);
// Presumably true when `line` matches a recurring page header recorded in
// `pageHeaders` - confirm.
bool isPageHeader(TextLine *line);
// Presumably true when `line` looks like a page number - confirm.
bool isPageNumber(TextLine *line);
// Presumably returns the `twoColumn` flag - confirm.
bool documentIsTwoColumn();
// Presumably true when the words of `line` are set in a bold font - confirm.
bool lineIsBold(TextLine *line);
// Takes the left (x) and right (x2) extent of a line; presumably checks the
// line's centre against the `boldCentersUp` / `boldCentersDown` tallies -
// confirm.
bool isBoldCentered(double x, double x2);
// Presumably returns the `imageFilled` flag - confirm.
bool isBodyTextGraphical();
private:
// Counters; presumably the number of words / lines seen across the whole
// document - confirm.
int totalWords;
int totalLines;
// Margin x-coordinates; "First" / "Second" presumably refer to the first and
// second text column of a two-column layout - confirm.
double lMarginFirst;
double lMarginSecond;
double rMarginFirst;
double rMarginSecond;
// Value handed out by getModeFont() (assumed) and the matching font name.
double modeFont;
std::string modeFontName;
// Document-level layout flags. NOTE(review): none of these have in-class
// initialisers, so the constructor is responsible for setting every one.
bool twoColumn;
bool rightAligned;
bool hasPageNumbers;
bool imageFilled;
// int -> count tallies; the int key is presumably a rounded centre
// coordinate of bold lines - confirm.
std::unordered_map<int, int> boldCentersUp;
std::unordered_map<int, int> boldCentersDown;
// string -> count tally, presumably candidate header text and how often it
// was seen - confirm.
std::unordered_map<std::string, int> pageHeaders;
// double -> count tallies for right / left margin positions. The keys are
// doubles, so two positions share a bucket only when they compare exactly
// equal.
std::unordered_map<double, int> rMarginCounts;
std::unordered_map<double, int> lMarginCounts;
};
// Debugging
//
// Debug helper declared here and defined elsewhere. Takes the page to
// inspect and the document statistics to consult; returns nothing.
//   onlyLineStarts - presumably restricts the output to the first word of
//                    each line - confirm against the implementation.
void printTextProperties(TextPage *page, DocumentStatistics *docStats,
bool onlyLineStarts);
// Returns a vector of TextLine pointers for `textPage`, by value.
// NOTE(review): presumably every line on the page, and presumably the
// pointers remain owned by the TextPage rather than the caller - confirm
// ordering and ownership against the implementation.
std::vector<TextLine *> getLines(TextPage *textPage);
// Takes a line and four pointers to double; returns nothing.
// NOTE(review): minX/minY/maxX/maxY are presumably output parameters that
// receive the bounding box of `line` - confirm, including whether null
// pointers are tolerated.
void getTextLineBB(TextLine *line, double *minX, double *minY, double *maxX,
double *maxY);
// Presumably true when `word` is set in an italic font - confirm how this is
// detected in the implementation.
// The `const` here qualifies the pointer parameter itself, not the TextWord
// it points to.
bool wordIsItalic(TextWord *const word);
// Presumably true when `word` is set in a bold font - confirm how this is
// detected in the implementation.
// The `const` here qualifies the pointer parameter itself, not the TextWord
// it points to.
bool wordIsBold(TextWord *const word);
// Presumably true when the last character of `word` is a period - confirm
// (e.g. behaviour for an empty word) against the implementation.
// The `const` here qualifies the pointer parameter itself, not the TextWord
// it points to.
bool wordEndsWithPeriod(TextWord *const word);
#endif