-
Notifications
You must be signed in to change notification settings - Fork 0
/
html_scraper_test.go
101 lines (87 loc) · 1.88 KB
/
html_scraper_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
package scraper
import (
"strings"
"testing"
)
// TestIsValid exercises TagWithContent.isValidTag with every combination of
// empty and populated Tag/Content fields, both without a specific tag ("")
// and with a specific tag ("div").
//
// The expectations encoded below are: the result is false whenever Tag or
// Content is empty, and true when both are set (with either no specific tag
// or a specific tag equal to the value's Tag).
func TestIsValid(t *testing.T) {
	cases := []struct {
		name        string         // unique subtest name, so a failure identifies the case
		tc          TagWithContent // value under test
		specificTag string         // argument passed to isValidTag
		want        bool           // expected result
	}{
		{
			name:        "no tag and no content",
			tc:          TagWithContent{Content: "", Tag: ""},
			specificTag: "",
			want:        false,
		},
		{
			name:        "content but no tag",
			tc:          TagWithContent{Content: "content", Tag: ""},
			specificTag: "",
			want:        false,
		},
		{
			name:        "tag but no content, no specific tag",
			tc:          TagWithContent{Content: "", Tag: "div"},
			specificTag: "",
			want:        false,
		},
		{
			name:        "tag and content, no specific tag",
			tc:          TagWithContent{Content: "content", Tag: "div"},
			specificTag: "",
			want:        true,
		},
		{
			name:        "tag but no content, matching specific tag",
			tc:          TagWithContent{Content: "", Tag: "div"},
			specificTag: "div",
			want:        false,
		},
		{
			name:        "tag and content, matching specific tag",
			tc:          TagWithContent{Content: "content", Tag: "div"},
			specificTag: "div",
			want:        true,
		},
	}

	for _, c := range cases {
		// Subtests run synchronously (no t.Parallel), so capturing the loop
		// variable is safe on every Go version.
		t.Run(c.name, func(t *testing.T) {
			got := c.tc.isValidTag(c.specificTag)
			if got != c.want {
				t.Errorf("isValidTag(%q) on %+v = %v, want %v", c.specificTag, c.tc, got, c.want)
			}
		})
	}
}
// TestParseHTMLPage feeds a minimal HTML document to parseHTMLPage, asking
// for the "html" tag, and checks that:
//   - the first returned entry has the requested tag (case-insensitive) and
//     its Content contains no space characters,
//   - at least one returned entry has the tag "body",
//   - exactly 4 entries are returned.
func TestParseHTMLPage(t *testing.T) {
	mock := `<html>
<body>
<div></div>
</body>
</html>`
	expectedTag := "html"

	result := parseHTMLPage(mock, expectedTag)
	t.Log("res ", result)

	// Guard before indexing: an empty result would otherwise panic on
	// result[0] rather than report a test failure.
	// NOTE(review): assumes parseHTMLPage returns a slice — confirm.
	if len(result) == 0 {
		t.Fatalf("parseHTMLPage returned no tags for %q, expected 4", expectedTag)
	}

	first := result[0]
	if strings.Contains(first.Content, " ") {
		t.Errorf("parsed content should not contain empty spaces")
	}
	if !strings.EqualFold(first.Tag, expectedTag) {
		t.Errorf("incorrect tag extracted, asked for %v and got instead %v", expectedTag, first.Tag)
	}

	// The remaining checks look for the child tag among all returned entries.
	expectedTag = "body"
	occurrences := 0
	for _, entry := range result {
		if entry.Tag == expectedTag {
			occurrences++
		}
	}
	if occurrences == 0 {
		t.Errorf("parsed content should contain child tag %v", expectedTag)
	}

	if len(result) != 4 {
		t.Errorf("expected 4 tags, received %v instead", len(result))
	}
}