Skip to content

Commit

Permalink
Merge pull request #12 from giuseongit/experimental/better-engine
Browse files Browse the repository at this point in the history
New parsing engine
  • Loading branch information
madeindjs authored Apr 18, 2019
2 parents 8e66100 + 17590d1 commit e70753b
Showing 1 changed file with 71 additions and 5 deletions.
76 changes: 71 additions & 5 deletions src/crystagiri/html.cr
Original file line number Diff line number Diff line change
Expand Up @@ -49,32 +49,98 @@ module Crystagiri
# Initialize an Html object from Html source
def initialize(@content : String)
@nodes = XML.parse_html @content

@ids = Hash(String, XML::Node).new
@tags = Hash(String, Array(XML::Node)).new
@classes = Hash(String, Array(XML::Node)).new

visit @nodes # Build internal pointer map
end

# Functions used to populate internal maps

private def add_id(id : String, node : XML::Node)
@ids[id] = node
end

private def add_node(node : XML::Node)
if @tags[node.name]? == nil
@tags[node.name] = [] of XML::Node
end
@tags[node.name] << node
end

private def add_class(klass : String, node : XML::Node)
if @classes[klass]? == nil
@classes[klass] = [] of XML::Node
end
@classes[klass] << node
end

# Depth-first visit. Given a node, extract metadata from
# node (if exists), then visit each child.
private def visit(node : XML::Node)
# We only extract metadata from HTML nodes
if node.element?
add_node node
if to = node["id"]?
add_id to, node
end
if classes = node["class"]?
classes.split(' ') { |to| add_class to, node }
end
end
# visit each child
node.children.each do | child |
visit child
end
end

# Find first tag by tag name and return
# `Crystagiri::Tag` founded or a nil if not founded
def at_tag(tag_name : String) : Crystagiri::Tag | Nil
where_tag(tag_name) { |tag| return tag }
if tags = @tags[tag_name]?
tags.each do |tag|
return Tag.new(tag).as Crystagiri::Tag
end
end
return nil
end

# Find all nodes by tag name and yield
# `Crystagiri::Tag` founded
def where_tag(tag_name : String, &block) : Array(Tag)
return css(tag_name) { |tag| yield tag }
arr = [] of Crystagiri::Tag
if tags = @tags[tag_name]?
tags.each do |node|
tag = Tag.new(node).as Crystagiri::Tag
yield tag
arr << tag
end
end
return arr
end

# Find all nodes by classname and yield
# `Crystagiri::Tag` founded
def where_class(class_name : String, &block) : Array(Tag)
return css(".#{class_name}") { |tag| yield tag }
arr = [] of Crystagiri::Tag
if klasses = @classes[class_name]?
klasses.each do |node|
klass = Tag.new(node).as Crystagiri::Tag
yield klass
arr << klass
end
end
return arr
end

# Find a node by its id and return a
# `Crystagiri::Tag` founded or a nil if not founded
def at_id(id_name : String) : Crystagiri::Tag | Nil
css("##{id_name}") { |tag| return tag }
return nil
if node = @ids[id_name]?
return Tag.new(node).as Crystagiri::Tag
end
end

# Find all node corresponding to the css query and yield
Expand Down

0 comments on commit e70753b

Please sign in to comment.