From a67e823838943b31fb7cea68bd592093e197cf16 Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Sat, 15 Jun 2019 18:55:47 +0200
Subject: [PATCH] LibHTML: Start working on a simple HTML library.

I'd like to have rich text, and we might as well use HTML for that. :^)
---
// Reviewer notes (free-text area of the patch; not part of the commit message or of any hunk).
// This patch adds a LibHTML directory containing a small DOM-like node hierarchy
// (Node, ParentNode, Element, Document, Text), a parse() entry point, a dump_tree()
// debugging helper, and a test program "tho" that is built by makeall.sh and copied
// into the root filesystem image by build-root-filesystem.sh.
// NOTE(review): this copy appears to have lost angle-bracketed text: the #include lines
// have no operand, and static_cast / Retained / Vector / template appear without
// arguments. Presumably an extraction artifact -- confirm against the original commit.
 Kernel/build-root-filesystem.sh | 1 +
 Kernel/makeall.sh | 1 +
 LibHTML/.gitignore | 4 ++++
 LibHTML/Document.cpp | 11 +++++++++
 LibHTML/Document.h | 13 ++++++++++
 LibHTML/Dump.cpp | 26 ++++++++++++++++++++
 LibHTML/Dump.h | 5 ++++
 LibHTML/Element.cpp | 12 ++++++++++
 LibHTML/Element.h | 30 +++++++++++++++++++++++
 LibHTML/Makefile | 42 +++++++++++++++++++++++++++++++++
 LibHTML/Node.cpp | 24 +++++++++++++++++++
 LibHTML/Node.h | 41 ++++++++++++++++++++++++++++++++
 LibHTML/ParentNode.cpp | 10 ++++++++
 LibHTML/ParentNode.h | 32 +++++++++++++++++++++++++
 LibHTML/Parser.cpp | 32 +++++++++++++++++++++++++
 LibHTML/Parser.h | 7 ++++++
 LibHTML/Text.cpp | 13 ++++++++++
 LibHTML/Text.h | 15 ++++++++++++
 LibHTML/test.cpp | 10 ++++++++
 19 files changed, 329 insertions(+)
 create mode 100644 LibHTML/.gitignore
 create mode 100644 LibHTML/Document.cpp
 create mode 100644 LibHTML/Document.h
 create mode 100644 LibHTML/Dump.cpp
 create mode 100644 LibHTML/Dump.h
 create mode 100644 LibHTML/Element.cpp
 create mode 100644 LibHTML/Element.h
 create mode 100644 LibHTML/Makefile
 create mode 100644 LibHTML/Node.cpp
 create mode 100644 LibHTML/Node.h
 create mode 100644 LibHTML/ParentNode.cpp
 create mode 100644 LibHTML/ParentNode.h
 create mode 100644 LibHTML/Parser.cpp
 create mode 100644 LibHTML/Parser.h
 create mode 100644 LibHTML/Text.cpp
 create mode 100644 LibHTML/Text.h
 create mode 100644 LibHTML/test.cpp

diff --git a/Kernel/build-root-filesystem.sh b/Kernel/build-root-filesystem.sh
index 8ffa9fe64274ae..8850ed8b65031d 100755
--- a/Kernel/build-root-filesystem.sh
+++ b/Kernel/build-root-filesystem.sh
@@ -83,6 +83,7 @@ cp ../Servers/LookupServer/LookupServer mnt/bin/LookupServer
 cp ../Servers/SystemServer/SystemServer mnt/bin/SystemServer
 cp ../Servers/WindowServer/WindowServer mnt/bin/WindowServer
 cp ../Shell/Shell mnt/bin/Shell
+cp ../LibHTML/tho mnt/bin/tho
 echo "done"
 
 echo -n "installing shortcuts... "
diff --git a/Kernel/makeall.sh b/Kernel/makeall.sh
index 9b04f3339f2c5b..12d5edfb7bbf1e 100755
--- a/Kernel/makeall.sh
+++ b/Kernel/makeall.sh
@@ -17,6 +17,7 @@ build_targets="$build_targets ../Servers/SystemServer"
 build_targets="$build_targets ../Servers/LookupServer"
 build_targets="$build_targets ../Servers/WindowServer"
 build_targets="$build_targets ../LibGUI"
+build_targets="$build_targets ../LibHTML"
 build_targets="$build_targets ../Userland"
 build_targets="$build_targets ../Applications/Terminal"
 build_targets="$build_targets ../Applications/FontEditor"
diff --git a/LibHTML/.gitignore b/LibHTML/.gitignore
new file mode 100644
index 00000000000000..e6e86bf61b5360
--- /dev/null
+++ b/LibHTML/.gitignore
@@ -0,0 +1,4 @@
+*.o
+*.d
+libhtml.a
+tho
diff --git a/LibHTML/Document.cpp b/LibHTML/Document.cpp
new file mode 100644
index 00000000000000..a7ba17c2b34d8c
--- /dev/null
+++ b/LibHTML/Document.cpp
@@ -0,0 +1,11 @@
+#include
+
+Document::Document() // Constructs a parent node tagged NodeType::DOCUMENT_NODE.
+    : ParentNode(NodeType::DOCUMENT_NODE)
+{
+}
+
+Document::~Document()
+{
+}
+
diff --git a/LibHTML/Document.h b/LibHTML/Document.h
new file mode 100644
index 00000000000000..cf31d766b9642e
--- /dev/null
+++ b/LibHTML/Document.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include
+#include
+
+class Document : public ParentNode { // Root node type; adds no members of its own in this patch.
+public:
+    Document();
+    virtual ~Document() override;
+
+private:
+};
+
diff --git a/LibHTML/Dump.cpp b/LibHTML/Dump.cpp
new file mode 100644
index 00000000000000..8826eb444439d2
--- /dev/null
+++ b/LibHTML/Dump.cpp
@@ -0,0 +1,26 @@
+#include
+#include
+#include
+#include
+#include
+
+void dump_tree(Node& node) // Prints one line describing `node`, then recurses into its children if it is a parent node.
+{
+    static int indent = 0; // Current nesting depth; a function-local static, so it is shared by every call.
+    for (int i = 0; i < indent; ++i)
+        printf(" ");
+    if (node.is_document()) {
+        printf("*Document*\n");
+    } else if (node.is_element()) {
+        printf("<%s>\n", static_cast(node).tag_name().characters()); // Elements are shown as their tag name in angle brackets.
+    } else if (node.is_text()) {
+        printf("\"%s\"\n", static_cast(node).data().characters()); // Text nodes are shown as their data in double quotes.
+    }
+    ++indent; // Children are printed one level deeper; undone below after the recursion.
+    if (node.is_parent_node()) {
+        static_cast(node).for_each_child([](Node& child) {
+            dump_tree(child);
+        });
+    }
+    --indent;
+}
diff --git a/LibHTML/Dump.h b/LibHTML/Dump.h
new file mode 100644
index 00000000000000..09b7de697e4864
--- /dev/null
+++ b/LibHTML/Dump.h
@@ -0,0 +1,5 @@
+#pragma once
+
+class Node;
+
+void dump_tree(Node&); // Writes an indented textual dump of the tree rooted at the given node via printf.
diff --git a/LibHTML/Element.cpp b/LibHTML/Element.cpp
new file mode 100644
index 00000000000000..5c284b6f741d57
--- /dev/null
+++ b/LibHTML/Element.cpp
@@ -0,0 +1,12 @@
+#include
+
+Element::Element(const String& tag_name) // Constructs a parent node tagged NodeType::ELEMENT_NODE and stores the tag name.
+    : ParentNode(NodeType::ELEMENT_NODE)
+    , m_tag_name(tag_name)
+{
+}
+
+Element::~Element()
+{
+}
+
diff --git a/LibHTML/Element.h b/LibHTML/Element.h
new file mode 100644
index 00000000000000..5b9f390603f3c6
--- /dev/null
+++ b/LibHTML/Element.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include
+#include
+
+class Attribute { // A name/value string pair; this patch declares no accessors for either field.
+public:
+    Attribute(const String& name, const String& value)
+        : m_name(name)
+        , m_value(value)
+    {
+    }
+
+private:
+    String m_name;
+    String m_value;
+};
+
+class Element : public ParentNode { // A parent node that carries a tag name.
+public:
+    explicit Element(const String& tag_name);
+    virtual ~Element() override;
+
+    const String& tag_name() const { return m_tag_name; }
+
+private:
+    String m_tag_name;
+    Vector m_attributes; // NOTE(review): nothing in this patch reads or writes this member.
+};
+
diff --git a/LibHTML/Makefile b/LibHTML/Makefile
new file mode 100644
index 00000000000000..ca947be04fafa7
--- /dev/null
+++ b/LibHTML/Makefile
@@ -0,0 +1,42 @@
+include ../Makefile.common
+
+LIBHTML_OBJS = \
+    Node.o \
+    ParentNode.o \
+    Element.o \
+    Document.o \
+    Text.o \
+    Parser.o \
+    Dump.o
+
+TEST_OBJS = test.o
+TEST_PROGRAM = tho
+
+OBJS = $(LIBHTML_OBJS) $(TEST_OBJS)
+
+LIBRARY = libhtml.a
+DEFINES += -DUSERLAND
+
+all: $(LIBRARY) $(TEST_PROGRAM)
+
+$(TEST_PROGRAM): $(TEST_OBJS) $(LIBRARY)
+	$(LD) -o $@ $(LDFLAGS) -L. $(TEST_OBJS) -lhtml -lgui -lcore -lc
+
+$(LIBRARY): $(LIBHTML_OBJS)
+	@echo "LIB $@"; $(AR) rcs $@ $(LIBHTML_OBJS)
+
+.cpp.o:
+	@echo "CXX $<"; $(CXX) $(CXXFLAGS) -o $@ -c $<
+
+-include $(OBJS:%.o=%.d)
+
+clean:
+	@echo "CLEAN"; rm -f $(TEST_PROGRAM) $(LIBRARY) $(OBJS) *.d
+
+install: $(LIBRARY)
+	mkdir -p ../Root/usr/include/LibHTML
+	# Copy headers
+	rsync -r -a --include '*/' --include '*.h' --exclude '*' . ../Root/usr/include/LibHTML
+	# Install the library
+	cp $(LIBRARY) ../Root/usr/lib
+
diff --git a/LibHTML/Node.cpp b/LibHTML/Node.cpp
new file mode 100644
index 00000000000000..961699d55402a5
--- /dev/null
+++ b/LibHTML/Node.cpp
@@ -0,0 +1,24 @@
+#include
+#include
+
+Node::Node(NodeType type) // Records the node's type tag; the remaining members keep their in-class defaults.
+    : m_type(type)
+{
+}
+
+Node::~Node()
+{
+}
+
+void Node::retain() // Adds one reference; asserts the count is still non-zero first.
+{
+    ASSERT(m_retain_count);
+    ++m_retain_count;
+}
+
+void Node::release() // Drops one reference and deletes this node when the count reaches zero.
+{
+    ASSERT(m_retain_count);
+    if (!--m_retain_count)
+        delete this; // No member may be touched after this point.
+}
diff --git a/LibHTML/Node.h b/LibHTML/Node.h
new file mode 100644
index 00000000000000..3650782929f493
--- /dev/null
+++ b/LibHTML/Node.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include
+#include
+
+enum class NodeType : unsigned { // NOTE(review): the values look like the DOM nodeType constants -- confirm.
+    INVALID = 0,
+    ELEMENT_NODE = 1,
+    TEXT_NODE = 3,
+    DOCUMENT_NODE = 9,
+};
+
+class Node { // Base class: a type tag, a manual retain count, and raw sibling links.
+public:
+    virtual ~Node();
+
+    void retain();
+    void release();
+    int retain_count() const { return m_retain_count; }
+
+    NodeType type() const { return m_type; }
+    bool is_element() const { return type() == NodeType::ELEMENT_NODE; }
+    bool is_text() const { return type() == NodeType::TEXT_NODE; }
+    bool is_document() const { return type() == NodeType::DOCUMENT_NODE; }
+    bool is_parent_node() const { return is_element() || is_document(); } // True for the two types that derive from ParentNode in this patch.
+
+    Node* next_sibling() { return m_next_sibling; }
+    Node* previous_sibling() { return m_previous_sibling; }
+    void set_next_sibling(Node* node) { m_next_sibling = node; }
+    void set_previous_sibling(Node* node) { m_previous_sibling = node; }
+
+protected:
+    explicit Node(NodeType); // Protected: only subclasses construct a Node.
+
+    int m_retain_count { 1 }; // A freshly constructed node starts with a count of one.
+    NodeType m_type { NodeType::INVALID };
+    Vector m_children; // NOTE(review): not referenced by any code in this patch; ParentNode links children through sibling pointers instead.
+    Node* m_next_sibling { nullptr };
+    Node* m_previous_sibling { nullptr };
+};
+
diff --git a/LibHTML/ParentNode.cpp b/LibHTML/ParentNode.cpp
new file mode 100644
index 00000000000000..d2410323759749
--- /dev/null
+++ b/LibHTML/ParentNode.cpp
@@ -0,0 +1,10 @@
+#include
+
+void ParentNode::append_child(Retained node) // Makes `node` the new last child, and also the first child when the list was empty.
+{
+    if (m_last_child)
+        m_last_child->set_next_sibling(node.ptr()); // NOTE(review): only the forward link is set; set_previous_sibling() is never called on `node` here.
+    m_last_child = &node.leak_ref(); // NOTE(review): presumably hands the reference to the raw pointer -- confirm; no matching release() of children appears in this patch.
+    if (!m_first_child)
+        m_first_child = m_last_child;
+}
diff --git a/LibHTML/ParentNode.h b/LibHTML/ParentNode.h
new file mode 100644
index 00000000000000..357315e3d789ba
--- /dev/null
+++ b/LibHTML/ParentNode.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include
+
+class ParentNode : public Node { // A node with children, tracked through raw first/last child pointers.
+public:
+    void append_child(Retained);
+
+    Node* first_child() { return m_first_child; }
+    Node* last_child() { return m_last_child; }
+
+    template void for_each_child(F);
+
+protected:
+    explicit ParentNode(NodeType type)
+        : Node(type)
+    {
+    }
+
+private:
+    Node* m_first_child { nullptr };
+    Node* m_last_child { nullptr };
+};
+
+template
+inline void ParentNode::for_each_child(F func) // Calls func(child) for each child, walking next_sibling() from first_child().
+{
+    for (auto* node = first_child(); node; node = node->next_sibling()) {
+        func(*node);
+    }
+}
+
diff --git a/LibHTML/Parser.cpp b/LibHTML/Parser.cpp
new file mode 100644
index 00000000000000..3bc6f2a99a9db3
--- /dev/null
+++ b/LibHTML/Parser.cpp
@@ -0,0 +1,32 @@
+#include
+#include
+#include
+
+static Retained create_element(const String& tag_name) // File-local helper: allocates an Element with the given tag name and adopts it.
+{
+    return adopt(*new Element(tag_name));
+}
+
+Retained parse(const String& html) // NOTE(review): `html` is never read in this body; the tree below is built from fixed strings.
+{
+    auto doc = adopt(*new Document);
+
+    auto head = create_element("head");
+    auto title = create_element("title");
+    auto title_text = adopt(*new Text("Page Title"));
+    title->append_child(title_text);
+    head->append_child(title);
+
+    doc->append_child(head);
+
+    auto body = create_element("body");
+    auto h1 = create_element("h1");
+    auto h1_text = adopt(*new Text("Hello World!"));
+
+    h1->append_child(h1_text);
+    body->append_child(h1);
+    doc->append_child(body);
+
+    return doc;
+}
+
diff --git a/LibHTML/Parser.h b/LibHTML/Parser.h
new file mode 100644
index 00000000000000..db593dae2d5f99
--- /dev/null
+++ b/LibHTML/Parser.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include
+#include
+
+Retained parse(const String& html); // Returns the root of a node tree; see Parser.cpp for what is actually built.
+
diff --git a/LibHTML/Text.cpp b/LibHTML/Text.cpp
new file mode 100644
index 00000000000000..67ec381f73fdd9
--- /dev/null
+++ b/LibHTML/Text.cpp
@@ -0,0 +1,13 @@
+#include
+
+Text::Text(const String& data) // Constructs a node tagged NodeType::TEXT_NODE holding a copy of `data`.
+    : Node(NodeType::TEXT_NODE)
+    , m_data(data)
+{
+}
+
+Text::~Text()
+{
+}
+
+
diff --git a/LibHTML/Text.h b/LibHTML/Text.h
new file mode 100644
index 00000000000000..bdc9f86ec2d47e
--- /dev/null
+++ b/LibHTML/Text.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include
+#include
+
+class Text final : public Node { // Leaf node: derives from Node directly, so it has no child list.
+public:
+    explicit Text(const String&);
+    virtual ~Text() override;
+
+    const String& data() const { return m_data; }
+
+private:
+    String m_data;
+};
diff --git a/LibHTML/test.cpp b/LibHTML/test.cpp
new file mode 100644
index 00000000000000..9d774335c29084
--- /dev/null
+++ b/LibHTML/test.cpp
@@ -0,0 +1,10 @@
+#include
+#include
+#include
+
+int main() // Test program: parses a fixed string and dumps the resulting tree.
+{
+    String html = "my page

Hi there

Hello World!

";
+    auto doc = parse(html);
+    dump_tree(doc); // Prints the tree with printf.
+}