Skip to content

Commit

Permalink
Parse HTML List
Browse files Browse the repository at this point in the history
  • Loading branch information
sixdouglas committed Nov 11, 2019
1 parent 7327d2f commit 93f9417
Show file tree
Hide file tree
Showing 6 changed files with 195 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -183,4 +183,4 @@ buildNumber.properties
*.pdf
#Allow pdf as source resources for testing
!openpdf/src/test/resources/*.pdf

!pdf-toolbox/src/test/resources/com/lowagie/examples/html/parseHelloWorld.html
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ Significant [Contributors to OpenPDF](https://github.com/LibrePDF/OpenPDF/graphs
[@andreasrosdal](https://github.com/andreasrosdal) - Andreas Røsdal - Maintainer of OpenPDF from 1.0 to 1.3.11, now retired from OpenPDF development.
[@daviddurand](https://github.com/daviddurand) - David G. Durand
[@tlxtellef](https://github.com/tlxtellef) - Tellef
[@asturio](https://github.com/asturio) - Claudio Clemens
[@asturio](https://github.com/asturio) - Claudio Clemens
[@ymasory](https://github.com/ymasory)
[@albfernandez](https://github.com/albfernandez) - Alberto Fernández
[@noavarice](https://github.com/noavarice)
Expand All @@ -101,9 +101,9 @@ Significant [Contributors to OpenPDF](https://github.com/LibrePDF/OpenPDF/graphs
[@lapo-luchini](https://github.com/lapo-luchini)
[@jeffrey-easyesi](https://github.com/jeffrey-easyesi)
[@V-F](https://github.com/V-F)
[@sixdouglas](https://github.com/sixdouglas) - Douglas Six
[@sixdouglas](https://github.com/sixdouglas) - Douglas Six
[@razilein](https://github.com/razilein) - Sita Geßner
[@PalAditya](https://github.com/PalAditya) - Aditya Pal

[@rammetzger](https://github.com/rammetzger)

Also, a very special thanks to the iText developers ;)
Original file line number Diff line number Diff line change
Expand Up @@ -552,10 +552,16 @@ public void endElement(String tag) {
stack.push(obj);
return;
}
if (stack.empty())
if (stack.empty()) {
document.add((Element) obj);
else
((TextElementArray) stack.peek()).add((Element) obj);
} else {
Object peek = stack.peek();
if (peek instanceof com.lowagie.text.List) {
((com.lowagie.text.List) peek).add((com.lowagie.text.List) obj);
} else {
((TextElementArray) peek).add((Element) obj);
}
}
return;
}
if (tag.equals(HtmlTags.LISTITEM)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* $Id: HelloHtml.java 3373 2008-05-12 16:21:24Z xlv $
*
* This code is part of the 'OpenPDF Tutorial'.
* You can find the complete tutorial at the following address:
* https://github.com/LibrePDF/OpenPDF/wiki/Tutorial
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*
*/

package com.lowagie.examples.html;

import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.html.HtmlParser;
import com.lowagie.text.pdf.PdfWriter;

import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Parses a 'Hello World' HTML document loaded from the classpath and converts it to a PDF file.
 *
 * @author blowagie
 */

public class ParseHelloHtml {

    /** Classpath location of the HTML document that gets parsed. */
    private static final String HTML_RESOURCE = "com/lowagie/examples/html/parseHelloWorld.html";

    /**
     * Reads the HTML resource and writes its content to 'parseHelloWorld.pdf'.
     * Failures are reported on standard error instead of being propagated.
     *
     * @param args no arguments needed here
     */
    public static void main(String[] args) {
        System.out.println("Parse Hello World");

        // The input is opened first so that a missing resource is detected
        // before an output file is created.
        try (java.io.InputStream html =
                ParseHelloHtml.class.getClassLoader().getResourceAsStream(HTML_RESOURCE)) {
            // getResourceAsStream signals "not found" with null rather than an exception
            if (html == null) {
                System.err.println("Resource not found on classpath: " + HTML_RESOURCE);
                return;
            }

            // step 1: creation of a document-object
            // Resources are closed in reverse order: the document first, then the file stream.
            try (FileOutputStream pdf = new FileOutputStream("parseHelloWorld.pdf");
                 Document document = new Document()) {
                PdfWriter.getInstance(document, pdf);
                // step 2: we open the document
                document.open();
                // step 3: parsing the HTML document to convert it in PDF
                HtmlParser.parse(document, html);
            }
        } catch (DocumentException | IOException de) {
            System.err.println(de.getMessage());
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* $Id: HelloHtml.java 3373 2008-05-12 16:21:24Z xlv $
*
* This code is part of the 'OpenPDF Tutorial'.
* You can find the complete tutorial at the following address:
* https://github.com/LibrePDF/OpenPDF/wiki/Tutorial
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*
*/

package com.lowagie.examples.html;

import com.lowagie.text.DocumentException;
import com.lowagie.text.Element;
import com.lowagie.text.html.simpleparser.HTMLWorker;
import com.lowagie.text.html.simpleparser.StyleSheet;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Parses an HTML snippet containing nested lists with {@code HTMLWorker}
 * and prints the resulting element tree to standard output.
 *
 * @author blowagie
 */

public class ParseNestedHtmlList {

    /**
     * Parses a fixed HTML string with nested ordered lists and prints every
     * parsed element, indented according to its nesting depth.
     * Failures are reported on standard error instead of being propagated.
     *
     * @param args no arguments needed here
     */
    public static void main(String[] args) {
        System.out.println("Parse Nested HTML List");
        try {
            final String htmlText =
                    "<html>"
                    + "<body>"
                    + "<p>What should you say?</p>"
                    + "<ul>"
                    + " <li>Hello</li>"
                    + " <li>World</li>"
                    + "</ul>"
                    + "<ol>"
                    + " <li>Element-1"
                    + " <ol>"
                    + " <li>Element-1-1</li>"
                    + " <li>Element-1-2</li>"
                    + " </ol>"
                    + " </li>"
                    + " <li>Element-2"
                    + " <ol>"
                    + " <li>Element-2-1"
                    + " <ol>"
                    + " <li>Element-2-1-1"
                    + " <ol>"
                    + " <li>Element-2-1-1-1</li>"
                    + " <li>Element-2-1-1-2</li>"
                    + " </ol>"
                    + " </li>"
                    + " <li>Element-2-1-2"
                    + " <ol>"
                    + " <li>Element-2-1-2-1</li>"
                    + " <li>Element-2-1-2-2</li>"
                    + " </ol>"
                    + " </li>"
                    + " </ol>"
                    + " </li>"
                    + " <li>Element-2-2</li>"
                    + " </ol>"
                    + " </li>"
                    + "</ol>"
                    + "</body>"
                    + "</html>";

            final StringReader reader = new StringReader(htmlText);
            final StyleSheet styleSheet = new StyleSheet();
            final Map<String, Object> interfaceProps = new HashMap<>();

            final List<Element> elements = HTMLWorker.parseToList(reader, styleSheet, interfaceProps);
            printElement("", elements);

        } catch (DocumentException | IOException de) {
            System.err.println(de.getMessage());
        }
    }

    /**
     * Prints the class of each element; recurses into lists and prints the
     * first chunk of every other element.
     *
     * @param depth    prefix printed before each line, grows by one space per nesting level
     * @param elements the elements to print
     */
    private static void printElement(String depth, List<Element> elements) {
        for (Element element : elements) {
            System.out.println(depth + "- element.getClass() = " + element.getClass());
            if (element instanceof com.lowagie.text.List) {
                com.lowagie.text.List elementList = (com.lowagie.text.List) element;
                printElement(depth + " ", elementList.getItems());
            } else {
                // An element without content has no first chunk; guard the lookup
                // so one empty element does not abort the whole printout.
                final List<?> chunks = element.getChunks();
                if (chunks == null || chunks.isEmpty()) {
                    System.out.println(depth + " element = <no chunks>");
                } else {
                    System.out.println(depth + " element = " + chunks.get(0).toString());
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<!-- Sample HTML input: one paragraph, an unordered list, and an ordered list whose two items each contain a nested ordered list. Presumably the parseHelloWorld.html resource read by ParseHelloHtml; confirm against the repository layout. -->
<html>
<body>
<p>What should you say?</p>
<ul>
<li>Hello</li>
<li>World</li>
</ul>
<ol>
<li>Element-1
<ol>
<li>Element-1-1</li>
<li>Element-1-2</li>
</ol>
</li>
<li>Element-2
<ol>
<li>Element-2-1</li>
<li>Element-2-2</li>
</ol>
</li>
</ol>
</body>
</html>

0 comments on commit 93f9417

Please sign in to comment.