forked from NaturalIntelligence/fast-xml-parser
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7439272
commit 6f10c26
Showing
9 changed files
with
327 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
FXP supports parsing of HTML document. Here is an example; | ||
|
||
Input HTML Document | ||
```html | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<script> | ||
window.dataLayer = window.dataLayer || []; | ||
function gtag(){dataLayer.push(arguments);} | ||
gtag('js', new Date()); | ||
gtag('config', 'UA-80202630-2'); | ||
</script> | ||
|
||
<title>Fast XMl Parser</title> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1"> | ||
<link rel="stylesheet" href="static/css/bootstrap.min.css"> | ||
<link rel="stylesheet" href="static/css/jquery-confirm.min.css"> | ||
<link rel="stylesheet" type="text/css" href="style.css"> | ||
|
||
<script src="static/js/jquery-3.2.1.min.js"></script> | ||
<style> | ||
.CodeMirror{ | ||
height: 100%; | ||
width: 100%; | ||
} | ||
</style> | ||
</head> | ||
<body role="document" style="background-color: #2c3e50;"> | ||
<h1>Heading</h1> | ||
<hr> | ||
<h2>&inr;</h2> | ||
<pre> | ||
<h1>Heading</h1> | ||
<hr> | ||
<h2>&inr;</h2> | ||
</pre> | ||
<script> | ||
let highlightedLine = null; | ||
let editor; | ||
<!-- this should not be parsed separately --> | ||
function updateLength(){ | ||
const xmlData = editor.getValue(); | ||
$("#lengthxml")[0].innerText = xmlData.replace(/>\s*</g, "><").length; | ||
} | ||
</script> | ||
</body> | ||
</html> | ||
``` | ||
|
||
Code and necessary configuration to parse it to JS object. | ||
|
||
```js | ||
const parsingOptions = { | ||
ignoreAttributes: false, | ||
// preserveOrder: true, | ||
unpairedTags: ["hr", "br", "link", "meta"], | ||
stopNodes : [ "*.pre", "*.script"], | ||
processEntities: true, | ||
htmlEntities: true | ||
}; | ||
const parser = new XMLParser(parsingOptions); | ||
parser.parse(html); | ||
``` | ||
|
||
JS Object | ||
```json | ||
{ | ||
"html": { | ||
"head": { | ||
"script": [ | ||
"\n window.dataLayer = window.dataLayer || [];\n function gtag(){dataLayer.push(arguments);}\n gtag('js', new Date());\n \n gtag('config', 'UA-80202630-2');\n ", | ||
{ | ||
"@_src": "static/js/jquery-3.2.1.min.js" | ||
} | ||
], | ||
"title": "Fast XMl Parser", | ||
"meta": [ | ||
{ | ||
"@_charset": "UTF-8" | ||
}, | ||
{ | ||
"@_name": "viewport", | ||
"@_content": "width=device-width, initial-scale=1" | ||
} | ||
], | ||
"link": [ | ||
{ | ||
"@_rel": "stylesheet", | ||
"@_href": "static/css/bootstrap.min.css" | ||
}, | ||
{ | ||
"@_rel": "stylesheet", | ||
"@_href": "static/css/jquery-confirm.min.css" | ||
}, | ||
{ | ||
"@_rel": "stylesheet", | ||
"@_type": "text/css", | ||
"@_href": "style.css" | ||
} | ||
], | ||
"style": ".CodeMirror{\n height: 100%;\n width: 100%;\n }" | ||
}, | ||
"body": { | ||
"h1": "Heading", | ||
"hr": "", | ||
"h2": "₹", | ||
"pre": "\n <h1>Heading</h1>\n <hr>\n <h2>&inr;</h2>\n ", | ||
"script": "\n let highlightedLine = null;\n let editor;\n <!-- this should not be parsed separately -->\n function updateLength(){\n const xmlData = editor.getValue();\n $(\"#lengthxml\")[0].innerText = xmlData.replace(/>s*</g, \"><\").length;\n }\n ", | ||
"@_role": "document", | ||
"@_style": "background-color: #2c3e50;" | ||
}, | ||
"@_lang": "en" | ||
} | ||
} | ||
``` | ||
|
||
To build the HTML document back from JS object, you need to uncomment `preserveOrder: true` in above code. And pass the output to the XML builder; | ||
```js | ||
const parsingOptions = { | ||
ignoreAttributes: false, | ||
preserveOrder: true, | ||
unpairedTags: ["hr", "br", "link", "meta"], | ||
stopNodes : [ "*.pre", "*.script"], | ||
processEntities: true, | ||
htmlEntities: true | ||
}; | ||
const parser = new XMLParser(parsingOptions); | ||
let result = parser.parse(html); | ||
|
||
const builderOptions = { | ||
ignoreAttributes: false, | ||
format: true, | ||
preserveOrder: true, | ||
suppressEmptyNode: true, | ||
unpairedTags: ["hr", "br", "link", "meta"], | ||
stopNodes : [ "*.pre", "*.script"], | ||
} | ||
const builder = new XMLBuilder(builderOptions); | ||
const output = builder.build(result); | ||
``` | ||
|
||
Output | ||
```html | ||
<html lang="en"> | ||
<head> | ||
<script> | ||
window.dataLayer = window.dataLayer || []; | ||
function gtag(){dataLayer.push(arguments);} | ||
gtag('js', new Date()); | ||
gtag('config', 'UA-80202630-2'); | ||
</script> | ||
<title> | ||
Fast XMl Parser | ||
</title> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1"> | ||
<link rel="stylesheet" href="static/css/bootstrap.min.css"> | ||
<link rel="stylesheet" href="static/css/jquery-confirm.min.css"> | ||
<link rel="stylesheet" type="text/css" href="style.css"> | ||
<script src="static/js/jquery-3.2.1.min.js"> | ||
</script> | ||
<style> | ||
.CodeMirror{ | ||
height: 100%; | ||
width: 100%; | ||
} | ||
</style> | ||
</head> | ||
<body role="document" style="background-color: #2c3e50;"> | ||
<h1> | ||
Heading | ||
</h1> | ||
<hr> | ||
<h2> | ||
₹ | ||
</h2> | ||
<pre> | ||
|
||
<h1>Heading</h1> | ||
<hr> | ||
<h2>&inr;</h2> | ||
|
||
</pre> | ||
<script> | ||
let highlightedLine = null; | ||
let editor; | ||
<!-- this should not be parsed separately --> | ||
function updateLength(){ | ||
const xmlData = editor.getValue(); | ||
$("#lengthxml")[0].innerText = xmlData.replace(/>s*</g, "><").length; | ||
} | ||
</script> | ||
</body> | ||
</html> | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
|
||
const {XMLParser, XMLBuilder} = require("../src/fxp"); | ||
|
||
describe("XMLParser", function() { | ||
|
||
it("should parse HTML with basic entities, <pre>, <script>, <br>", function() { | ||
const html = ` | ||
<html lang="en"> | ||
<head> | ||
<script> | ||
window.dataLayer = window.dataLayer || []; | ||
function gtag(){dataLayer.push(arguments);} | ||
gtag('js', new Date()); | ||
gtag('config', 'UA-80202630-2'); | ||
</script> | ||
<title>Fast XMl Parser</title> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1"> | ||
<link rel="stylesheet" href="static/css/bootstrap.min.css"> | ||
<link rel="stylesheet" href="static/css/jquery-confirm.min.css"> | ||
<link rel="stylesheet" type="text/css" href="style.css"> | ||
<script src="static/js/jquery-3.2.1.min.js"></script> | ||
<style> | ||
.CodeMirror{ | ||
height: 100%; | ||
width: 100%; | ||
} | ||
</style> | ||
</head> | ||
<body role="document" style="background-color: #2c3e50;"> | ||
<h1>Heading</h1> | ||
<hr> | ||
<h2>&inr;</h2> | ||
<pre> | ||
<h1>Heading</h1> | ||
<hr> | ||
<h2>&inr;</h2> | ||
</pre> | ||
<script> | ||
let highlightedLine = null; | ||
let editor; | ||
<!-- this should not be parsed separately --> | ||
function updateLength(){ | ||
const xmlData = editor.getValue(); | ||
$("#lengthxml")[0].innerText = xmlData.replace(/>\s*</g, "><").length; | ||
} | ||
</script> | ||
</body> | ||
</html>`; | ||
|
||
const parsingOptions = { | ||
ignoreAttributes: false, | ||
preserveOrder: true, | ||
unpairedTags: ["hr", "br", "link", "meta"], | ||
stopNodes : [ "*.pre", "*.script"], | ||
processEntities: true, | ||
htmlEntities: true | ||
}; | ||
const parser = new XMLParser(parsingOptions); | ||
let result = parser.parse(html); | ||
// console.log(JSON.stringify(result, null,4)); | ||
|
||
const builderOptions = { | ||
ignoreAttributes: false, | ||
format: true, | ||
preserveOrder: true, | ||
suppressEmptyNode: true, | ||
unpairedTags: ["hr", "br", "link", "meta"], | ||
stopNodes : [ "*.pre", "*.script"], | ||
} | ||
const builder = new XMLBuilder(builderOptions); | ||
let output = builder.build(result); | ||
// console.log(output); | ||
output = output.replace('₹','&inr;'); | ||
expect(output.replace(/\s+/g, "")).toEqual(html.replace(/\s+/g, "")); | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.