1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- /*
- * HTMLParser - This parser implementation assumes that it runs in a browser
- * and uses the DOM methods available there to walk already-parsed HTML.
- *
- * @author Himanshu Gilani
- *
- */
/**
 * Walks the child nodes of a DOM node, depth first, and reports what it finds
 * to a SAX-style handler. The node itself is not reported, only its descendants.
 *
 * @param {Node} node - DOM node whose `childNodes` are traversed.
 * @param {Object} handler - Callbacks; each one is optional and is simply
 *   skipped when missing:
 *   `start(nodeName, attrs, unary)` - element opened; `attrs` is an array of
 *     `{name, value}` objects and `unary` is true when the element has no
 *     child nodes (no matching `end` call follows in that case);
 *   `end(nodeName)` - element with child nodes closed;
 *   `chars(text)` - text node value, or the `innerHTML` of a pre/code element;
 *   `comment(text)` - comment node value.
 * @param {Object} [opts]
 * @param {Array} [opts.nodesToIgnore] - lower-case node names; matching nodes
 *   are skipped together with their subtree.
 * @param {boolean} [opts.parseHiddenNodes=false] - when falsy (or the string
 *   'false'), nodes for which `isHiddenNode` returns true are skipped together
 *   with their subtree.
 */
var HTMLParser = function(node, handler, opts) {
  opts = opts || {};
  var nodesToIgnore = opts['nodesToIgnore'] || [];
  // The default used to be the string 'false', which is truthy, so hidden
  // nodes were never filtered out. Any truthy value still enables parsing of
  // hidden nodes, except the literal string 'false'.
  var rawParseHidden = opts['parseHiddenNodes'];
  var parseHiddenNodes = !!rawParseHidden && rawParseHidden !== 'false';

  var children = node.childNodes;
  for (var i = 0; i < children.length; i++) {
    var child = children[i];
    try {
      var name = child.nodeName.toLowerCase();

      var ignore = false;
      for (var k = 0; k < nodesToIgnore.length; k++) {
        if (name === nodesToIgnore[k]) {
          ignore = true;
          break;
        }
      }

      //NOTE hidden node testing is expensive in FF.
      if (ignore || (!parseHiddenNodes && isHiddenNode(child))) {
        continue;
      }

      if (name === "#text") {
        if (handler.chars) {
          handler.chars(child.nodeValue);
        }
      } else if (name === "#comment") {
        if (handler.comment) {
          handler.comment(child.nodeValue);
        }
      } else {
        var attrs = [];
        if (child.hasAttributes()) {
          var attributes = child.attributes;
          for (var a = 0; a < attributes.length; a++) {
            var attribute = attributes.item(a);
            attrs.push({
              name : attribute.nodeName,
              value : attribute.nodeValue
            });
          }
        }

        var hasChildren = child.hasChildNodes();
        if (handler.start) {
          handler.start(child.nodeName, attrs, !hasChildren);
        }

        // Descend regardless of whether a start callback exists, so handlers
        // that only implement chars/comment still see nested content.
        if (hasChildren) {
          if (name === "pre" || name === "code") {
            // Preformatted content is handed over as raw markup, not walked.
            if (handler.chars) {
              handler.chars(child.innerHTML);
            }
          } else if (name === "iframe" || name === "frame") {
            // Only reached when the frame element itself has child nodes.
            // No `return` here: the frame still has to be closed and the
            // remaining siblings still have to be visited.
            if (child.contentDocument && child.contentDocument.documentElement) {
              HTMLParser(child.contentDocument.documentElement, handler, opts);
            }
          } else {
            HTMLParser(child, handler, opts);
          }
          if (handler.end) {
            handler.end(child.nodeName);
          }
        }
      }
    } catch (e) {
      // Keep going with the next sibling, but report what failed and why.
      // The name lookup is guarded so the catch block cannot throw itself.
      var failedName = (child && child.nodeName) ? child.nodeName.toLowerCase() : "(unknown)";
      console.log("error while parsing node: " + failedName, e);
    }
  }
};
/**
 * Tells whether a node's own computed style is `display: none`.
 *
 * Only the node itself is inspected. `title` nodes are always reported as
 * not hidden.
 *
 * @param {Node} node - DOM node to test.
 * @returns {boolean} true only when `window.getComputedStyle` is available
 *   and reports `display: none` for the node; false in every other case,
 *   including when the style lookup throws.
 */
function isHiddenNode(node) {
  if (node.nodeName.toLowerCase() === "title") {
    return false;
  }

  // Without getComputedStyle there is no way to tell; treat the node as
  // visible and always hand back a real boolean.
  if (typeof window === "undefined" || !window.getComputedStyle) {
    return false;
  }

  try {
    var style = window.getComputedStyle(node, null);
    return !!style.getPropertyValue && style.getPropertyValue('display') === 'none';
  } catch (e) {
    // Deliberately best effort: the node's styles are not accessible, so it
    // is treated as visible.
    return false;
  }
}
|