source/html/CleanHtml.js
import htmlparser from "htmlparser2";
import {makeHtmlStartTag, makeHtmlEndTag, isInlineTag} from "./utils";
import TypeConvert from "../utils/TypeConvert";
import ObjectManager from "../utils/ObjectManager";
import typeDetect from "../utils/typeDetect";
import {PasteMarkerNotSetError} from "./CleanHtmlErrors";
export class CleanerNode {
constructor(options, parentNode, rootNode, preservePasteMarker, tagName, attributes={}) {
this.pasteMarkerAttribute = 'data-ievv-paste-marker';
this.preservePasteMarker = preservePasteMarker;
this.pasteMarkerNode = null;
this.rootNode = rootNode ? rootNode : this;
this._inlineWrapperNode = null;
this.options = options;
this.parentNode = parentNode;
this.originalTagName = tagName;
this.originalAttributes = attributes;
this.tagName = this.cleanTagName();
this.attributes = this.cleanAttributes();
this.children = [];
this.pasteLevels = {
root: 0,
block: 1,
inline: 2
};
}
getPasteLevel() {
if (this.isRootNode()) {
return this.pasteLevels.root;
}
if (isInlineTag(this.tagName)) {
return this.pasteLevels.inline;
}
return this.pasteLevels.block;
}
getPasteMarkerLevel() {
return this.rootNode.pasteMarkerNode.parentNode.getPasteLevel();
}
getDeepestPasteLevelInTree() {
let pasteLevel = this.getPasteLevel();
for (let child of this.children) {
if (pasteLevel == this.pasteLevels.inline) {
return pasteLevel; // Returning because already at deepest possible level.. no need to iterate further..
}
if (typeDetect(child) == 'object') {
const childPasteLevel = child.getDeepestPasteLevelInTree();
if (childPasteLevel > pasteLevel) {
pasteLevel = childPasteLevel;
}
}
}
return pasteLevel;
}
getBlockNodeForNode(node) {
if (node.tagName != null && !isInlineTag(node.tagName)) {
return [true, node.tagName];
}
if (!node.isRootNode()) {
return this.getBlockNodeForNode(node.parentNode);
}
return [false, null];
}
checkIfNodeIsJustStringAndExtractString(node) {
if (typeDetect(node) == 'string') {
return [true, node];
}
if (node.tagName == null && node.children.length == 1) {
return this.checkIfNodeIsJustStringAndExtractString(node.children[0]);
}
const [pasteMarkerInBlockTag, pasteMarkerBlockTag] = this.getBlockNodeForNode(this.rootNode.pasteMarkerNode);
if (pasteMarkerInBlockTag && pasteMarkerBlockTag == node.tagName && node.children.length == 1) {
return this.checkIfNodeIsJustStringAndExtractString(node.children[0]);
}
return [false, null];
}
insertNodeAtPasteMarker(node) {
if (!this.rootNode.pasteMarkerNode) {
throw new PasteMarkerNotSetError("Cannot insert node at pasteMarker - aborting insertion");
}
let [isStringNode, stringValue] = this.checkIfNodeIsJustStringAndExtractString(node);
if (isStringNode) {
// console.log("This node is just a string: ", node);
this.rootNode.pasteMarkerNode.parentNode.addChildNodeAtIndex(
this.rootNode.pasteMarkerNode.getParentChildListIndex(), stringValue);
return;
}
// console.log("This node is not a string: ", node);
// console.log("Got rootNode: ", this.rootNode);
const pasteLevelOfNewNode = node.getDeepestPasteLevelInTree();
while (this.getPasteMarkerLevel() >= pasteLevelOfNewNode) {
this.splitAtPasteMarker();
}
node.parentNode = this.rootNode.pasteMarkerNode.parentNode;
this.rootNode.pasteMarkerNode.parentNode.addChildNodeAtIndex(
this.rootNode.pasteMarkerNode.getParentChildListIndex(), node);
const [newMarkerParent, newMarkerIndex] = node.getLastPositionInNodeTree();
this.movePasteMarkerTo(newMarkerParent, newMarkerIndex+1);
}
getLastPositionInNodeTree() {
let parentNode = null, currentNode = this.rootNode;
while (currentNode.children.length > 0 &&
typeDetect(currentNode.children[currentNode.children.length-1]) == 'object') {
currentNode = currentNode.children[currentNode.children.length-1];
}
return [currentNode, currentNode.children.length-1];
}
splitAtPasteMarker() {
if (!this.rootNode.pasteMarkerNode) {
throw new Error("Cannot split at pasteMarker! pasteMarker is not set!");
}
if (this.rootNode.pasteMarkerNode.parentNode.isRootNode()) {
throw new Error("Cannot split at pasteMarker! pasteMarker is placed at root!");
}
this.rootNode.pasteMarkerNode.splitParentAfterMe();
this.movePasteMarkerTo(
this.rootNode.pasteMarkerNode.parentNode.parentNode,
this.rootNode.pasteMarkerNode.parentNode.getParentChildListIndex());
}
movePasteMarkerTo(node, index) {
const previousParent = this.rootNode.pasteMarkerNode.parentNode;
const previousParentIndex = this.rootNode.pasteMarkerNode.getParentChildListIndex();
previousParent.children.splice(previousParentIndex, 1);
this.rootNode.pasteMarkerNode.parentNode = node;
node.addChildNodeAtIndex(index, this.rootNode.pasteMarkerNode);
}
getParentChildListIndex() {
if (!this.parentNode) {
throw new Error("Cannot get parentChildListIndex. Has no parent.");
}
return this.parentNode.children.indexOf(this);
}
splitAfterChildIndex(index) {
if (index >= this.children.length) {
throw new Error(`Cannot split children at index ${index}, children.length: ${this.children.length}`);
}
const newSiblingNode = new CleanerNode(
this.options, this.parentNode, this.rootNode,
this.preservePasteMarker, this.tagName, this.attributes);
newSiblingNode.children = this.children.slice(index);
for (let child of newSiblingNode.children) {
if (typeDetect(child) == 'object') {
child.parentNode = newSiblingNode;
}
}
this.children = this.children.slice(0, index);
this.parentNode.addChildNodeAtIndex(this.getParentChildListIndex()+1, newSiblingNode);
}
splitParentAfterMe() {
this.parentNode.splitAfterChildIndex(this.getParentChildListIndex());
}
addChildNodeAtIndex(index, node) {
this.children.splice(index, 0, node);
}
getClosestParentWithTagName(tagName) {
if(this.parentNode == null || this.parentNode.tagName == null) {
return null;
}
if(this.parentNode.tagName == tagName) {
return this.parentNode;
} else {
return this.parentNode.getClosestParentWithTagName(tagName);
}
}
transformTagName() {
if(this.originalTagName != null && this.options.transformTagsMap.has(this.originalTagName)) {
return this.options.transformTagsMap.get(this.originalTagName);
}
return this.originalTagName;
}
cleanTagName() {
const tagName = this.transformTagName();
if((tagName != null && this.options.allowedTagsSet.has(tagName)) || this.isSpecialNode()) {
return tagName;
}
return null;
}
cleanAttributes() {
const cleanedAttributes = {};
for(let attributeName of Object.keys(this.originalAttributes)) {
if(this.options.isAllowedAttributeForTagName(this.tagName, attributeName) || this.isSpecialNode()) {
cleanedAttributes[attributeName] = this.originalAttributes[attributeName];
}
}
return cleanedAttributes;
}
shouldWrapStandaloneInlineTags() {
return this.parentNode == null && this.options.wrapStandaloneInlineTagName != null;
}
getStandaloneInlineTagWrapper() {
if(this._inlineWrapperNode == null) {
const node = this.makeChildNode(
this.options.wrapStandaloneInlineTagName,
this.options.wrapStandaloneInlineTagAttributes);
this._inlineWrapperNode = node;
this.children.push(node);
}
return this._inlineWrapperNode;
}
stopWrappingStandaloneInlineTags() {
this._inlineWrapperNode = null;
}
addText(text) {
if(this.shouldWrapStandaloneInlineTags()) {
this.getStandaloneInlineTagWrapper().addText(text);
} else {
this.children.push(text);
}
}
makeChildNode(tagName, attributes) {
const cleanerNodeClass = this.options.getCleanerNodeClassForTagName(tagName);
return new cleanerNodeClass(
this.options, this, this.rootNode, this.preservePasteMarker,
tagName, attributes);
}
isInlineTag() {
return isInlineTag(this.tagName);
}
isRootNode() {
return this.parentNode == null;
}
isPasteMarker() {
if (this.originalAttributes.hasOwnProperty(this.pasteMarkerAttribute)) {
if (this.isRootNode()) {
throw new Error("the rootnode cannot be the paste marker-node!");
}
this.rootNode.setPasteMarkerNode(this);
return true;
}
return false;
}
setPasteMarkerNode(node) {
this.pasteMarkerNode = node;
}
/**
* Special nodes are nodes like the paste-marker. If the cleaner is configured for it, these nodes should not be
* cleaned or altered in any way.
*
* @returns {boolean} if true, the current node is a special node, as such, any attributes is legal and any tagname is legal.
*/
isSpecialNode() {
if (this.preservePasteMarker && this.isPasteMarker()) {
return true;
}
// Add if-tests for other special nodes here if any are added...
return false;
}
addChildNode(node) {
if(this.shouldWrapStandaloneInlineTags() && node.isInlineTag()) {
this.getStandaloneInlineTagWrapper().addChildNode(node);
} else {
this.stopWrappingStandaloneInlineTags();
this.children.push(node);
}
}
addChildNodeFromTag(tagName, attributes) {
const node = this.makeChildNode(tagName, attributes);
this.addChildNode(node);
return node;
}
shouldRenderTag() {
if (this.isSpecialNode()) {
return true;
}
if(this.tagName == null) {
return false;
}
const closestParentNodeWithSameTag = this.getClosestParentWithTagName(this.tagName);
if(closestParentNodeWithSameTag == null) {
return true;
}
return this.options.allowNestedWithinSameTagSet.has(this.tagName);
}
makeStartTag() {
if(this.shouldRenderTag()) {
return makeHtmlStartTag(this.tagName, this.attributes);
}
return '';
}
makeEndTag() {
if(this.shouldRenderTag()) {
return makeHtmlEndTag(this.tagName);
}
return '';
}
childrenToHtml() {
let html = '';
for(let child of this.children) {
if(typeof child == 'string') {
html += child;
} else {
html += child.toHtml();
}
}
return html;
}
toHtml() {
let html = `${this.makeStartTag()}${this.childrenToHtml()}${this.makeEndTag()}`;
if (ObjectManager.validate(this.options, 'normalizeEmptyTags')) {
const emptyTagHtml = `<${this.tagName}></${this.tagName}>`;
if(html == emptyTagHtml) {
if (ObjectManager.validate(this.options.normalizeEmptyTags, 'fill', this.tagName)) {
const textToFillEmptyTag = this.options.normalizeEmptyTags.fill[this.tagName];
return `<${this.tagName}>${textToFillEmptyTag}</${this.tagName}>`;
}
if (ObjectManager.validate(this.options.normalizeEmptyTags, 'remove')) {
if (this.options.normalizeEmptyTags.remove.includes(this.tagName)) {
return '';
}
}
}
}
return html;
}
toString() {
return this.toHtml();
}
}
// Should be the default for nodes that can not contain
// text as a direct child, such as UL, OL, TABLE, ...
// and all the self-closing tags.
export class NoTextCleanerNode extends CleanerNode {
addText(text) {}
}
export class FlatListCleanerNode extends NoTextCleanerNode {
shouldRenderTag() {
if(this.tagName == null) {
return false;
}
const closestUlParentNode = this.getClosestParentWithTagName(this.tagName);
return closestUlParentNode == null;
}
addChildNode(node) {
const closestListParentNode = this.getClosestParentWithTagName(this.tagName);
if(closestListParentNode == null) {
super.addChildNode(node);
} else {
closestListParentNode.addChildNode(node);
}
}
}
/*
Handle paste:
<p>Hello PASTEHERE</p>
<ul>
<li>Item PASTEHERE</li>
</ul>
Handle force single parent element (ul).
Handle (should be removed)
2 options:
- Clean everything after paste, and handle invalid nesting in the cleaner.
- Know where we are cleaning.
*/
export class CleanHtmlParser {
constructor(html, options, preservePasteMarker) {
this.options = options;
this.preservePasteMarker = preservePasteMarker;
this._parse(html);
if(this._isWrappingStandaloneInline) {
this.endWrappingStandaloneInline();
}
}
_parse(html) {
this._rootNode = new this.options.rootCleanerNodeClass(
this.options,
null, // parentNode
null, // rootNode
this.preservePasteMarker,
this.options.rootCleanerNodeTagName,
this.options.rootCleanerNodeAttributes);
this._currentNode = this._rootNode;
const parser = new htmlparser.Parser({
onopentag: (...args) => {
this.onOpenTag(...args);
},
ontext: (...args) => {
this.onText(...args);
},
onclosetag: (...args) => {
this.onCloseTag(...args);
}
}, {decodeEntities: true});
parser.write(html);
parser.end();
}
onOpenTag(tagName, attributes) {
const node = this._currentNode.addChildNodeFromTag(tagName, attributes);
// console.log(`${tagName}: ${node.toString()}`);
this._currentNode = node;
}
onText(text) {
this._currentNode.addText(text);
}
onCloseTag(tagName) {
this._currentNode = this._currentNode.parentNode;
}
get rootNode() {
return this._rootNode;
}
}
export class CleanHtmlOptions {
constructor() {
this._allowedTagsSet = new Set();
this._allowedAttributesMap = new Map();
this._allowNestedWithinSameTagSet = new Set();
this._transformTagsMap = new Map();
this.defaultCleanerNodeClass = CleanerNode;
this.rootCleanerNodeClass = CleanerNode;
this.rootCleanerNodeTagName = null;
this.rootCleanerNodeAttributes = {};
this._tagNameToCleanerNodeClassMap = new Map();
this.wrapStandaloneInlineTagName = null;
this.wrapStandaloneInlineTagAttributes = {};
this.normalizeEmptyTags = null;
}
get allowedTagsSet() {
return this._allowedTagsSet;
}
set allowedTagsSet(allowedTagsSet) {
this._allowedTagsSet = TypeConvert.toSet(allowedTagsSet);
}
get allowedAttributesMap() {
return this._allowedAttributesMap;
}
set allowedAttributesMap(allowedAttributesMap) {
this._allowedAttributesMap = TypeConvert.toMapOfSets(allowedAttributesMap);
}
isAllowedAttributeForTagName(tagName, attributeName) {
if(this._allowedAttributesMap.has(tagName)) {
return this._allowedAttributesMap.get(tagName).has(attributeName);
}
return false;
}
set transformTagsMap(transformTagsMap) {
this._transformTagsMap = TypeConvert.toMap(transformTagsMap);
}
get transformTagsMap() {
return this._transformTagsMap;
}
get allowNestedWithinSameTagSet() {
return this._allowNestedWithinSameTagSet;
}
set allowNestedWithinSameTagSet(allowNestedWithinSameTagSet) {
this._allowNestedWithinSameTagSet = TypeConvert.toSet(allowNestedWithinSameTagSet);
}
set tagNameToCleanerNodeClassMap(tagNameToCleanerNodeClassMap) {
this._tagNameToCleanerNodeClassMap = TypeConvert.toMap(tagNameToCleanerNodeClassMap);
}
get tagNameToCleanerNodeClassMap() {
return this._tagNameToCleanerNodeClassMap;
}
getCleanerNodeClassForTagName(tagName) {
if(this._tagNameToCleanerNodeClassMap.has(tagName)) {
return this._tagNameToCleanerNodeClassMap.get(tagName);
} else {
return this.defaultCleanerNodeClass;
}
}
setCleanerNodeClassForTagName(tagName, cleanerNodeClass) {
this._tagNameToCleanerNodeClassMap.set(tagName, cleanerNodeClass);
}
// updateFromObject(optionsObject) {
// if(typeof optionsObject.allowedTagsSet != 'undefined') {
// this.allowedTagsSet = optionsObject.allowedTagsSet;
// }
// if(typeof optionsObject.allowedAttributesMap != 'undefined') {
// this.allowedAttributesMap = optionsObject.allowedAttributesMap;
// }
// if(typeof optionsObject.transformTagsMap != 'undefined') {
// this.transformTagsMap = optionsObject.transformTagsMap;
// }
// }
}
/**
* HTML cleaner with extra post cleaning that makes it
* suitable for cleaning input typed and pasted into
* contenteditable editors.
*/
export default class CleanHtml {
constructor() {
this.options = new CleanHtmlOptions();
}
/**
* Called at the beginning of {@link CleanHtml#clean}
* before performing the default cleaning.
*
* Subclasses can override this to perform additional
* cleaning pre-cleaning.
*
* @param {string} html The HTML to pre-clean.
* @returns {string} The pre-cleaned HTML. Defaults to returning
* the provided ``html`` unchanged.
*/
preClean(html) {
return html;
}
/**
* Called at the end of {@link CleanHtml#clean}
* after performing the default cleaning.
*
* Subclasses can override this to perform additional
* cleaning post-cleaning.
*
* @param {string} html The HTML to post-clean.
* @returns {string} The cleaned HTML. Defaults to returning
* the provided ``html`` unchanged.
*/
postClean(html) {
return html;
}
_getCleanedTree(html, preservePasteMarker) {
return new CleanHtmlParser(html, this.options, preservePasteMarker);
}
_clean(html, preservePasteMarker) {
return this._getCleanedTree(html, preservePasteMarker).rootNode.toHtml();
}
/**
* Clean the provided html.
*
* @param {string} html The HTML to clean.
* @param preservePasteMarker {boolean} if true, leave the tag with `data-ievv-paste-marker` attribute.
* @returns {string} The cleaned HTML.
*/
clean(html, preservePasteMarker=false) {
let cleanedHtml = this.preClean(html);
cleanedHtml = this._clean(cleanedHtml, preservePasteMarker);
cleanedHtml = this.postClean(cleanedHtml);
return cleanedHtml;
}
/**
* This function takes two html-blobs, `originalHtml` is the original text, `pastedHtml` is text to be inserted in
* `originalHtml`.
* The original html-blob should contain a 'marker-element' determining where to paste the given `pastedHtml`. This
* marker element should have the data-attribute `data-ievv-paste-marker`. If multiple marker-elements are present,
* an error will be logged, and `pastedHtml` will be inserted at the first one.
*
* Note: result from these examples will be cleaned once more using default cleaner, so if the cleaner is configured
* to wrap standalone text the standalone text in e.g. example 1 would be wrapped in some block-level tag before returning.
*
* @example <caption>1 - pasting unformatted text without marker:</caption>
* originalHtml: <p>Hello world! I am some text</p>
* pastedHtml: awesome
* result: <p>Hello world! I am some text</p>awesome
*
* @example <caption>2 - pasting formatted text without marker:</caption>
* originalHtml: <p>Hello world! I am some text</p>
* pastedHtml: <strong>awesome</strong>
* result: <p>Hello world! I am some text</p><strong>awesome</strong>
*
* @example <caption>3 - pasting unformatted text with marker:</caption>
* originalHtml: <p>Hello world! I am some <span data-ievv-paste-marker></span>text</p>
* pastedHtml: awesome
* result: <p>Hello world! I am some awesome<span data-ievv-paste-marker></span>text</p>
*
* @example <caption>4 - pasting formatted text with marker:</caption>
* originalHtml: <p>Hello world! I am some <span data-ievv-paste-marker></span>text</p>
* pastedHtml: <strong>awesome</strong>
* result: <p>Hello world! I am some <strong>awesome<span data-ievv-paste-marker></span></strong>text</p>
*
* @example <caption>5 - pasting block tag with marker:</caption>
* originalHtml: <p>Hello world! I am some <span data-ievv-paste-marker></span>text</p>
* pastedHtml: <p>awesome</p>
* result: <p>Hello world! I am some </p>
* <p>awesome<span data-ievv-paste-marker></span></p>
* <p>text</p>
*
* @example <caption>6 - pasting formatted text in formatting with marker:</caption>
* originalHtml: <p>Hello world! I am <strong>some <span data-ievv-paste-marker></span>text</strong></p>
* pastedHtml: <strong>awesome</strong>
* result: <p>Hello world! I am <strong>some </strong><strong>awesome<span data-ievv-paste-marker></span></strong><strong>text</strong></p>
*
* @param originalHtml
* @param pastedHtml
*/
paste(originalHtml, pastedHtml) {
const cleanedPastedTree = this._getCleanedTree(pastedHtml);
const cleanedOriginalTree = this._getCleanedTree(originalHtml, true);
// console.log(`Running paste.\n\nCleaned original tree: ${cleanedOriginalTree.rootNode.toHtml()}\n\ncleanedPastedTree: ${cleanedPastedTree.rootNode.toHtml()}`);
// console.log(`cleanedOriginalTree.rootNode: `, cleanedOriginalTree.rootNode);
// console.log(`cleanedPastedTree.rootNode: `, cleanedPastedTree.rootNode);
try {
cleanedOriginalTree.rootNode.insertNodeAtPasteMarker(cleanedPastedTree.rootNode);
} catch(e) {
if (e instanceof PasteMarkerNotSetError) {
const cleanedOriginalHtml = cleanedOriginalTree.rootNode.toHtml();
const cleanedPastedHtml = cleanedPastedTree.rootNode.toHtml();
return this.clean(`${cleanedOriginalHtml}${cleanedPastedHtml}`, true);
} else {
throw e;
}
}
return this.clean(cleanedOriginalTree.rootNode.toHtml(), true);
}
}