C# - GetSafeHtmlFragment removing all HTML tags
I am using GetSafeHtmlFragment in my website, and I found that all of the tags except <p> and <a> are removed.
I researched this and found that there is no resolution from Microsoft.
Has it been superseded by something else, or is there a solution?
thanks.
An alternative solution is to use the Html Agility Pack in conjunction with your own tag whitelist:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using HtmlAgilityPack;

/// <summary>
/// Console sample that loads "input.html", removes every node whose name is
/// not in a whitelist, and writes the remaining markup to standard output.
/// </summary>
class Program
{
    /// <summary>
    /// Reads "input.html" from the current directory, strips non-whitelisted
    /// nodes using Html Agility Pack, and prints the resulting document.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // Names of the nodes that are kept; every other node is removed
        // together with its children.
        // NOTE(review): Html Agility Pack names text nodes "#text". That name
        // is not listed here, so text nodes reached by the traversal below
        // would be removed too -- confirm whether "#text" should be added.
        // NOTE(review): only node names are checked; attributes on the kept
        // tags (e.g. on <a> and <img>) are not filtered by this code.
        var whiteList = new[]
        {
            "#comment", "html", "head",
            "title", "body", "img", "p",
            "a"
        };

        var html = File.ReadAllText("input.html");
        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        // Collect first and remove afterwards, so the tree is not mutated
        // while the navigator is still walking it.
        var nodesToRemove = new List<HtmlAgilityPack.HtmlNode>();
        var e = doc
            .CreateNavigator()
            .SelectDescendants(System.Xml.XPath.XPathNodeType.All, false)
            .GetEnumerator();
        while (e.MoveNext())
        {
            var node =
                ((HtmlAgilityPack.HtmlNodeNavigator)e.Current)
                .CurrentNode;
            if (!whiteList.Contains(node.Name))
            {
                nodesToRemove.Add(node);
            }
        }
        nodesToRemove.ForEach(node => node.Remove());

        // Serialize the filtered document into a string.
        var sb = new StringBuilder();
        using (var w = new StringWriter(sb))
        {
            doc.Save(w);
        }

        Console.WriteLine(sb.ToString());
    }
}
Comments
Post a Comment