Language: C#
string StripTags(string Input, string[] AllowedTags) and string StripTagsAndAttributes(string Input, string[] AllowedTags)
/*
 * Usage
 *   string test1 = StripTags("<p>George</p><b>W</b><i>Bush</i>", new string[]{"i","b"});
 *   string test2 = StripTags("<p>George <img src='someimage.png' onmouseover='someFunction()'>W <i>Bush</i></p>", new string[]{"p"});
 *   string test3 = StripTags("<a href='http://www.dijksterhuis.org'>Martijn <b>Dijksterhuis</b></a>", new string[]{"a"});
 */
using System;
using System.Text;
using System.Text.RegularExpressions;

namespace StripHTML
{
    class MainClass
    {
        // Anything that looks like a tag: "<", one or more characters other than ">", then ">".
        // Known limitation: a ">" inside a quoted attribute value ends the tag early.
        private static readonly Regex TagExp = new Regex(@"<[^>]+>");

        // Start of a tag: "<", an optional "/" (group 1) and the tag name (group 2).
        // The name must follow immediately, so "< b>" is not treated as a "b" tag.
        private static readonly Regex TagHeadExp = new Regex(@"^<(/?)([^\s/>]+)");

        // One attribute: its name (group 1), optionally followed by "=" and a
        // double-quoted, single-quoted or unquoted value (group 2).
        private static readonly Regex AttributeExp =
            new Regex(@"([^\s=/""'<>]+)(?:\s*=\s*(""[^""]*""|'[^']*'|[^\s""'>]+))?");

        /// <summary>
        /// Returns true when <paramref name="tagName"/> equals one of the allowed
        /// tag names, compared ordinally and ignoring case.
        /// </summary>
        private static bool IsAllowedTag(string tagName, string[] allowedTags)
        {
            foreach (string allowedTag in allowedTags)
            {
                if (string.Equals(tagName, allowedTag, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Returns true for the attributes that survive StripTagsAndAttributes:
        /// "href" and "class", and only on anchor ("a") tags.
        /// </summary>
        private static bool IsAllowedAttribute(string tagName, string attributeName)
        {
            if (!string.Equals(tagName, "a", StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }

            return string.Equals(attributeName, "href", StringComparison.OrdinalIgnoreCase)
                || string.Equals(attributeName, "class", StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Returns the tag name of a matched tag, or null when no name follows
        /// the opening "&lt;" (or "&lt;/") directly.
        /// </summary>
        private static string GetTagName(string tag)
        {
            Match head = TagHeadExp.Match(tag);
            return head.Success ? head.Groups[2].Value : null;
        }

        /// <summary>
        /// Rebuilds a single matched tag for StripTagsAndAttributes. Returns an empty
        /// string for a tag that is not allowed; otherwise returns the tag re-emitted
        /// with only its allowed attributes. The tag is rebuilt from its parsed parts
        /// (rather than by deleting unwanted text) so nothing unrecognised is copied
        /// through to the output.
        /// </summary>
        private static string SanitizeTag(string tag, string[] allowedTags)
        {
            Match head = TagHeadExp.Match(tag);
            if (!head.Success)
            {
                return string.Empty;
            }

            string name = head.Groups[2].Value;
            if (!IsAllowedTag(name, allowedTags))
            {
                return string.Empty;
            }

            // Closing tags never carry attributes.
            if (head.Groups[1].Length > 0)
            {
                return "</" + name + ">";
            }

            // Everything between the tag name and the final ">".
            string interior = tag.Substring(head.Length, tag.Length - head.Length - 1);

            StringBuilder rebuilt = new StringBuilder("<").Append(name);
            bool keptAny = false;
            int consumed = 0;

            foreach (Match attribute in AttributeExp.Matches(interior))
            {
                consumed = attribute.Index + attribute.Length;
                if (IsAllowedAttribute(name, attribute.Groups[1].Value))
                {
                    // Keep the attribute text exactly as written (quotes included).
                    rebuilt.Append(' ').Append(attribute.Value);
                    keptAny = true;
                }
            }

            // A "/" left over after the last attribute marks a self-closing tag. When an
            // attribute was kept, a space is emitted first so the slash cannot merge into
            // an unquoted attribute value.
            if (interior.Substring(consumed).Trim().EndsWith("/", StringComparison.Ordinal))
            {
                rebuilt.Append(keptAny ? " /" : "/");
            }

            return rebuilt.Append('>').ToString();
        }

        /// <summary>
        /// Removes every tag from <paramref name="Input"/> except those whose name is
        /// listed in <paramref name="AllowedTags"/>. Allowed tags are kept verbatim,
        /// including all of their attributes.
        /// </summary>
        /// <param name="Input">The text to strip.</param>
        /// <param name="AllowedTags">
        /// Tag names (without angle brackets, e.g. "b") to keep; matched ignoring case.
        /// Null is treated as an empty list, i.e. every tag is removed.
        /// </param>
        /// <returns>The input with all other tags removed.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="Input"/> is null.</exception>
        public static string StripTags(string Input, string[] AllowedTags)
        {
            if (Input == null)
            {
                throw new ArgumentNullException("Input");
            }

            string[] allowed = AllowedTags ?? new string[0];

            // One pass over the input: each match is either kept as-is or dropped.
            return TagExp.Replace(
                Input,
                m => IsAllowedTag(GetTagName(m.Value), allowed) ? m.Value : string.Empty);
        }

        /// <summary>
        /// Removes every tag not listed in <paramref name="AllowedTags"/> and, from the
        /// tags that remain, every attribute except "href" and "class" on anchor tags.
        /// Kept tags are re-emitted in a normalised form: the tag name, the kept
        /// attributes separated by single spaces, and an optional self-closing slash.
        /// </summary>
        /// <param name="Input">The text to strip.</param>
        /// <param name="AllowedTags">
        /// Tag names (without angle brackets) to keep; matched ignoring case.
        /// Null is treated as an empty list.
        /// </param>
        /// <returns>The input with disallowed tags and attributes removed.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="Input"/> is null.</exception>
        public static string StripTagsAndAttributes(string Input, string[] AllowedTags)
        {
            if (Input == null)
            {
                throw new ArgumentNullException("Input");
            }

            string[] allowed = AllowedTags ?? new string[0];

            // NOTE(review): attribute values are passed through unchanged, so an href with a
            // "javascript:" (or similar) URL is still emitted. Validate URL schemes separately,
            // or use a dedicated HTML sanitizer, before rendering untrusted input.
            return TagExp.Replace(Input, m => SanitizeTag(m.Value, allowed));
        }

        public static void Main(string[] args)
        {
            string test1 = StripTags("<p>George</p><b>W</b><i>Bush</i>", new string[]{"i","b"});
            string test2 = StripTags("<p>George <img src='someimage.png' onmouseover='someFunction()'>W <i>Bush</i></p>", new string[]{"p"});
            string test3 = StripTags("<a href='http://www.dijksterhuis.org'>Martijn <b>Dijksterhuis</b></a>", new string[]{"a"});

            Console.WriteLine(test1);
            Console.WriteLine(test2);
            Console.WriteLine(test3);

            string test4 = "<a class=\"classof69\" onClick='crosssite.boom()' href='http://www.dijksterhuis.org'>Martijn Dijksterhuis</a>";
            Console.WriteLine(StripTagsAndAttributes(test4, new string[]{"a"}));
        }
    }
}
Tags:
Description:
The PHP strip_tags function not only neatly removes HTML from an input, it also lets you specify which tags should stay. This is great if you are allowing your visitors to apply some basic HTML tags to their comments.
Report Abuse
Subscribe
Discuss
What's new
What is it
New Snippet
Recent Snippets
My Snippets
Web Code
Search

