CodePaste Logo
New Snippet New Snippet Recent Snippets Recent Snippets My Snippets My Snippets Web Code Search Snippets Search
Sign in or Register
Language: C#

string StripTags(string Input, string[] AllowedTags) and string StripTagsAndAttributes(string Input, string[] AllowedTags)

219 Views
Copy Code Show/Hide Line Numbers
/* Usage 
string test1 = StripTags("<p>George</p><b>W</b><i>Bush</i>", new string[]{"i","b"});
string test2 = StripTags("<p>George <img src='someimage.png' onmouseover='someFunction()'>W <i>Bush</i></p>", new string[]{"p"});
string test3 = StripTags("<a href='http://www.dijksterhuis.org'>Martijn <b>Dijksterhuis</b></a>", new string[]{"a"});

*/
 
 
 
 
 
using System;
using System.Text.RegularExpressions;
 
namespace StripHTML
{
    /// <summary>
    /// A small C# counterpart of PHP's strip_tags: removes markup from a string
    /// while letting the caller keep a whitelist of tag names, and optionally
    /// drops every attribute except "href" and "class" from the tags that remain.
    /// </summary>
    /// <remarks>
    /// This is a regex-based filter, not an HTML parser. A "tag" is everything from
    /// a "&lt;" up to the next "&gt;", so a "&gt;" inside a quoted attribute value
    /// ends the tag early.
    /// NOTE(review): "href" values are kept verbatim and are NOT checked for
    /// dangerous schemes such as "javascript:". Validate URLs separately before
    /// rendering untrusted input.
    /// </remarks>
    class MainClass
    {
        // Anything shaped like an opening or closing tag: "<", optional "/",
        // then at least one character up to the next ">".
        private static readonly Regex TagPattern = new Regex(@"<\/?[^>]+>");

        // One attribute inside a tag: a name (group 1) optionally followed by
        // "=" and a double-quoted, single-quoted or unquoted value.
        private static readonly Regex AttributePattern = new Regex(
            @"([^\s""'<>/=]+)(?:\s*=\s*(?:""[^""]*""|'[^']*'|[^\s""'<>]+))?");

        // The only attributes StripTagsAndAttributes lets through.
        private static readonly string[] AllowedAttributes = { "href", "class" };

        /// <summary>Tells whether <paramref name="tag"/> starts with "&lt;/".</summary>
        private static bool IsClosingTag(string tag)
        {
            return tag.StartsWith("</", StringComparison.Ordinal);
        }

        /// <summary>
        /// Extracts the element name from a tag matched by <see cref="TagPattern"/>:
        /// the characters after "&lt;" or "&lt;/" up to the first whitespace, "/" or "&gt;".
        /// Returns an empty string when the tag has no name (for example "&lt; p&gt;").
        /// </summary>
        private static string GetTagName(string tag)
        {
            int start = IsClosingTag(tag) ? 2 : 1;
            int end = start;

            while (end < tag.Length
                   && !char.IsWhiteSpace(tag[end])
                   && tag[end] != '/'
                   && tag[end] != '>')
            {
                end++;
            }

            return tag.Substring(start, end - start);
        }

        /// <summary>
        /// Case-insensitive (ordinal) membership test. Null or empty entries in
        /// <paramref name="values"/> never match.
        /// </summary>
        private static bool ContainsIgnoreCase(string[] values, string candidate)
        {
            if (candidate.Length == 0)
            {
                return false;
            }

            foreach (string value in values)
            {
                if (string.Equals(value, candidate, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Removes every tag from <paramref name="Input"/> whose element name is not
        /// listed in <paramref name="AllowedTags"/>. Text between tags is left untouched.
        /// </summary>
        /// <param name="Input">The markup to filter. Null is treated as an empty string.</param>
        /// <param name="AllowedTags">
        /// Element names to keep, without angle brackets (for example "b", "a").
        /// Names are compared as whole names and case-insensitively, so allowing "b"
        /// keeps "&lt;b&gt;", "&lt;B class=x&gt;" and "&lt;/b&gt;" but not "&lt;body&gt;".
        /// Null is treated as an empty list (all tags are removed).
        /// </param>
        /// <returns>The input with all non-whitelisted tags removed.</returns>
        public static string StripTags(string Input, string[] AllowedTags)
        {
            if (string.IsNullOrEmpty(Input))
            {
                return string.Empty;
            }

            string[] allowed = AllowedTags ?? new string[0];

            // A single pass over the original matches: each tag is replaced in place,
            // so removing one tag can never disturb the text of another.
            return TagPattern.Replace(
                Input,
                m => ContainsIgnoreCase(allowed, GetTagName(m.Value)) ? m.Value : string.Empty);
        }

        /// <summary>
        /// Rebuilds a single tag so that only whitelisted attributes remain.
        /// Tags that carry no attributes are returned unchanged; closing tags that
        /// carry anything after the name are reduced to "&lt;/name&gt;".
        /// </summary>
        private static string StripAttributes(string tag)
        {
            bool closing = IsClosingTag(tag);
            string name = GetTagName(tag);
            int nameEnd = (closing ? 2 : 1) + name.Length;

            // Everything between the element name and the terminating ">".
            string rest = tag.Substring(nameEnd, tag.Length - nameEnd - 1);
            string trimmed = rest.Trim();

            if (trimmed.Length == 0 || trimmed == "/")
            {
                return tag;
            }

            if (closing)
            {
                return "</" + name + ">";
            }

            string kept = string.Empty;
            int lastEnd = 0;

            foreach (Match attribute in AttributePattern.Matches(rest))
            {
                if (ContainsIgnoreCase(AllowedAttributes, attribute.Groups[1].Value))
                {
                    // Keep the attribute exactly as written, including its quoting.
                    kept += " " + attribute.Value;
                }

                lastEnd = attribute.Index + attribute.Length;
            }

            // A "/" after the last attribute marks a self-closing tag.
            bool selfClosing = rest.IndexOf('/', lastEnd) >= 0;

            return "<" + name + kept + (selfClosing ? " /" : string.Empty) + ">";
        }

        /// <summary>
        /// Removes every tag not listed in <paramref name="AllowedTags"/> and then
        /// removes every attribute other than "href" and "class" from the tags that remain.
        /// </summary>
        /// <param name="Input">The markup to filter. Null is treated as an empty string.</param>
        /// <param name="AllowedTags">Element names to keep; see <see cref="StripTags"/>.</param>
        /// <returns>The filtered markup.</returns>
        public static string StripTagsAndAttributes(string Input, string[] AllowedTags)
        {
            /* Remove all unwanted tags first */
            string Output = StripTags(Input, AllowedTags);

            /* Then rebuild each remaining tag from its whitelisted attributes.
               Parsing the attributes (instead of masking "=" with a placeholder)
               handles any number of attributes per tag and cannot be bypassed by
               input that already contains the placeholder text. */
            return TagPattern.Replace(Output, m => StripAttributes(m.Value));
        }

        /// <summary>Prints the results of a few sample calls to the console.</summary>
        public static void Main(string[] args)
        {
            string test1 = StripTags("<p>George</p><b>W</b><i>Bush</i>", new string[]{"i","b"});
            string test2 = StripTags("<p>George <img src='someimage.png' onmouseover='someFunction()'>W <i>Bush</i></p>", new string[]{"p"});
            string test3 = StripTags("<a href='http://www.dijksterhuis.org'>Martijn <b>Dijksterhuis</b></a>", new string[]{"a"});

            Console.WriteLine(test1);
            Console.WriteLine(test2);
            Console.WriteLine(test3);

            string test4 = "<a class=\"classof69\" onClick='crosssite.boom()' href='http://www.dijksterhuis.org'>Martijn Dijksterhuis</a>";
            Console.WriteLine(StripTagsAndAttributes(test4, new string[]{"a"}));
        }
    }
}
by tarasn
  March 31, 2010 @ 9:21am
Tags:
Description:
The PHP strip_tags function not only neatly removes HTML from an input, it also allows you to specify which tags should stay. This is great if you are allowing your visitors to apply some basic HTML tags to their comments.

Add a comment


Report Abuse
brought to you by:
West Wind Technologies



If you find this site useful and use it frequently please consider making a donation to support this free service.
Donate