1. 程式人生 > >C#正則表示式入門(下)

C#正則表示式入門(下)

一、匹配郵政編碼,郵政編碼為6位數字組成。

// Read one line from standard input and report whether it consists of
// six digits (the postal-code format described above).
string code = Console.ReadLine();
Regex reg = new Regex(@"^\d{6}$", RegexOptions.None);
if (reg.IsMatch(code))
{
    Console.WriteLine("匹配成功!");
}
else
{
    Console.WriteLine("匹配失敗!");
}

 

二、匹配數值

// Read one line and report whether it is a non-negative number:
// one or more digits, optionally followed by a decimal point and more digits.
string code = Console.ReadLine() ?? string.Empty; // ReadLine returns null at end of input

// The decimal point must be escaped; an unescaped '.' matches any character,
// which would let inputs such as "1a2" pass as numbers.
Regex reg = new Regex(@"^\d+\.?\d*$", RegexOptions.None);

Console.WriteLine(reg.IsMatch(code) ? "匹配成功!" : "匹配失敗!");

 

三、去掉字串中的所有空格

// Demonstrates three whitespace clean-ups on one input line:
// removing all whitespace, then trimming the tail, then trimming the head.
string code = Console.ReadLine();

// Every run of whitespace, anywhere in the input.
string withoutWhitespace = Regex.Replace(code, @"\s+", "", RegexOptions.None);
Console.WriteLine("去掉空格符後的字串為:" + withoutWhitespace);
Console.WriteLine("原始字串長度為:" + code.Length);

// Trailing whitespace only.
code = Regex.Replace(code, @"\s+$", "");
Console.WriteLine("去掉尾部空格符後的長度為:" + code.Length);

// Leading whitespace only (applied to the already tail-trimmed text).
code = Regex.Replace(code, @"^\s+", "");
Console.WriteLine("去掉首部空格符後的長度為:" + code.Length);

注:上面所說的空格符包括空格、製表符、換行符等空白字元。

 

四、提取輸入字串中的所有合法的數值

下面程式用於提取所有的整數或者浮點數

// Extract every integer or floating-point number from one input line
// and print them with a running index.
string code = Console.ReadLine() ?? string.Empty; // ReadLine returns null at end of input

// A single pattern with an optional fractional part. With the alternation
// "\d+|..." the integer branch always wins first, so "3.14" would be split
// into "3" and "14"; the decimal point is also escaped so it only matches '.'.
Regex reg = new Regex(@"\d+(\.\d+)?", RegexOptions.None);

MatchCollection mc = reg.Matches(code); // scan from the first character

int counter = 0;
foreach (Match mt in mc)
{
    Console.WriteLine("數值【" + (++counter) + "】:" + mt.Value);
}

五、身份證號碼驗證

身份證號碼為15位全數字,或者為18位,前17位為數字,最後一位為0-9的數字或者字元“x”、“X”。

// Validate an ID-card number read from standard input: either 15 digits,
// or 17 digits followed by a digit or the letter x/X.
string code = Console.ReadLine();
Regex reg = new Regex(@"^\d{15}$|^\d{17}[0-9xX]$", RegexOptions.Singleline);
bool isValid = reg.IsMatch(code);
string verdict = isValid ? "匹配成功!" : "匹配失敗!";
Console.WriteLine(verdict);

 

六、獲取一個網頁中包括的所有URL

包含的名字空間如下:

using System.Text.RegularExpressions;

using System.Net;

using System.IO;

using System.Collections;

 

/// <summary>
/// Downloads a page, extracts every http/https URL (scheme and host part)
/// with a regular expression, and prints the distinct URLs in the order
/// they first appear.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
        {
            string pageContent = GetWebPageContent(@"http://www.baidu.com");
            if (pageContent == null)
            {
                // The download failed and the helper has already printed the
                // error; Regex.Matches would throw on a null input.
                return;
            }

            Regex reg = new Regex(@"http(s)?://[-\w]+(\.\w[-\w]*)+", RegexOptions.Singleline);
            MatchCollection mc = reg.Matches(pageContent);
            Console.WriteLine("提取網址數量:" + mc.Count);

            // 'seen' gives a constant-time duplicate check (keyed by URL);
            // 'orderedUrls' keeps first-seen order so the numbering is stable.
            Hashtable seen = new Hashtable();
            ArrayList orderedUrls = new ArrayList();
            foreach (Match mt in mc)
            {
                if (!seen.ContainsKey(mt.Value))
                {
                    orderedUrls.Add(mt.Value);
                    seen.Add(mt.Value, orderedUrls.Count);
                }
            }

            Console.WriteLine("去重後網址數量:" + orderedUrls.Count);
            for (int i = 0; i < orderedUrls.Count; i++)
            {
                Console.WriteLine("網址【" + (i + 1) + "】:" + orderedUrls[i]);
            }
        }

        /// <summary>

        /// 獲取網頁內容

        /// </summary>

        /// <param name="url">網址</param>

        /// <returns></returns>

        private static string GetWebPageContent(string url)

        {

            try

            {

                HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create(url);

                req.UserAgent = "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)";

                req.Accept = "*/*";

                req.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

                req.ContentType = "text/xml";

 

                HttpWebResponse resp = (HttpWebResponse)req.GetResponse();

                Encoding enc;

                try

                {

                    if (resp.CharacterSet.ToUpper() != "ISO-8859-1")

                        enc = Encoding.GetEncoding(resp.CharacterSet);

                    else

                        enc = Encoding.UTF8;

                }

                catch

                {

                    // *** Invalid encoding passed

                    enc = Encoding.UTF8;

                }

                string sHTML = string.Empty;

                using (StreamReader read = new StreamReader(resp.GetResponseStream(), enc))

                {

                    sHTML = read.ReadToEnd();

                }

                return sHTML;

            }

            catch (Exception ex)

            {

                Console.WriteLine(ex.Message.ToString());

                return null;

            }

 

        }

七、獲取圖片連結

 

// Find <img ...> tags whose src/data-src attribute ends in one of the listed
// extensions (single-line mode, case-insensitive), then pull the link out of each tag.
Regex reg = new Regex("<img\\s+[^<>]*(src|data-src)=[^<>\\s]+(\\.(png|gif|jpg|ico|bmp|jpeg|tiff|dxf|\\s))", RegexOptions.Singleline | RegexOptions.IgnoreCase);
MatchCollection mc = reg.Matches(strLine);
foreach (Match imgTag in mc)
{
    // Second pass over the tag text: group 3 holds the attribute value.
    Match attribute = Regex.Match(imgTag.Value, "(src|data-src)=(\")?(.*?)(\"|\\s|$)", RegexOptions.IgnoreCase);
    string link = attribute.Groups[3].Value;

    // Skip values that are empty or a single character after trimming.
    if (link.Trim().Length <= 1)
    {
        continue;
    }

    // Skip links that were already collected.
    if (myPicture.ContainsValue(link))
    {
        continue;
    }

    myPicture.Add(++counter, link);
}

八、分組的使用

// Demonstrates reading numbered capture groups from a match.
string content = @"vqwsdvasdhttp://202.38.193.153:8000/publishHtml/announcement_11024/announcement_11024.html asdf?gr";

// Whole URL up to ".html"; group 2 is the text between "://" and the next '/' or ':'.
Match mt = Regex.Match(content, @"http(s)?://([^/:]*)(.*?)\.html", RegexOptions.IgnoreCase);
Console.WriteLine(mt.Value);           // prints: http://202.38.193.153:8000/publishHtml/announcement_11024/announcement_11024.html
Console.WriteLine(mt.Groups[2].Value); // prints: 202.38.193.153

// Group 1 is the run of digits between ':' and '/'.
mt = Regex.Match(content, @":(\d{1,6})/", RegexOptions.IgnoreCase);
Console.WriteLine(mt.Groups[1].Value); // prints: 8000

content = "<p><img class=news-smallimg-img height=40 width=68 src=\"http://images.cnitblog.com/blog/347600/201304/27121407-76ee60d0689949668aed919e47f9b959.jpg\" alt=\"\" /></p>";
mt = Regex.Match(content, @"<img\s+[^<>]*(src|data-src)=\s*""?(([^<>\s])+)(\.(png|gif|jpg|ico|bmp|jpeg|tiff|dxf|\s))", RegexOptions.Singleline | RegexOptions.IgnoreCase);

// Prints, in order: the attribute name ("src"), the link without its extension,
// the extension with the dot (".jpg"), and the bare extension ("jpg").
foreach (int groupIndex in new int[] { 1, 2, 4, 5 })
{
    Console.WriteLine(mt.Groups[groupIndex].Value);
}

 

九、環視的使用

【例1】要求匹配teacher,但是不能匹配teachers,teacher’s等形式。

// Positive lookahead: match "teacher" only when the next character is
// whitespace, so "teachers" and "TEACHER'S" are not reported.
string content = "you i last teacher * 800 <,?ojJOa sd teachers y628 TEACHER'S";
MatchCollection mc = Regex.Matches(content, @"teacher(?=\s)", RegexOptions.IgnoreCase);
for (int i = 0; i < mc.Count; i++)
{
    Console.WriteLine(mc[i].Value);
}

 

【例2】要求找出所有含teach的單詞

// Negative lookahead: starting at "teach", keep consuming characters for as
// long as the next one is not whitespace, then print each match.
string content = "you i last teacher * 800 teaching <,?ojJOa sd teachers y628 TEACHER'S";
MatchCollection mc = Regex.Matches(content, @"teach((?!\s).)*", RegexOptions.IgnoreCase);
for (int i = 0; i < mc.Count; i++)
{
    Console.WriteLine(mc[i].Value);
}

【例3】將一個較大的數值每3位用一個逗號隔開

// Insert a comma at every position that has a digit on its left and a
// multiple of three digits (followed by a non-digit or the end) on its right.
// For this input the output is "The population of 298,444,215 is growing".
string content = "The population of 298444215 is growing";
string res = Regex.Replace(content, @"(?<=\d)(?=(\d\d\d)+(\s|\D|$))", ",", RegexOptions.IgnoreCase);
Console.WriteLine(res);

十、其它

【例1】最左最長規則

// The first optional group consumes "self", after which the second optional
// group cannot match and is skipped, so the overall match is "oneself".
string source = "oneselfsufficient";
Match firstMatch = Regex.Match(source, "one(self)?(selfsufficient)?");
Console.WriteLine(firstMatch.Value); // prints: oneself

【例2】多分支結構按照順序優先匹配

// Alternatives are tried left to right and the first one that succeeds wins,
// so swapping the branches changes the result.
// Prints "oneself" for the first pattern and "oneselfsufficient" for the second.
string source = "oneselfsufficient";
string[] patterns = new string[] { "one(self|selfsufficient)", "one(selfsufficient|self)" };
foreach (string pattern in patterns)
{
    Console.WriteLine(Regex.Match(source, pattern).Value);
}

注:.Net使用的正則表示式引擎型別為傳統NFA,對於多分支結構,按照順序優先匹配。