正则表达式相关

正则表达式相关

Charles

2015 年 02 月 06 日

806 次浏览

暂无评论

6122字数

正则表达式(?is)(<tr[^>]*>)(.*?)(</tr>),请问前面的(?is)是什么意思？

(?is)表示启用模式修改符号

i表示忽略大小写

s表示启用单行模式（Singleline），即 . 可以匹配包括换行符在内的任意字符

ASP.NET：去除 HTML 内容中的标签和格式

/// <summary>
/// Strips scripts, tags and comments from an HTML fragment, decodes a small set
/// of common entities, removes any remaining angle brackets and line breaks,
/// and returns the HTML-encoded, trimmed plain text.
/// </summary>
/// <param name="Htmlstring">The HTML fragment to clean. May be null or empty.</param>
/// <returns>
/// The cleaned, HTML-encoded text; an empty string when the input is null or empty.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    // Regex.Replace throws on null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks. Singleline lets '.' match line breaks, so scripts
    // that span several lines are removed together with their content.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    // Remove HTML tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    // Remove line breaks / tabs together with the whitespace that follows them.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\t])[\s]+", "", RegexOptions.IgnoreCase);
    // Remove leftovers of HTML comments.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the most common named / numeric entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", "  ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
    // Drop every other numeric entity.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // Strings are immutable: string.Replace returns a new string, so the result
    // has to be assigned back or the call has no effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // WebUtility performs standard HTML encoding without requiring an active
    // HttpContext, so the method also works outside of a web request.
    return System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
}

正则表达式匹配table

string s = "......"; // html string
// (?is) = ignore case + single-line mode ('.' also matches line breaks).
// Group 1 is repeated once per <tr>, so every row ends up in Groups[1].Captures.
string pattern = @"(?is)<table[^>]+?class=""tableDataTable""[^>]*>\s*(<tr.*?>.+?</tr>\s*)+</table>";
var list = Regex.Match(s, pattern).Groups[1].Captures.Cast<Capture>()
    // The first captured row is skipped (presumably the header row).
    .Skip(1)
    // Extract the cell contents of each row. The same (?is) options as the
    // outer pattern are used so upper-case tags and multi-line cells match too.
    .Select(c => Regex.Matches(c.Value, "(?is)<td.*?>(.*?)</td>")
        .Cast<Match>().Select(m => m.Groups[1].Value).ToArray())
    // Ignore rows with fewer than six cells instead of failing on td[5].
    .Where(td => td.Length >= 6)
    .Select(td => new
    {
        币种 = td[0],
        中间价 = td[1],
        现汇买入价 = td[2],
        现钞买入价 = td[3],
        卖出价 = td[4],
        发布时间 = td[5]
    })
    .ToList();

注意区分 .*?、.+?、\s+? 这几种非贪婪（懒惰）量词：.*? 可以匹配零个字符，而 .+? 和 \s+? 至少要匹配一个字符。

使用 ^...$ 强制整串匹配（必须从字符串开头匹配到结尾），例如：

// 18-character ID card number, anchored with ^...$ so the whole string must match:
//   [1-9]\d{5}               six-digit leading code (first digit non-zero)
//   [1-9]\d{3}               four-digit year
//   (0[1-9]|1[0-2])          month 01-12
//   (0[1-9]|[12]\d|3[01])    day 01-31
//   (\d{4}|\d{3}X)           last four digits, or three digits followed by 'X'
// The day part previously used the class [0|1|2], which also accepts a literal '|',
// and month/day "00" were accepted; both are rejected now.
Regex regIDCard = new Regex(@"^[1-9]\d{5}[1-9]\d{3}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])(\d{4}|\d{3}X)$");
// A missing value is reported as an invalid ID instead of throwing from IsMatch.
if (pid == null || !regIDCard.IsMatch(pid))
{
    result.ErrorRes.Err_code = "400";
    result.ErrorRes.Err_content = "身份证号有误";
    return result;
}

注：在量词后加 ?（如 *? 或 +?）表示非贪婪模式，即尽可能少地匹配。

使用RegexBuddy工具验证

正则表达式相关