正则表达式(?is)(<tr[^>]*>)(.*?)(</tr>),请问前面的(?is)是什么意思?
(?is)表示启用模式修改符号
i表示忽略大小写
s表示启用单行模式(Singleline),即 . 可以匹配包括换行符 \n 在内的任意字符
asp.net html内容去格式
/// <summary>
/// Strips script blocks, HTML tags, comment remnants and a fixed set of character
/// entities from <paramref name="Htmlstring"/>, removes any remaining angle brackets
/// and CRLF pairs, then HTML-encodes and trims the result.
/// </summary>
/// <param name="Htmlstring">Raw HTML text. <c>null</c> or empty input yields an empty string.</param>
/// <returns>The HTML-encoded plain text with leading and trailing whitespace removed.</returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove scripts. Singleline lets '.' cross line breaks, so script bodies that
    // span several lines are removed together with their tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove HTML tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Drop whitespace runs that follow a line break or a tab.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\t])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove leftover comment delimiters (an unterminated "<!--" is cut to end of line).
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the known entities, in this order; the last row deletes every
    // numeric entity that was not decoded by an earlier row.
    string[,] entityReplacements =
    {
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
        { @"&#(\d+);", "" },
    };
    for (int i = 0; i < entityReplacements.GetLength(0); i++)
    {
        Htmlstring = Regex.Replace(Htmlstring, entityReplacements[i, 0], entityReplacements[i, 1], RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the results must be assigned back,
    // otherwise these three calls have no effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // HttpUtility.HtmlEncode does not need a current HttpContext, so this also works
    // outside of a request where HttpContext.Current is null.
    return HttpUtility.HtmlEncode(Htmlstring).Trim();
}
正则表达式匹配table
string s = "......"; // HTML string
// Row pattern: (?is) turns on ignore-case and single-line mode; group 1 is repeated,
// so each <tr>...</tr> of the table with class "tableDataTable" becomes one capture.
string pattern = @"(?is)<table[^>]+?class=""tableDataTable""[^>]*>\s*(<tr.*?>.+?</tr>\s*)+</table>";
// Cell pattern uses the same inline options as the row pattern, so upper-case <TD>
// tags and cell contents that span several lines are captured as well.
string cellPattern = @"(?is)<td.*?>(.*?)</td>";
var list = Regex.Match(s, pattern).Groups[1].Captures.Cast<Capture>()
    .Skip(1) // drops the first captured row (presumably the header row; confirm against the page)
    .Select(c => Regex.Matches(c.Value, cellPattern)
        .Cast<Match>()
        .Select(m => m.Groups[1].Value)
        .ToArray())
    // Rows with fewer than six cells cannot fill the record below; skip them
    // instead of failing with IndexOutOfRangeException.
    .Where(td => td.Length >= 6)
    .Select(td => new
    {
        币种 = td[0],
        中间价 = td[1],
        现汇买入价 = td[2],
        现钞买入价 = td[3],
        卖出价 = td[4],
        发布时间 = td[5]
    })
    .ToList();
注意 .*?、.+?、\s+? 的区别:三者都是非贪婪(懒惰)模式,分别表示尽可能少地匹配"零个或多个任意字符"、"一个或多个任意字符"、"一个或多个空白字符"
强制整串匹配:用 ^ 和 $ 锚定字符串的开头和结尾(^....$),要求整个字符串都符合模式,例如:
// Validates the shape of an 18-character ID number, anchored at both ends:
//   [1-9][0-9]{5}              six digits, the first one non-zero
//   [1-9][0-9]{3}              four digits, the first one non-zero (laid out like a year)
//   (0[0-9])|(1[0-2])          two digits in the range 00-12 (laid out like a month)
//   ([0-2][0-9])|3[0-1]        two digits in the range 00-31 (laid out like a day)
//   [0-9]{4}|[0-9]{3}X         four digits, or three digits followed by an upper-case X
// [0-2] replaces the former [0|1|2], which also accepted a literal '|' character.
// [0-9] replaces \d, which in .NET matches any Unicode decimal digit, not only ASCII 0-9.
Regex regIDCard = new Regex(@"^[1-9][0-9]{5}[1-9][0-9]{3}((0[0-9])|(1[0-2]))(([0-2][0-9])|3[0-1])([0-9]{4}|[0-9]{3}X)$");
if (!regIDCard.IsMatch(pid))
{
    // Report a malformed ID number to the caller and stop processing.
    result.ErrorRes.Err_code = "400";
    result.ErrorRes.Err_content = "身份证号有误";
    return result;
}
在量词 * 或 + 后面加上 ?(即 *? 或 +?)表示非贪婪模式
使用RegexBuddy工具验证