/// <summary>
/// Locates the character-set name declared by an HTML <c>&lt;meta&gt;</c> tag.
/// </summary>
/// <remarks>
/// Recognizes both the legacy declaration
/// (<c>&lt;meta http-equiv="Content-Type" content="text/html; charset=utf-8"&gt;</c>)
/// and the short HTML5 declaration (<c>&lt;meta charset="utf-8"&gt;</c>).
/// Attribute values may be double-quoted, single-quoted or unquoted, attributes may
/// appear in any order, and the tag may be self-closing.
/// This is a lightweight textual scan, not an HTML parser: any <c>charset=</c>
/// occurring inside a <c>&lt;meta&gt;</c> tag is accepted.
/// The name stops at a quote, whitespace, ';', '/' or '>' so trailing content-type
/// parameters and the tag terminator are never captured.
/// </remarks>
private static readonly Regex META_TAG_CHARSET_REGEX = new Regex(
    @"<meta\b[^>]*?\bcharset\s*=\s*[""']?\s*(?<charset>[^""'\s;/>]+)",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Extracts the charset name declared in the first matching <c>&lt;meta&gt;</c> tag
/// of the given HTML text.
/// </summary>
/// <param name="htmlText">HTML markup to scan; may be <c>null</c>.</param>
/// <returns>
/// The charset name exactly as written in the markup (case is preserved), or
/// <c>null</c> when <paramref name="htmlText"/> is <c>null</c> or contains no
/// non-empty charset declaration.
/// </returns>
public static string ExtractCharset(string htmlText)
{
    // Regex.Match throws ArgumentNullException on null input; this method already
    // uses null to mean "no declaration found", so report null text the same way.
    if (htmlText == null)
    {
        return null;
    }

    var match = META_TAG_CHARSET_REGEX.Match(htmlText);
    return match.Success ? match.Groups["charset"].Value : null;
}
Note: the System.Text.RegularExpressions.Regex class is thread-safe according to the MSDN documentation, so the shared static instance above can be used concurrently.
コメント