准备工作:
通过 NuGet 安装 Microsoft.Bcl.Compression,
然后引入命名空间:
using System.IO.Compression ;
/// <summary>
/// Downloads the resource at <paramref name="url"/> with an HTTP GET request and
/// returns its body as text, inflating the body first when the server gzip-compressed it.
/// </summary>
/// <param name="url">Absolute URL of the resource to download.</param>
/// <returns>
/// The response body decoded with the encoding selected by <c>GetEncoding</c>,
/// or an empty string when the response carries no body stream.
/// </returns>
public static async Task<string> Get(string url)
{
    WebRequest request = WebRequest.CreateHttp(new Uri(url));
    request.Method = "GET";
    request.Headers[HttpRequestHeader.UserAgent] = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0";
    // Advertise only gzip: it is the only content coding this method decodes, so the
    // server must not be invited to answer with one that would come back as garbage.
    request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip";

    using (var response = await request.GetResponseAsync())
    {
        var raw = response.GetResponseStream();
        if (raw == null)
        {
            return string.Empty;
        }

        // The header is missing (null) on uncompressed responses, so compare null-safely.
        // Ordinal comparison: header tokens are protocol text, not culture-sensitive words.
        var isGzip = string.Equals(
            response.Headers["Content-Encoding"], "gzip", StringComparison.OrdinalIgnoreCase);

        // Disposing the GZipStream also disposes the underlying response stream.
        using (Stream body = isGzip ? new GZipStream(raw, CompressionMode.Decompress) : raw)
        using (var collected = new MemoryStream())
        {
            await body.CopyToAsync(collected);
            var bytes = collected.ToArray();
            return GetEncoding(bytes, response.Headers["Content-Type"]).GetString(bytes);
        }
    }
}
获取编码:
/// <summary>
/// Matches a <c>charset=NAME</c> declaration, quoted or unquoted, and captures only
/// the characters that can belong to an encoding name.
/// </summary>
private static readonly Regex CharsetRegex = new Regex(
    @"charset\b\s*=\s*[""']?(?<charset>[\w.:+-]+)",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

/// <summary>
/// Picks the text encoding for a downloaded document.
/// </summary>
/// <remarks>
/// A <c>charset=</c> declaration found inside the document wins; otherwise the charset
/// from <paramref name="charSet"/> is used; UTF-8 is the fallback whenever no usable
/// encoding name is found.
/// </remarks>
/// <param name="bytes">Raw bytes of the document.</param>
/// <param name="charSet">
/// Either a bare encoding name (<c>gbk</c>) or a full Content-Type header value
/// (<c>text/html; charset=gbk</c>); may be null or empty.
/// </param>
/// <returns>The resolved encoding; never null.</returns>
public static Encoding GetEncoding(byte[] bytes, string charSet)
{
    // Register before any lookup so that gbk / gb2312 resolve no matter whether the
    // name came from the document or from the header.
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

    if (bytes != null && bytes.Length > 0)
    {
        // Charset declarations are ASCII, so a UTF-8 decode is good enough to find them.
        var declared = ResolveEncodingOrNull(ExtractCharset(Encoding.UTF8.GetString(bytes)));
        if (declared != null)
        {
            return declared;
        }
    }

    if (!string.IsNullOrEmpty(charSet))
    {
        // Accept "text/html; charset=gbk" as well as a bare "gbk".
        var fromHeader = ResolveEncodingOrNull(ExtractCharset(charSet) ?? charSet.Trim());
        if (fromHeader != null)
        {
            return fromHeader;
        }
    }

    return Encoding.UTF8;
}

/// <summary>Returns the name in the first <c>charset=</c> declaration of <paramref name="text"/>, or null.</summary>
private static string ExtractCharset(string text)
{
    var match = CharsetRegex.Match(text);
    return match.Success ? match.Groups["charset"].Value : null;
}

/// <summary>Looks up an encoding by name, returning null instead of throwing when the name is unusable.</summary>
private static Encoding ResolveEncodingOrNull(string name)
{
    if (string.IsNullOrEmpty(name))
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(name);
    }
    catch (ArgumentException)
    {
        // Unknown or malformed encoding name.
        return null;
    }
    catch (NotSupportedException)
    {
        // Encoding exists but is not available on this platform.
        return null;
    }
}
虽然使用 HttpClient 更简单
// The same GET request issued through HttpClient.
var client = new HttpClient();
var defaultHeaders = client.DefaultRequestHeaders;
// Present a browser User-Agent and advertise support for compressed responses.
defaultHeaders.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0");
defaultHeaders.Add("accept-encoding", "gzip, deflate");
var target = new Uri(url);
var reply = await client.GetAsync(target);
// Throws when the status code does not indicate success.
reply.EnsureSuccessStatusCode();
但是它的响应头(response.Headers)里并没有 Content-Encoding(该头被归为内容头,需要通过 response.Content.Headers.ContentEncoding 读取),所以不能像上面那样直接从响应头判断需不需要 Gzip 解压。
转载请保留原文链接: https://zodream.cn/blog/id/9.html