using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using CsharpHttpHelper;

namespace Api.Framework.Tools
{
    /// <summary>
    /// Helper for extracting JD.com (Jingdong) product ids from free-form text and links.
    /// </summary>
    public static class JDHelper
    {
        /// <summary>
        /// Regular expression that locates a product id (four or more digits, captured as
        /// <c>itemId</c>) after one of the known JD link or query-string prefixes.
        /// </summary>
        private const string RegItemId = @"(?:item\.jd\.com/|item\.m\.jd\.com/product/|item\.jd\.hk/|jd(?:.*?)sku_id=|skuId=|re.jd.com/cps/item/|jd(?:.*?)[\?&]sku=|jd/dujia/item/|http(?:.*?)jd\.(?:.{0,15}?)|jd(?:.*?)wareId=|sku=|京东(?:[\w\W]*?)wareId=|wareId=)(?<itemId>\d{4,})";

        /// <summary>
        /// Extracts the JD product id from a piece of text.
        /// </summary>
        /// <remarks>
        /// The text is first matched directly (raw, then URL-decoded). When that fails, the
        /// first link found in the text is requested over HTTP and the id is looked up in the
        /// returned page or in the redirect target. Any failure along the way is treated as
        /// "no id found"; this method does not throw.
        /// </remarks>
        /// <param name="str">Text that contains a JD link.</param>
        /// <returns>The product id, or <see cref="string.Empty"/> when none could be determined.</returns>
        public static string GetJingdongItemId(string str)
        {
            if (string.IsNullOrWhiteSpace(str))
            {
                return string.Empty;
            }

            try
            {
                // Taobao/Tmall links and WeChat red-packet messages never carry a JD product.
                // Ordinal, case-insensitive search keeps the check independent of the current
                // culture and lets the mixed-case CDATA marker actually match.
                if (ContainsIgnoreCase(str, "taobao")
                    || ContainsIgnoreCase(str, "detail.tmall")
                    || ContainsIgnoreCase(str, "<title><![CDATA[微信红包]]></title>"))
                {
                    return string.Empty;
                }

                // Text mentioning "sq.jd." skips the direct pattern match and is always
                // resolved by following the link.
                if (!ContainsIgnoreCase(str, "sq.jd."))
                {
                    string direct = MatchItemId(str);
                    if (direct != null)
                    {
                        return direct;
                    }

                    direct = MatchItemId(HttpHelper.URLDecode(str));
                    if (direct != null)
                    {
                        return direct;
                    }
                }

                return ResolveItemIdOnline(str);
            }
            catch (Exception)
            {
                // Deliberate best-effort: a malformed link or a failed request simply means
                // that no id could be found.
            }

            return string.Empty;
        }

        /// <summary>
        /// Returns true when <paramref name="text"/> contains <paramref name="fragment"/>,
        /// compared ordinally and ignoring case.
        /// </summary>
        private static bool ContainsIgnoreCase(string text, string fragment)
        {
            return text.IndexOf(fragment, StringComparison.OrdinalIgnoreCase) >= 0;
        }

        /// <summary>
        /// Applies <see cref="RegItemId"/> to <paramref name="text"/> and returns the captured
        /// id, or null when the text is null/empty or does not match.
        /// </summary>
        private static string MatchItemId(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return null;
            }

            Match found = Regex.Match(text, RegItemId, RegexOptions.IgnoreCase);
            return found.Success ? found.Groups["itemId"].Value : null;
        }

        /// <summary>
        /// Reads the "Location" response header of <paramref name="result"/> and extracts a
        /// product id from it; returns null when the header collection, the header or the id
        /// is missing.
        /// </summary>
        private static string MatchItemIdInLocationHeader(HttpResult result)
        {
            if (result == null || result.Header == null)
            {
                return null;
            }

            return MatchItemId(result.Header.Get("Location"));
        }

        /// <summary>
        /// Follows the link contained in <paramref name="str"/> over HTTP and tries to read
        /// the product id from the page content or from the redirect target.
        /// </summary>
        /// <returns>The product id, or <see cref="string.Empty"/> when it cannot be resolved.</returns>
        private static string ResolveItemIdOnline(string str)
        {
            // Give bare "u.jd..." short links a scheme so they can be picked up as URLs.
            // Links that already follow "://" are left alone to avoid a doubled scheme.
            str = Regex.Replace(str, @"(?<!://)u\.jd", "https://u.jd", RegexOptions.IgnoreCase);

            var link = HttpExtend.RegexMatchUrl(str);
            if (string.IsNullOrWhiteSpace(link))
            {
                // Fall back to a "jumpUrl" value embedded in JSON-like text.
                link = HttpExtend.RegexMatch(str, @"""jumpUrl"":""(?<链接>http:[^""]+?)""");
            }

            if (string.IsNullOrWhiteSpace(link))
            {
                return string.Empty;
            }

            HttpHelper http = new HttpHelper();
            var item = http.GetItem(link);
            var result = http.GetHtml(item);
            string html = result.Html ?? string.Empty;

            // 1) The page itself exposes the id as "skuId":"...".
            Match sku = Regex.Match(html, @"""skuId"":""(?<宝贝Id>\d+?)""");
            if (sku.Success)
            {
                return sku.Groups["宝贝Id"].ToString();
            }

            string id;
            Match jump = Regex.Match(html, @"hrl='(?<跳转A>.+?)';");
            if (jump.Success)
            {
                // 2) The page contains a script-level jump target (hrl='...'); request it
                //    without following redirects and inspect the Location header.
                item.URL = jump.Groups["跳转A"].ToString();
                item.Referer = link;
                item.Allowautoredirect = false;
                result = http.GetHtml(item);

                id = MatchItemIdInLocationHeader(result);
                if (id != null)
                {
                    return id;
                }
            }
            else
            {
                // 3) Short links such as http://3.cn/Vc0Avuc: the final response URI carries
                //    the real target in an "appurl=" query value.
                string landing = result.ResponseUri;
                if (!string.IsNullOrWhiteSpace(landing))
                {
                    const string marker = "appurl=";
                    int at = landing.IndexOf(marker, StringComparison.Ordinal);
                    if (at >= 0)
                    {
                        item.URL = HttpHelper.URLDecode(landing.Substring(at + marker.Length));
                        item.Referer = landing;
                        item.Allowautoredirect = false;
                        result = http.GetHtml(item);

                        Match next = Regex.Match(result.Html ?? string.Empty, @"hrl='(?<hrl>.+?)';");
                        if (next.Success)
                        {
                            string target = next.Groups["hrl"].Value;
                            item.URL = target;
                            item.Referer = target;
                            item.Allowautoredirect = false;
                            result = http.GetHtml(item);

                            id = MatchItemId(result.RedirectUrl);
                            if (id != null)
                            {
                                return id;
                            }
                        }
                    }
                }
            }

            // 4) Last resort: request the link again with a fresh, explicitly configured
            //    request and read the id from the Location header.
            http = new HttpHelper();
            item = new HttpItem()
            {
                URL = link,
                Method = "GET",
                Timeout = 100000,
                ReadWriteTimeout = 30000,
                IsToLower = false,
                Cookie = "",
                UserAgent = "Opera/9.27 (Windows NT 5.2; U; zh-cn)",
                Accept = "text/html, application/xhtml+xml, */*",
                ContentType = "text/html",
                Referer = "",
                Postdata = "",
            };
            result = http.GetHtml(item);

            id = MatchItemIdInLocationHeader(result);
            return id ?? string.Empty;
        }
    }
}