using CsharpHttpHelper;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace Api.Framework.Tools
{
    /// <summary>
    /// Helper for parsing JD.com (Jingdong) links and extracting the product (SKU) id from them.
    /// </summary>
    public static class JDHelper
    {
        /// <summary>
        /// Regular expression that locates a product id. It accepts a number of known
        /// JD link prefixes and captures the following run of at least four digits in
        /// the named group "itemId".
        /// </summary>
        private static readonly string RegItemId = @"(?:item\.jd\.com/|item\.m\.jd\.com/product/|item\.jd\.hk/|jd(?:.*?)sku_id=|skuId=|re.jd.com/cps/item/|jd(?:.*?)[\?&]sku=|jd/dujia/item/|http(?:.*?)jd\.(?:.{0,15}?)|jd(?:.*?)wareId=|sku=|京东(?:[\w\W]*?)wareId=|wareId=)(?<itemId>\d{4,})";

        /// <summary>
        /// Query-string marker that precedes the real target URL in redirect pages
        /// reached through short links such as http://3.cn/xxxx.
        /// </summary>
        private const string AppUrlMarker = "appurl=";

        /// <summary>
        /// Extracts the JD product id from a piece of text.
        /// </summary>
        /// <remarks>
        /// The lookup is best-effort and proceeds in stages:
        /// 1. texts that mention Taobao/Tmall or are WeChat red-packet messages are rejected;
        /// 2. the raw text, then its URL-decoded form, is matched against <see cref="RegItemId"/>
        ///    (skipped for "sq.jd." links);
        /// 3. otherwise the first URL found in the text is requested over HTTP and the
        ///    response body / redirect headers are inspected for an id.
        /// Any exception raised along the way is traced and results in an empty string.
        /// </remarks>
        /// <param name="str">Text that contains a JD link.</param>
        /// <returns>The product id, or <see cref="string.Empty"/> when none could be determined.</returns>
        public static string GetJingdongItemId(string str)
        {
            if (string.IsNullOrWhiteSpace(str))
            {
                return string.Empty;
            }

            try
            {
                // Comparison is case-insensitive on both sides; lowercasing only the
                // haystack would make the upper-case "CDATA" marker impossible to match.
                if (ContainsIgnoreCase(str, "taobao")
                    || ContainsIgnoreCase(str, "detail.tmall")
                    || ContainsIgnoreCase(str, "<![CDATA[微信红包]]>"))
                {
                    return string.Empty;
                }

                if (!ContainsIgnoreCase(str, "sq.jd."))
                {
                    string direct = MatchItemId(str) ?? MatchItemId(HttpHelper.URLDecode(str));
                    if (direct != null)
                    {
                        return direct; // JD product id
                    }
                }

                // Give scheme-less short links ("u.jd.com/xxx") an https:// prefix so the
                // URL extractor can find them. The look-behind leaves links alone that
                // already have a scheme or are part of a longer host name.
                str = Regex.Replace(str, @"(?<![a-z0-9./])u\.jd", "https://u.jd", RegexOptions.IgnoreCase);

                string url = ExtractUrl(str);
                if (!string.IsNullOrWhiteSpace(url))
                {
                    string resolved = ResolveItemIdOverHttp(url);
                    if (resolved != null)
                    {
                        return resolved;
                    }
                }
            }
            catch (Exception ex)
            {
                // Deliberately best-effort: callers expect an empty string on failure,
                // but the cause is traced instead of being discarded silently.
                System.Diagnostics.Trace.TraceWarning("JDHelper.GetJingdongItemId failed: " + ex);
            }

            return string.Empty;
        }

        /// <summary>Case-insensitive (ordinal) substring test.</summary>
        private static bool ContainsIgnoreCase(string text, string value)
        {
            return text.IndexOf(value, StringComparison.OrdinalIgnoreCase) >= 0;
        }

        /// <summary>
        /// Matches <paramref name="text"/> against <see cref="RegItemId"/>.
        /// Returns the captured id, or null when the text is null/empty or does not match.
        /// </summary>
        private static string MatchItemId(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return null;
            }

            Match match = Regex.Match(text, RegItemId, RegexOptions.IgnoreCase);
            return match.Success ? match.Groups["itemId"].Value : null;
        }

        /// <summary>
        /// Finds the URL to follow: the first URL in the text, or failing that the
        /// value of a JSON "jumpUrl" property.
        /// </summary>
        private static string ExtractUrl(string text)
        {
            string url = HttpExtend.RegexMatchUrl(text);
            if (string.IsNullOrWhiteSpace(url))
            {
                url = HttpExtend.RegexMatch(text, @"""jumpUrl"":""(?<链接>http:[^""]+?)""");
            }

            return url;
        }

        /// <summary>
        /// Requests <paramref name="url"/> and tries to discover the product id from the
        /// response body, from script-driven redirects ("hrl='...'") or from Location headers.
        /// Returns null when no id could be found.
        /// </summary>
        private static string ResolveItemIdOverHttp(string url)
        {
            HttpHelper http = new HttpHelper();
            var item = http.GetItem(url);
            var result = http.GetHtml(item);
            string html = result.Html ?? string.Empty;

            // The product page embeds the id directly.
            Match match = Regex.Match(html, @"""skuId"":""(?<宝贝Id>\d+?)""");
            if (match.Success)
            {
                return match.Groups["宝贝Id"].ToString();
            }

            match = Regex.Match(html, @"hrl='(?<跳转A>.+?)';");
            if (match.Success)
            {
                // Follow the script redirect manually so the Location header can be read.
                item.URL = match.Groups["跳转A"].ToString();
                item.Referer = url;
                item.Allowautoredirect = false;
                result = http.GetHtml(item);

                var header = result.Header;
                string id = MatchItemId(header == null ? null : header.Get("Location"));
                if (id != null)
                {
                    return id; // JD product id
                }
            }
            else
            {
                // Links such as http://3.cn/Vc0Avuc: the final URI carries the real
                // target in an "appurl=" parameter.
                string location = result.ResponseUri;
                int markerIndex = string.IsNullOrWhiteSpace(location)
                    ? -1
                    : location.IndexOf(AppUrlMarker, StringComparison.Ordinal);

                if (markerIndex >= 0)
                {
                    string appUrl = HttpHelper.URLDecode(location.Substring(markerIndex + AppUrlMarker.Length));
                    item.URL = appUrl;
                    item.Referer = location;
                    item.Allowautoredirect = false;
                    result = http.GetHtml(item);

                    match = Regex.Match(result.Html ?? string.Empty, @"hrl='(?<hrl>.+?)';");
                    if (match.Success)
                    {
                        string hrl = match.Groups["hrl"].Value;
                        item.URL = hrl;
                        item.Referer = hrl;
                        item.Allowautoredirect = false;
                        result = http.GetHtml(item);

                        string id = MatchItemId(result.RedirectUrl);
                        if (id != null)
                        {
                            return id; // JD product id
                        }
                    }
                }
            }

            // Last resort: request the original URL again with a plain desktop
            // user agent and inspect the Location header of the response.
            http = new HttpHelper();
            item = new HttpItem()
            {
                URL = url,
                Method = "GET",
                Timeout = 100000,
                ReadWriteTimeout = 30000,
                IsToLower = false,
                Cookie = "",
                UserAgent = "Opera/9.27 (Windows NT 5.2; U; zh-cn)",
                Accept = "text/html, application/xhtml+xml, */*",
                ContentType = "text/html",
                Referer = "",
                Postdata = "",
            };
            result = http.GetHtml(item);

            var fallbackHeader = result.Header;
            return MatchItemId(fallbackHeader == null ? null : fallbackHeader.Get("Location"));
        }
    }
}