using System;
using System.Collections.Generic;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using HtmlAgilityPack;

namespace Pmars
{
    /// <summary>
    /// Looks up the price of a JingDong (360buy) product from its product id,
    /// using three alternative page-scraping strategies.
    /// </summary>
    class JingDong
    {
        // Encoding used to decode the desktop JingDong pages.
        static Encoding encoding = Encoding.GetEncoding("gb2312");

        // Folder the downloaded price images are stored in.
        private const string ImageDirectory = "Images";

        // Matches the first decimal number in a string. Built once and reused,
        // because constructing a compiled regex on every call is expensive.
        private static readonly Regex DoublePattern = new Regex(@"\d+(\.\d+)?", RegexOptions.Compiled);

        /// <summary>
        /// Returns the price of the product with the given id.
        /// </summary>
        /// <param name="id">The JingDong product id.</param>
        /// <returns>The price, or 0 when no strategy could determine it.</returns>
        /// <remarks>
        /// The mobile page is tried first (it needs a single request), then the
        /// shopping-cart page (needs cookies), and finally recognition of the price image
        /// on the product page. Each later strategy is used only when the earlier one
        /// returned 0, so no request is made whose result would be thrown away.
        /// </remarks>
        public static double GetPrice(string id)
        {
            // Strategy 1: the mobile shop serves the price as plain text.
            double price = GetPriceByMobile(id);
            if (price > 0)
            {
                return price;
            }

            // Strategy 2: the cart page shows the price as text, but only with cookies
            // recorded from a preceding "add to cart" request.
            price = GetPriceByCookie(id);
            if (price > 0)
            {
                return price;
            }

            // Strategy 3: download the price image from the product page and recognise it.
            return GetPriceByImage(id);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with <paramref name="client"/>, decodes the bytes with
        /// <paramref name="pageEncoding"/>, HTML-decodes the text and parses it.
        /// </summary>
        private static HtmlDocument DownloadDocument(WebClient client, string url, Encoding pageEncoding)
        {
            byte[] bytes = client.DownloadData(url);
            string content = HttpUtility.HtmlDecode(pageEncoding.GetString(bytes));
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(content);
            return htmlDoc;
        }

        /// <summary>
        /// Parses a price string using the invariant culture, so the result does not
        /// depend on the regional settings of the machine running the code.
        /// </summary>
        /// <returns>The parsed value, or 0 when the text is not a number.</returns>
        private static double ParsePrice(string text)
        {
            double price;
            if (double.TryParse(text, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out price))
            {
                return price;
            }
            return 0;
        }

        /// <summary>
        /// Gets the price by downloading the price image of the product page and
        /// recognising the digits in it.
        /// </summary>
        /// <returns>The price, or 0 when the image node is missing or the text is not a number.</returns>
        private static double GetPriceByImage(string id)
        {
            // Each product id identifies exactly one product page.
            string downUrl = "http://www.360buy.com/product/" + id + ".html";

            using (WebClient client = new WebClient())
            {
                HtmlDocument htmlDoc = DownloadDocument(client, downUrl, encoding);

                // Node of the price image.
                var imgNode = htmlDoc.DocumentNode.SelectSingleNode(@"//strong[@class='price']/img");
                if (imgNode == null) // node lookup failed
                {
                    return 0;
                }

                var srcAttribute = imgNode.Attributes["src"];
                if (srcAttribute == null || string.IsNullOrEmpty(srcAttribute.Value))
                {
                    return 0;
                }

                // The src may be a relative address, so resolve it against the page address.
                Uri imgUri = new Uri(new Uri(downUrl), srcAttribute.Value);
                string imgUrl = imgUri.AbsoluteUri;

                // Take the extension from the path part only, so a query string
                // cannot end up in the local file name.
                string extension = Path.GetExtension(imgUri.AbsolutePath);

                // Store the image under Images/ with a GUID-based name so names never clash.
                // DownloadFile does not create the folder, so make sure it exists.
                Directory.CreateDirectory(ImageDirectory);
                string imgPath = @"Images/" + Guid.NewGuid().ToString("N") + extension;
                client.DownloadFile(imgUrl, imgPath);

                // Recognise the characters of the price image.
                JingdongImage jdImage = new JingdongImage();
                string priceStr = jdImage.GetPicNum(imgPath);

                // Extract the first decimal number (this skips the leading currency symbol).
                string dbPrice = DoublePattern.Match(priceStr).Value;
                return ParsePrice(dbPrice);
            }
        }

        /// <summary>
        /// Gets the price from the shopping-cart page after adding the product to the
        /// cart, keeping cookies between the two requests.
        /// </summary>
        /// <returns>The price, or 0 when the price node is missing or the text is not a number.</returns>
        private static double GetPriceByCookie(string id)
        {
            // "Add to cart" link; requesting it mimics a user adding the product to the cart.
            string goUrl = "http://jd2008.360buy.com/purchase/InitCart.aspx?pid=" + id + "&pcount=1&ptype=1";
            // Address of the cart page.
            string shopUrl = "http://jd2008.360buy.com/purchase/shoppingcart_pop.aspx";

            // CookieWebClient attaches one cookie container to every request it makes.
            using (CookieWebClient client = new CookieWebClient())
            {
                // First request only serves to record the cookies.
                client.DownloadData(goUrl);

                HtmlDocument htmlDoc = DownloadDocument(client, shopUrl, encoding);

                // Node holding the price.
                var priceNode = htmlDoc.DocumentNode.SelectSingleNode(@"//span[@class='price'][@id='cartBottom_price']");
                if (priceNode == null) // node lookup failed
                {
                    return 0;
                }

                string priceStr = priceNode.InnerText.Trim();
                if (priceStr.Length == 0)
                {
                    return 0;
                }

                // Drop the leading currency symbol, but never eat a leading digit.
                if (!char.IsDigit(priceStr[0]))
                {
                    priceStr = priceStr.Substring(1);
                }

                return ParsePrice(priceStr);
            }
        }

        /// <summary>
        /// Gets the price from the mobile shop page, which is fetched with a plain
        /// request (no cookie handling) and decoded as UTF-8.
        /// </summary>
        /// <returns>The price, or 0 when the price node is missing or the text is not a number.</returns>
        private static double GetPriceByMobile(string id)
        {
            string downUrl = "http://m.360buy.com/cart/add.action?wareId=" + id;

            using (WebClient client = new WebClient())
            {
                // Note: the mobile shop pages are UTF-8, unlike the desktop pages.
                HtmlDocument htmlDoc = DownloadDocument(client, downUrl, Encoding.UTF8);

                // Node holding the price.
                var priceNode = htmlDoc.DocumentNode.SelectSingleNode(@"//div[@class='p-price']/strong[@class='flk31'][3]");
                if (priceNode == null) // node lookup failed
                {
                    return 0;
                }

                return ParsePrice(priceNode.InnerText);
            }
        }
    }

    /// <summary>
    /// A <see cref="WebClient"/> that attaches the same <see cref="CookieContainer"/>
    /// to every HTTP request it creates.
    /// </summary>
    class CookieWebClient : WebClient
    {
        private readonly CookieContainer m_container = new CookieContainer();

        protected override WebRequest GetWebRequest(Uri address)
        {
            WebRequest request = base.GetWebRequest(address);
            HttpWebRequest httpRequest = request as HttpWebRequest;
            if (httpRequest != null)
            {
                httpRequest.CookieContainer = m_container;
            }
            return request;
        }
    }

    /// <summary>
    /// Recognises characters in an image by cutting it into glyphs along blank columns
    /// and looking up each glyph's 0/1 pixel code in <see cref="numDic"/>.
    /// </summary>
    class MyImage
    {
        // The image currently being analysed.
        private Bitmap bmpobj;

        // Glyph code -> character table; subclasses fill it with the patterns to recognise.
        public Dictionary<string, char> numDic = new Dictionary<string, char>();

        // Gray value of a colour (integer-weighted sum of R, G and B).
        private int GetGrayNumColor(System.Drawing.Color posClr)
        {
            return (posClr.R * 19595 + posClr.G * 38469 + posClr.B * 7472) >> 16;
        }

        // Converts the current image to gray scale, pixel by pixel.
        private void GrayByPixels()
        {
            for (int i = 0; i < bmpobj.Height; i++)
            {
                for (int j = 0; j < bmpobj.Width; j++)
                {
                    int tmpValue = GetGrayNumColor(bmpobj.GetPixel(j, i));
                    bmpobj.SetPixel(j, i, Color.FromArgb(tmpValue, tmpValue, tmpValue));
                }
            }
        }

        // Returns the 0/1 code of a glyph image, row by row:
        // '1' where the red channel is below dgGrayValue, '0' otherwise.
        private string GetSingleBmpCode(Bitmap singlepic, int dgGrayValue)
        {
            // StringBuilder avoids quadratic string concatenation in the pixel loop.
            StringBuilder code = new StringBuilder(singlepic.Width * singlepic.Height);
            for (int posy = 0; posy < singlepic.Height; posy++)
            {
                for (int posx = 0; posx < singlepic.Width; posx++)
                {
                    Color pixel = singlepic.GetPixel(posx, posy);
                    code.Append(pixel.R < dgGrayValue ? '1' : '0');
                }
            }
            return code.ToString();
        }

        // Copies the bounding box (x1, y1)-(x2, y2), inclusive, out of the current image.
        private Bitmap CloneRegion(int x1, int y1, int x2, int y2)
        {
            Rectangle cloneRect = new Rectangle(x1, y1, x2 - x1 + 1, y2 - y1 + 1);
            return bmpobj.Clone(cloneRect, bmpobj.PixelFormat);
        }

        // Splits the current image into one small bitmap per glyph. Glyphs are separated
        // by columns that contain no pixel darker than dgGrayValue. The caller owns the
        // returned bitmaps and must dispose them.
        private Bitmap[] GetPicValidByValue(int dgGrayValue)
        {
            List<Bitmap> picList = new List<Bitmap>();
            int posx1 = bmpobj.Width, posy1 = bmpobj.Height, posx2 = 0, posy2 = 0;
            bool hasInk = false; // true while an unflushed glyph is being accumulated
            int last = -1;       // index of the most recent blank column

            for (int j = 0; j < bmpobj.Width; j++)
            {
                bool cut = false;
                for (int i = 0; i < bmpobj.Height; i++)
                {
                    int pixelValue = bmpobj.GetPixel(j, i).R;
                    if (pixelValue < dgGrayValue) // dark pixel: grow the bounding box
                    {
                        if (posx1 > j) posx1 = j;
                        if (posy1 > i) posy1 = i;
                        if (posx2 < j) posx2 = j;
                        if (posy2 < i) posy2 = i;
                        cut = true;
                    }
                }

                if (cut)
                {
                    hasInk = true;
                    continue;
                }

                // Blank column directly after another blank column (or at the left edge):
                // nothing has been accumulated, just move on.
                if (last + 1 == j)
                {
                    last++;
                    continue;
                }

                // First blank column after a glyph: cut the glyph out and start over.
                picList.Add(CloneRegion(posx1, posy1, posx2, posy2));
                last = j;
                posx1 = bmpobj.Width;
                posy1 = bmpobj.Height;
                posx2 = 0;
                posy2 = 0;
                hasInk = false;
            }

            // A glyph touching the right edge has no blank column after it; flush it here
            // so it is not silently dropped.
            if (hasInk)
            {
                picList.Add(CloneRegion(posx1, posy1, posx2, posy2));
            }

            return picList.ToArray();
        }

        // Disposes every bitmap in the array.
        private static void DisposeAll(Bitmap[] pics)
        {
            foreach (Bitmap pic in pics)
            {
                pic.Dispose();
            }
        }

        // Disposes the working bitmap; for file-based bitmaps this also releases the file.
        private void ReleaseBitmap()
        {
            if (bmpobj != null)
            {
                bmpobj.Dispose();
                bmpobj = null;
            }
        }

        // Recognises the characters of the current image. Glyphs whose code is not in
        // numDic are skipped.
        private string GetPicNumber()
        {
            GrayByPixels();                           // gray-scale conversion
            Bitmap[] pics = GetPicValidByValue(128);  // cut into glyphs
            try
            {
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < pics.Length; ++i)
                {
                    string code = GetSingleBmpCode(pics[i], 128); // glyph code
                    char c;
                    if (numDic.TryGetValue(code, out c))
                    {
                        sb.Append(c);
                    }
                }
                return sb.ToString();
            }
            finally
            {
                DisposeAll(pics);
            }
        }

        /// <summary>
        /// Recognises the characters in <paramref name="pic"/>. The bitmap is copied, so the
        /// caller's instance is not modified.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="pic"/> is null.</exception>
        public string GetPicNum(Bitmap pic)
        {
            if (pic == null)
            {
                throw new ArgumentNullException("pic");
            }

            bmpobj = new Bitmap(pic); // copy of the caller's bitmap
            try
            {
                return GetPicNumber();
            }
            finally
            {
                ReleaseBitmap();
            }
        }

        /// <summary>
        /// Recognises the characters in the image file <paramref name="fileName"/>.
        /// </summary>
        public string GetPicNum(string fileName)
        {
            bmpobj = new Bitmap(fileName);
            try
            {
                return GetPicNumber();
            }
            finally
            {
                ReleaseBitmap();
            }
        }

        // Prints the index and 0/1 code of every glyph found in the current image.
        // Used for testing and for collecting the codes to put into numDic.
        private void TestNumber()
        {
            GrayByPixels();                           // gray-scale conversion
            Bitmap[] pics = GetPicValidByValue(128);  // cut into glyphs
            try
            {
                for (int i = 0; i < pics.Length; ++i)
                {
                    string code = GetSingleBmpCode(pics[i], 128); // glyph code
                    Console.WriteLine(i);
                    Console.WriteLine(code);
                }
            }
            finally
            {
                DisposeAll(pics);
            }
        }

        /// <summary>
        /// Prints the glyph codes found in <paramref name="pic"/> to the console.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="pic"/> is null.</exception>
        public void TestNum(Bitmap pic)
        {
            if (pic == null)
            {
                throw new ArgumentNullException("pic");
            }

            bmpobj = new Bitmap(pic); // copy of the caller's bitmap
            try
            {
                TestNumber();
            }
            finally
            {
                ReleaseBitmap();
            }
        }

        /// <summary>
        /// Prints the glyph codes found in the image file <paramref name="fileName"/> to the console.
        /// </summary>
        public void TestNum(string fileName)
        {
            bmpobj = new Bitmap(fileName);
            try
            {
                TestNumber();
            }
            finally
            {
                ReleaseBitmap();
            }
        }
    }

    /// <summary>
    /// <see cref="MyImage"/> preloaded with the glyph codes of the JingDong price images:
    /// the currency symbol, the digits 0-9 and the decimal point.
    /// </summary>
    class JingdongImage : MyImage
    {
        public JingdongImage()
        {
            numDic.Add("111100001111011000000110001100001100000110011000000011110000000011110000000001100000000001100000001111111100000001100000000001100000000001100000000001100000000011110000", '¥');
            numDic.Add("001111100011000110110000011110000011110000011110000011110000011110000011110000011011000110001111100", '0');
            numDic.Add("001100111100001100001100001100001100001100001100001100001100111111", '1');
            numDic.Add("011111100110000110110000011000000011000000110000001100000011000000110000001100000011000000111111111", '2');
            numDic.Add("011111100110000110110000011000000011000000110000111100000000110000000011110000011110000110011111100", '3');
            numDic.Add("00000011000000011100000010110000010011000010001100010000110010000011001111111111000000110000000011000000001100", '4');
            numDic.Add("011111111011000000011000000011000000011111100000000110000000011000000011110000011110000110011111100", '5');
            numDic.Add("000111110001100000011000000110000000110111100111000110110000011110000011110000011011000110001111100", '6');
            numDic.Add("111111111000000011000000110000000110000001100000001100000011000000011000000110000000110000001100000", '7');
            numDic.Add("001111100011000110110000011110000011011000110001111100011000110110000011110000011011000110001111100", '8');
            numDic.Add("001111100011000110110000011110000011110000011011000111001111011000000011000000110000001100011111000", '9');
            numDic.Add("111111", '.');
        }
    }
}