博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
京东商城商品价格获取方法(转帖)
阅读量:6239 次
发布时间:2019-06-22

本文共 8794 字,大约阅读时间需要 29 分钟。

using System;

using System.Collections.Generic;
using System.Text;
using System.Drawing;
using System.Net;
using System.Web;
using HtmlAgilityPack;
using System.Text.RegularExpressions;
using System.IO;
namespace Pmars
{
    class JingDong
    {
        //得到京东页面的编码格式
        static Encoding encoding = Encoding.GetEncoding("gb2312");
        //如果知道某件商品的ID,我们如何得到这件商品的价格
        public static double GetPrice(string id)
        {
            double price = 0;
            //第一种方式
            //详细查看以前发过的图片识别的博客,利用识别京东价格图片的方法去获得商品的价格
            price = GetPriceByImage(id);
            //第二种方式
            //虽然京东的价格被做成了图片的模式,但是,在购物车或者在结算的时候的价格不是图片的
            //这样我们就可以想办法得到那里的价格就可以了
            //实验证明,在获得购物车或者结算时的页面需要cookie
            //那么,我们可以去下载另一个页面,之后记录cookie就可以解决问题了
            //这里,定义一个类去继承WebClient,用来记录前后的cookie
            price = GetPriceByCookie(id);
            //第三种方式
            //虽然京东在价格上做了很多的手脚,但是我们可以变相的采用其他的方式来处理
            //比如,我们发现,京东有手机商城,这样,我们在手机商场上做了查看
            //发现,手机商城根本就不用Cookie,而是直接下载页面就可以得到数据
            //好吧,看下面的程序就可以了
            price = GetPriceByMobile(id);
            return price;
        }
        private static double GetPriceByImage(string id)
        {
            //京东的商品的Id是独一无二的,也就是说,每个Id标示了一件商品的内容页
            //得到京东商品的下载页面
            string downUrl = "http://www.360buy.com/product/"+id+".html";
            
            //下载京东的商品内容页面,主要是为了下载图片
            byte[] bytes = new WebClient().DownloadData(downUrl);
            
            //通过编码得到页面的内容string,用HttpUtility.HtmlDecode解码
            string content = HttpUtility.HtmlDecode(encoding.GetString(bytes));
            //利用HtmlAgilityPack来加载分析Html页面内容
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(content);
            //得到需要下载的价格图片节点
            var imgNode = htmlDoc.DocumentNode.SelectSingleNode(@"//strong[@class='price']/img");
            //得到需要下载的图片地址,需要用当前的页面地址去拼接一下,否则有可能是部分地址
            string imgUrl = new Uri(new Uri(downUrl),imgNode.Attributes["src"].Value).AbsoluteUri;
            //下载图片,放到Images文件夹下
            //生成图片的地址,@"Images/" + 生成一个Guid以保证他们的名字都是不同的 + 图片的格式
            string imgPath = @"Images/" + Guid.NewGuid().ToString().Replace("-", "") + imgUrl.Substring(imgUrl.LastIndexOf('.'));
            //下载图片
            new WebClient().DownloadFile(imgUrl, imgPath);
            //分析图片,得到商品的价格
            JingdongImage jdImage = new JingdongImage();
            string priceStr = jdImage.GetPicNum(imgPath);
            //定义一个匹配double的正则,从价格字符串中得到价格
            //更简单的方法就是priceStr = priceStr.SubString(1);//去掉第一个¥字符
            Regex doublePattern = new Regex(@"\d+(\.\d+)?", RegexOptions.Compiled);
            string dbPrice = doublePattern.Match(priceStr).Value;
            double price = 0;
            if(double.TryParse(dbPrice,out price))
                return price;
            return 0;
        }
        private static double GetPriceByCookie(string id)
        {
            //京东的商品的Id是独一无二的,也就是说,每个Id标示了一件商品的内容页
            //得到京东商品的加入购物车的链接,用程序来模仿人来将商品加入到购物车里面去
            //加入购物车的链接
            string goUrl = "http://jd2008.360buy.com/purchase/InitCart.aspx?pid=" + id + "&pcount=1&ptype=1";
            //定义一个CookieWebClient类来达到记录Cookie的效果
            CookieWebClient client = new CookieWebClient();
            //去下载一个页面来记录Cookie
            client.DownloadData(goUrl);
            //得到结算页面的页面的Url
            string shopUrl = "http://jd2008.360buy.com/purchase/shoppingcart_pop.aspx";
            //下载得到结算页面的数据
            byte[] bytes = client.DownloadData(shopUrl);
            string content = HttpUtility.HtmlDecode(encoding.GetString(bytes));
            //利用HtmlAgilityPack来加载分析Html页面内容
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(content);
            //得到商品价格的节点
            var priceNode = htmlDoc.DocumentNode.SelectSingleNode(@"//span[@class='price'][@id='cartBottom_price']");
            if (priceNode == null)//节点查找失败
                return 0;
            //得到商品的价格string,去掉第一个字符¥
            string priceStr = priceNode.InnerText.Substring(1);
            double price = 0;
            if (double.TryParse(priceStr, out price))
                return price;
            return 0;
        }
        private static double GetPriceByMobile(string id)
        {
            //京东的商品的Id是独一无二的,也就是说,每个Id标示了一件商品的内容页
            //得到京东商品的下载页面
            string downUrl = "http://m.360buy.com/cart/add.action?wareId=" + id;
            //下载京东的商品内容页面,主要是为了下载图片
            byte[] bytes = new WebClient().DownloadData(downUrl);
            //注意手机商城里面的Encoding是UTF8的
            //通过编码得到页面的内容string,用HttpUtility.HtmlDecode解码
            string content = HttpUtility.HtmlDecode(Encoding.UTF8.GetString(bytes));
            //利用HtmlAgilityPack来加载分析Html页面内容
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(content);
            //得到价格节点
            var priceNode = htmlDoc.DocumentNode.SelectSingleNode(@"//div[@class='p-price']/strong[@class='flk31'][3]");
            if (priceNode == null)//节点查找失败
                return 0;
            //得到商品的价格string,去掉第一个字符¥
            string priceStr = priceNode.InnerText;
            double price = 0;
            if (double.TryParse(priceStr, out price))
                return price;
            return 0;
        }
    }
    class CookieWebClient : WebClient
    {
        private CookieContainer m_container = new CookieContainer();
        protected override WebRequest GetWebRequest(Uri address)
        {
            WebRequest request = base.GetWebRequest(address);
            if (request is HttpWebRequest)
            {
                (request as HttpWebRequest).CookieContainer = m_container;
            }
            return request;
        }
    }
    class MyImage
    {
        //需要进行分析的图片
        private Bitmap bmpobj;
        //需要继承并且重写的数字比对串,在子类里面需要用到这个去比对数字
        public Dictionary<string, char> numDic = new Dictionary<string, char>();
        //得到图片中某一点的灰度数值
        private int GetGrayNumColor(System.Drawing.Color posClr)
        {
            return (posClr.R * 19595 + posClr.G * 38469 + posClr.B * 7472) >> 16;
        }
        //进行灰度处理
        private void GrayByPixels()
        {
            for (int i = 0; i < bmpobj.Height; i++)
            {
                for (int j = 0; j < bmpobj.Width; j++)
                {
                    int tmpValue = GetGrayNumColor(bmpobj.GetPixel(j, i));
                    bmpobj.SetPixel(j, i, Color.FromArgb(tmpValue, tmpValue, tmpValue));
                }
            }
        }
        //得到一个图片的01代码序列
        private string GetSingleBmpCode(Bitmap singlepic, int dgGrayValue)
        {
            Color piexl;
            string code = "";
            for (int posy = 0; posy < singlepic.Height; posy++)
                for (int posx = 0; posx < singlepic.Width; posx++)
                {
                    piexl = singlepic.GetPixel(posx, posy);
                    if (piexl.R < dgGrayValue)    // Color.Black )
                        code = code + "1";
                    else
                        code = code + "0";
                }
            return code;
        }
        //从一个图片里面得到几个分开的数字小图片
        private Bitmap[] GetPicValidByValue(int dgGrayValue)
        {
            List<Bitmap> PicList = new List<Bitmap>();
            Rectangle cloneRect;
            int posx1 = bmpobj.Width, posy1 = bmpobj.Height, posx2 = 0, posy2 = 0;
            bool cut = false; int last = -1, lastx = 0;
            for (int j = 0; j < bmpobj.Width; j++)      //找有效区
            {
                cut = false;
                for (int i = 0; i < bmpobj.Height; i++)
                {
                    int pixelValue = bmpobj.GetPixel(j, i).R;
                    if (pixelValue < dgGrayValue)     //根据灰度值
                    {
                        if (posx1 > j) posx1 = j;
                        if (posy1 > i) posy1 = i;
                        if (posx2 < j) posx2 = j;
                        if (posy2 < i) posy2 = i;
                        cut = true;
                    }
                };
                if (cut)
                    continue;
                if (last + 1 == j)
                {
                    last++;
                    continue;
                }
                cloneRect = new Rectangle(posx1, posy1, posx2 - posx1 + 1, posy2 - posy1 + 1);
                lastx = j; last = j; posx1 = bmpobj.Width; posy1 = bmpobj.Height; posx2 = 0; posy2 = 0;
                PicList.Add(bmpobj.Clone(cloneRect, bmpobj.PixelFormat));//复制小块图
            };
            return PicList.ToArray();
        }
        //得到一个图片的数字串
        private string GetPicNumber()
        {
            GrayByPixels(); //灰度处理
            Bitmap[] pics = GetPicValidByValue(128); //得到有效值
            StringBuilder sb = new StringBuilder();
            char c;
            for (int i = 0; i < pics.Length; ++i)
            {
                string code = GetSingleBmpCode(pics[i], 128);   //得到代码串
                if (numDic.TryGetValue(code, out c))
                {
                    sb.Append(c);
                }
            }
            return sb.ToString();
        }
        //外部调用,得到一个图片的数字串
        public string GetPicNum(Bitmap pic)
        {
            bmpobj = new Bitmap(pic);    //转换为Format32bppRgb
            return GetPicNumber();
        }
        //外部调用,得到一个图片的数字串(重载)
        public string GetPicNum(string fileName)
        {
            bmpobj = new Bitmap(fileName);
            return GetPicNumber();
        }
        //输出在一幅图里面找到的数字,测试或者找到数字比对串时用
        private void TestNumber()
        {
            GrayByPixels(); //灰度处理
            Bitmap[] pics = GetPicValidByValue(128); //得到有效值
            for (int i = 0; i < pics.Length; ++i)
            {
                string code = GetSingleBmpCode(pics[i], 128);   //得到代码串
                Console.WriteLine(i);
                Console.WriteLine(code);
            }
        }
        //外部调用,输出在一幅图里面找到的数字,测试或者找到数字比对串时用
        public void TestNum(Bitmap pic)
        {
            bmpobj = new Bitmap(pic);    //转换为Format32bppRgb
            TestNumber();
        }
        //外部调用,输出在一幅图里面找到的数字,测试或者找到数字比对串时用(重载)
        public void TestNum(string fileName)
        {
            bmpobj = new Bitmap(fileName);
            TestNumber();
        }
    }
    class JingdongImage : MyImage
    {
        public JingdongImage()
        {
            numDic.Add("111100001111011000000110001100001100000110011000000011110000000011110000000001100000000001100000001111111100000001100000000001100000000001100000000001100000000011110000", '¥');
            numDic.Add("001111100011000110110000011110000011110000011110000011110000011110000011110000011011000110001111100", '0');
            numDic.Add("001100111100001100001100001100001100001100001100001100001100111111", '1');
            numDic.Add("011111100110000110110000011000000011000000110000001100000011000000110000001100000011000000111111111", '2');
            numDic.Add("011111100110000110110000011000000011000000110000111100000000110000000011110000011110000110011111100", '3');
            numDic.Add("00000011000000011100000010110000010011000010001100010000110010000011001111111111000000110000000011000000001100", '4');
            numDic.Add("011111111011000000011000000011000000011111100000000110000000011000000011110000011110000110011111100", '5');
            numDic.Add("000111110001100000011000000110000000110111100111000110110000011110000011110000011011000110001111100", '6');
            numDic.Add("111111111000000011000000110000000110000001100000001100000011000000011000000110000000110000001100000", '7');
            numDic.Add("001111100011000110110000011110000011011000110001111100011000110110000011110000011011000110001111100", '8');
            numDic.Add("001111100011000110110000011110000011110000011011000111001111011000000011000000110000001100011111000", '9');
            numDic.Add("111111", '.');
        }
    }
}

转载于:https://www.cnblogs.com/mrray/archive/2011/12/30/2307331.html

你可能感兴趣的文章
第14章 结构和其他数据形式 14.6 指向结构的指针
查看>>
FLUSH PRIVILEGES
查看>>
CI3如何使用controller继承
查看>>
Moodle介绍篇
查看>>
nagios部署
查看>>
CentOS 内核升级步骤和方法
查看>>
俄罗斯方块,算法简单 注释详细,大家运行玩玩
查看>>
centos6.5 iptables 配置
查看>>
tuxera NTFS 2016支持macOS 10.12 Sierra
查看>>
ftpclient 上传下载文件
查看>>
JQuery 获取touchstart,touchmove,touchend 坐标
查看>>
exp/imp远程操作数据库
查看>>
安全狗云备份 数据库、网站备份好帮手
查看>>
《Linux 内核分析》课程作业(1)——计算机基本原理和汇编基础
查看>>
ORACLE 近期执行的sql
查看>>
WAS集群安装<六>错误解答及概念术语
查看>>
编写GO的WEB开发框架 (九): Dao和Service
查看>>
【js】call,apply的区别
查看>>
node.js 基于socket.io的聊天室
查看>>
使用jQuery和ajax代替iframe
查看>>