【www.gdgbn.com--Google】

Java 爬取天气预报代码
// 利用 HTTP 请求爬取 Google 天气预报的代码,并将请求过的城市天气预报按天缓存

 public nodelist getweatherdiv(string htmlurl) {
        nodelist res = null;
        try{
            parser parser = new parser(htmlurl);
            parser.setencoding("gbk");

            nodefilter divfilter = new nodeclassfilter(div.class);

            orfilter lastfilter = new orfilter();
            lastfilter
                    .setpredicates(new nodefilter[] { divfilter });

            nodelist nodelist = parser.parse(lastfilter);
            node[] nodes = nodelist.tonodearray();

            for (int i = 0; i < nodes.length; i++) {
                node anode = (node) nodes[i];
                if(anode instanceof div){
                    div mydiv = (div)anode;
                    string classname = mydiv.getattribute("class");
                    if(classname!=null && classname.equals("e")){
                        res = mydiv.getchildren();
                    }
                }
            }
        } catch (parserexception e) {
            e.printstacktrace();
        }
        return res;
    }   
   
    public static void cleancache() {
        if(isstart) return;
        isstart = true;
        timertask task = new timertask() {
            public void run() {       
                iterator it = hmcache.entryset().iterator();
                while (it.hasnext()) {
                    map.entry entry = (map.entry) it.next();
                    object key = entry.getkey();
                    string today = datetimeutil.format(new date(),"yyyymmdd");
                    if(key.tostring().indexof(today)>=0){
                        it.remove();
                        hmcache.remove(key);
                    }         
                }              
            }
        };
        timer timer = new timer();
        timer.schedule(task, calendar.getinstance ().gettime(), 24*3600 * 1000);

     }    
   
   
    private void addweatherday(网页特效onobject 网页特效on,int flag,string htmlcontent){
        string tt = (flag==0?"t":("t"+flag));
        try{

                node anode = null;
                parser parser = parser.createparser(htmlcontent, "gbk");
                nodefilter textfilter = new nodeclassfilter(textnode.class);
                nodefilter imgfilter = new nodeclassfilter(imagetag.class);
               
                orfilter lastfilter = new orfilter();
                lastfilter.setpredicates(new nodefilter[] { textfilter,imgfilter });
                //string t = "",t_res = "",t_tp="";
                nodelist nodelist = parser.parse(lastfilter);
                node[] nodes = nodelist.tonodearray();
                for (int i = 0; i < nodes.length; i++) {
                    anode = (node) nodes[i];
                    if(anode instanceof imagetag){
                        imagetag img = (imagetag)anode;
                        if(img!=null){
                            json.put(tt+"_res", img.getattribute("title"));
                            json.put(tt+"_result", img.getattribute("title"));
                            json.put(tt+"_tp", ("http://www.google.cn"+img.getimageurl()));
                        }
                    }else if(anode instanceof textnode){
                        textnode text = (textnode)anode;
                        string t = text.gettext();
                        if(t.indexof("°c")>0){
                            json.put(tt, t);
                        }
                    }
                }

        }catch(exception ex){
            ex.printstacktrace();
        }
    }
   
    private void getdivtext(jsonobject json, string htmlcontent) {
        string line = "";
        node anode = null;
        div divnode = null;
        try {
            parser parser = parser.createparser(htmlcontent, "gbk");
            nodefilter divfilter = new nodeclassfilter(div.class);
            orfilter lastfilter = new orfilter();
            lastfilter.setpredicates(new nodefilter[] { divfilter });

            nodelist nodelist = parser.parse(lastfilter);
            int idx = 0;
            node[] nodes = nodelist.tonodearray();
            for (int i = 0; i < nodes.length; i++) {
                anode = (node) nodes[i];
                line = "";
                if (anode instanceof div) {
                    divnode = (div) anode;
                    string classname = strcharutil.formatnullstr(divnode.getattribute("class"));
                    string align = strcharutil.formatnullstr(divnode.getattribute("align"));
                    if(align.equals("")) continue;
                    if(classname.equals("") && align.equals("center")){
                        line = divnode.getchildrenhtml();
                        addweatherday(json,idx,line);
                        idx ++;
                    }
                   
                }
                if (strcharutil.formatnullstr(line).equals(""))
                    continue;
            }
        } catch (parserexception pe) {
            pe.printstacktrace();
        }
    }   
   
    public jsonobject getweather(string city){
        string today = datetimeutil.format(new date(),"yyyymmdd");
        if(hmcache.get(city+today)!=null){
            return hmcache.get(city+today);
        }       
        jsonobject hm =new jsonobject();
        hm.put("zhishu","");
       
      
        try{
            city = getcityname(city);
            final string googleweatherurl = "http://www.google.cn/search?hl=zh-cn&newwindow=1&q=tq+"+urlencoder.encode(city,"utf-8")+"&aq=f&oq=";
           
            nodelist nodelistdiv = getweatherdiv(googleweatherurl);
            int idx = 0;
            if(nodelistdiv!=null){
                getdivtext(hm,nodelistdiv.tohtml());
            }
           
           
        }catch(exception ex){
            ex.printstacktrace();
        }
       
       
        hmcache.put(city+today, hm);
        return hm;
    }

本文来源:http://www.gdgbn.com/seo/25778/