【www.gdgbn.com--Google】
java 爬取天气预报代码
//利用http请求爬取google天气预报的代码,并将请求过的城市天气预报按天缓存一下
/**
 * Parses the resource at {@code htmlurl} with the "gbk" encoding and returns the child
 * nodes of the {@code div} element whose {@code class} attribute equals "e".
 *
 * @param htmlurl location handed to the HTML parser (callers in this file pass a search URL)
 * @return the children of the matching div; {@code null} when no div matches or when a
 *         parser exception is raised (the exception is printed, not rethrown)
 */
public nodelist getweatherdiv(string htmlurl) {
    nodelist weatherchildren = null;
    try {
        parser pageparser = new parser(htmlurl);
        pageparser.setencoding("gbk");

        // Restrict the parse result to div elements only.
        orfilter divonly = new orfilter();
        divonly.setpredicates(new nodefilter[] { new nodeclassfilter(div.class) });

        for (node candidate : pageparser.parse(divonly).tonodearray()) {
            if (!(candidate instanceof div)) {
                continue;
            }
            div block = (div) candidate;
            if ("e".equals(block.getattribute("class"))) {
                // Deliberately no break: if several divs carry class "e", the last one wins.
                weatherchildren = block.getchildren();
            }
        }
    } catch (parserexception e) {
        e.printstacktrace();
    }
    return weatherchildren;
}
/**
 * Starts, at most once per {@code isstart} flag, a timer that sweeps {@code hmcache}.
 *
 * <p>The sweep runs immediately and then every 24 hours. It removes every entry whose key
 * does NOT contain today's date stamp, so only the current day's entries survive. The
 * previous code had the test inverted: it discarded today's entries and kept stale ones
 * indefinitely.
 *
 * <p>NOTE(review): {@code isstart} is checked and set without synchronization, and the sweep
 * iterates {@code hmcache} on the timer thread; confirm the map type tolerates concurrent
 * access from request threads.
 * <p>NOTE(review): the date pattern is spelled "yyyymmdd"; if {@code datetimeutil.format}
 * delegates to SimpleDateFormat, lowercase "mm" means minutes rather than month. Confirm.
 */
public static void cleancache() {
    if (isstart) {
        return;
    }
    isstart = true;
    timertask task = new timertask() {
        public void run() {
            // The stamp is the same for the whole sweep, so compute it once.
            string today = datetimeutil.format(new date(), "yyyymmdd");
            iterator it = hmcache.entryset().iterator();
            while (it.hasnext()) {
                map.entry entry = (map.entry) it.next();
                if (entry.getkey().tostring().indexof(today) < 0) {
                    // Stale entry from another day. The iterator's own remove is the only
                    // removal needed and is safe during iteration.
                    it.remove();
                }
            }
        }
    };
    timer timer = new timer();
    timer.schedule(task, calendar.getinstance().gettime(), 24 * 3600 * 1000);
}
/**
 * Extracts one day's forecast from an HTML fragment and stores it in {@code json}.
 *
 * <p>Keys are prefixed with "t" for {@code flag == 0} and "t" + flag otherwise. For every
 * image in the fragment, its {@code title} attribute is stored under both
 * {@code <prefix>_res} and {@code <prefix>_result}, and its URL (prefixed with
 * "http://www.google.cn") under {@code <prefix>_tp}. For every text node containing "°c"
 * at an index greater than 0, the text is stored under the bare prefix. Later matches
 * overwrite earlier ones.
 *
 * <p>The first parameter was garbled in the source ({@code 网页特效onobject 网页特效on}); it is
 * restored to {@code jsonobject json}, the name the body and the caller already use.
 *
 * @param json        target object receiving the extracted values
 * @param flag        day index; 0 selects the un-suffixed "t" prefix
 * @param htmlcontent HTML fragment for a single day
 */
private void addweatherday(jsonobject json, int flag, string htmlcontent) {
    string tt = (flag == 0 ? "t" : ("t" + flag));
    try {
        // The local must not be named "parser": it would obscure the type inside its own
        // initializer.
        parser fragmentparser = parser.createparser(htmlcontent, "gbk");

        // Keep only text nodes and image tags.
        orfilter textorimage = new orfilter();
        textorimage.setpredicates(new nodefilter[] {
            new nodeclassfilter(textnode.class),
            new nodeclassfilter(imagetag.class)
        });

        for (node current : fragmentparser.parse(textorimage).tonodearray()) {
            if (current instanceof imagetag) {
                imagetag img = (imagetag) current;
                string title = img.getattribute("title");
                json.put(tt + "_res", title);
                json.put(tt + "_result", title);
                json.put(tt + "_tp", ("http://www.google.cn" + img.getimageurl()));
            } else if (current instanceof textnode) {
                string t = ((textnode) current).gettext();
                // "> 0" (not ">= 0"): text that starts with the unit marker is skipped.
                if (t.indexof("°c") > 0) {
                    json.put(tt, t);
                }
            }
        }
    } catch (exception ex) {
        // Best effort: a malformed fragment leaves json partially filled.
        ex.printstacktrace();
    }
}
/**
 * Walks every {@code div} in {@code htmlcontent} and passes each per-day block to
 * {@code addweatherday}.
 *
 * <p>A div counts as a per-day block when its normalized {@code class} attribute is empty
 * and its normalized {@code align} attribute equals "center". Blocks are numbered from 0
 * in document order, and that number is passed as the day flag.
 *
 * <p>Behaviour matches the previous version. Removed are the trailing no-op
 * {@code continue}, the empty-align guard (already implied by the "center" test), and the
 * hoisted scratch locals.
 *
 * @param json        object that accumulates the extracted forecast values
 * @param htmlcontent HTML to scan for per-day div blocks
 */
private void getdivtext(jsonobject json, string htmlcontent) {
    try {
        // The local must not be named "parser": it would obscure the type inside its own
        // initializer.
        parser fragmentparser = parser.createparser(htmlcontent, "gbk");

        orfilter divonly = new orfilter();
        divonly.setpredicates(new nodefilter[] { new nodeclassfilter(div.class) });

        int idx = 0;
        for (node current : fragmentparser.parse(divonly).tonodearray()) {
            if (!(current instanceof div)) {
                continue;
            }
            div divnode = (div) current;
            // NOTE(review): formatnullstr presumably maps null to ""; confirm in strcharutil.
            string classname = strcharutil.formatnullstr(divnode.getattribute("class"));
            string align = strcharutil.formatnullstr(divnode.getattribute("align"));
            if (classname.equals("") && align.equals("center")) {
                addweatherday(json, idx, divnode.getchildrenhtml());
                idx++;
            }
        }
    } catch (parserexception pe) {
        pe.printstacktrace();
    }
}
/**
 * Returns the forecast for {@code city}, using {@code hmcache} so each city is fetched at
 * most once per date stamp.
 *
 * <p>The cache key is built once, from the caller's spelling of the city plus today's
 * stamp, and is used for both the lookup and the store. Previously the lookup used the raw
 * name while the store used the name returned by {@code getcityname}, so a repeated
 * request could miss the entry it had just written.
 *
 * <p>The result always contains the key "zhishu" (initially ""). Any exception during the
 * fetch or parse is printed, and the partially filled object is still cached and returned.
 *
 * @param city city name as supplied by the caller
 * @return the cached or freshly built forecast object; never {@code null}
 */
public jsonobject getweather(string city) {
    string today = datetimeutil.format(new date(), "yyyymmdd");
    // Freeze the key before the name is normalized, so get and put always agree.
    final string cachekey = city + today;

    jsonobject cached = hmcache.get(cachekey);
    if (cached != null) {
        return cached;
    }

    jsonobject hm = new jsonobject();
    hm.put("zhishu", "");
    try {
        string cityname = getcityname(city);
        final string googleweatherurl = "http://www.google.cn/search?hl=zh-cn&newwindow=1&q=tq+"
                + urlencoder.encode(cityname, "utf-8") + "&aq=f&oq=";
        nodelist nodelistdiv = getweatherdiv(googleweatherurl);
        if (nodelistdiv != null) {
            getdivtext(hm, nodelistdiv.tohtml());
        }
    } catch (exception ex) {
        ex.printstacktrace();
    }
    // Stored even after a failure, so a broken lookup is not retried on every request.
    hmcache.put(cachekey, hm);
    return hm;
}