我们这些站长都希望自己的网站在百度有个好排名。
排名在后,收录在前;收录在后,抓取在前。要想被收录,就得先被抓取,抓取网页是前提。百度站长平台(ziyuan.baidu.com)提供了链接提交通道,不过遗憾的是,手动提交每次仅限20条url,对于一个稍大点的网站,每天产生的页面比较多,就比较麻烦了。虽然百度也提供了js自动提交的方式,但前提是该页面必须被访问才能触发提交行为,总不能每发布一篇,就得浏览一番,太费劲了。
有没有高效的办法呢?有的,百度也意识到上述问题,于是推出了api实时主动推送的服务,这个功能确实强大,可以将新发布的、未收录的网址链接收集起来,统统发给百度,通知蜘蛛尽快抓取,不过需要有编程能力才能实现。
对于会写些程序、又干过seo的我来说,这并不是很难的问题。百度提供了post、curl等调用示例,下面是用Java实现的版本:
package com.yangshengliang.baidutuisong;
import com.alibaba.fastjson.JSONObject;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Vector;
/**
 * Pushes a batch of page URLs to the Baidu link-submission endpoint and turns the
 * HTTP response into a short, human-readable status message.
 */
public class PostUrls {
    // Placeholder left by the author: replace it with the quoted API address
    // obtained from the Baidu webmaster platform before compiling.
    private static String zzApiUrl = 接口调用地址;

    // HTTP status codes this class maps to a message.
    private static final int SUCCESS_200 = 200;
    private static final int ERROR_400 = 400;
    private static final int ERROR_401 = 401;
    private static final int ERROR_404 = 404;
    private static final int ERROR_500 = 500;

    /**
     * Posts the given URLs to {@code zzApiUrl} as a {@code text/plain} body, one URL
     * per line, and returns a message describing the outcome.
     *
     * <p>Failures (network errors, malformed responses) are handled best-effort: the
     * stack trace is printed and whatever has been collected so far is returned,
     * which may be the raw response body or an empty string.
     *
     * @param urls URLs to submit; {@code null} entries and entries that do not start
     *             with {@code http://} or {@code https://} are skipped. A {@code null}
     *             collection yields an empty result without contacting the server.
     * @return a status message, the raw response body if it could not be interpreted,
     *         or an empty string if the request failed before a response was read
     * @throws IOException declared for backward compatibility with existing callers;
     *                     I/O failures are caught and reported as described above
     */
    public static String postUrl(Vector<String> urls) throws IOException {
        StringBuilder result = new StringBuilder();
        if (urls == null) {
            return result.toString();
        }

        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(zzApiUrl).openConnection();
            connection.setRequestMethod("POST");
            connection.setRequestProperty("Content-Type", "text/plain");
            connection.setRequestProperty("User-Agent", "curl/7.12.1");
            connection.setRequestProperty("Host", "data.zz.baidu.com");
            connection.setDoInput(true);
            connection.setDoOutput(true);

            // Send the body with an explicit charset instead of the platform default.
            try (OutputStreamWriter writer =
                    new OutputStreamWriter(connection.getOutputStream(), StandardCharsets.UTF_8)) {
                writer.write(buildPayload(urls));
            }

            int statusCode = connection.getResponseCode();
            switch (statusCode) {
                case SUCCESS_200:
                    String body = readBody(connection);
                    // Keep the raw body as the fallback result in case it cannot be parsed.
                    result.append(body);
                    String summary = describeSuccess(body);
                    result.setLength(0);
                    result.append(summary);
                    break;
                case ERROR_400:
                    result.append("站点未在站长平台验证");
                    break;
                case ERROR_401:
                    result.append("接口调用地址 错误");
                    break;
                case ERROR_404:
                    result.append("接口地址填写错误");
                    break;
                case ERROR_500:
                    result.append("服务器偶然异常,通常重试就会成功");
                    break;
                default:
                    result.append("未知错误");
                    break;
            }
        } catch (Exception e) {
            // Deliberate best-effort: report the problem and return what we have.
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return result.toString();
    }

    /** Joins the trimmed, absolute http(s) URLs into a newline-separated request body. */
    private static String buildPayload(Vector<String> urls) {
        StringBuilder payload = new StringBuilder();
        for (String url : urls) {
            if (url == null) {
                continue;
            }
            String trimmed = url.trim();
            // The scheme must be the prefix; merely containing "http://" is not enough.
            if (trimmed.startsWith("http://") || trimmed.startsWith("https://")) {
                payload.append(trimmed).append('\n');
            }
        }
        return payload.toString();
    }

    /** Reads the whole response body as UTF-8, concatenating lines without separators. */
    private static String readBody(HttpURLConnection connection) throws IOException {
        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
        }
        return body.toString();
    }

    /**
     * Interprets the JSON body of a 200 response. {@code success} is treated as the
     * number of accepted URLs, so any positive value counts as a successful push.
     */
    private static String describeSuccess(String body) {
        JSONObject json = JSONObject.parseObject(body);
        if (json == null) {
            // Nothing parseable (e.g. blank body): hand back the body unchanged.
            return body;
        }
        StringBuilder message = new StringBuilder();
        // getIntValue yields 0 when the key is absent, so a missing count reads as failure.
        message.append(json.getIntValue("success") > 0 ? "提交成功" : "提交失败,");
        if (json.get("not_same_site") != null) {
            message.append("接口调用地址与提交的网址不匹配");
        }
        return message.toString();
    }
}