1. 程式人生 > >java實現爬蟲功能

java實現爬蟲功能

ack 訪問 base aid for tail tor obj 執行

/**
 * Crawls news articles and wraps each one in a {@code News} bean.
 */
public class GetNews {

    /**
     * Fetches the index page, follows every headline link ({@code h3>a}) and
     * builds one {@code News} object per article that could be downloaded.
     *
     * <p>Failures are isolated per article: a link that cannot be resolved to
     * an absolute URL is skipped, and an I/O error on one article is reported
     * and does not stop the remaining articles from being fetched.
     *
     * @return the articles that were fetched successfully; empty (never
     *         {@code null}) when the index page itself cannot be loaded
     */
    public List<News> getNews() {
        // Collected news objects
        List<News> list = new ArrayList<News>();

        Document document;
        try {
            // Request the index page DOM
            document = Jsoup.connect("http://baijia.baidu.com/").get();
        } catch (IOException e) {
            e.printStackTrace();
            return list;
        }

        // Headline anchors
        Elements titles = document.select("h3>a");
        for (Element title : titles) {
            String url = title.absUrl("href");
            // jsoup documents absUrl as returning "" when no absolute URL can
            // be built; connecting to "" would fail with an unchecked exception.
            if (url.isEmpty()) {
                continue;
            }
            try {
                list.add(fetchNews(title.text(), url));
            } catch (IOException e) {
                // One unreachable article must not abort the whole crawl.
                e.printStackTrace();
            }
        }
        return list;
    }

    /**
     * Downloads a single article page and extracts its time and body text.
     *
     * @param titleText headline text taken from the index page
     * @param url absolute URL of the article
     * @return a populated {@code News} object
     * @throws IOException if the article page cannot be fetched
     */
    private News fetchNews(String titleText, String url) throws IOException {
        Document article = Jsoup.connect(url).get();
        String selectTime = article.select("span[class=time]").text();
        String selectBody = article.select("div[class=article-detail]").text();

        News news = new News();
        news.setTitle(titleText);
        news.setBody(selectBody);
        news.setDate(selectTime);
        return news;
    }

}

/**
 * Stores the given news objects in the {@code news} table with a single
 * multi-row INSERT.
 *
 * <p>Values are bound through {@code ?} placeholders rather than concatenated
 * into the statement, so quotes or SQL fragments inside scraped text cannot
 * break or alter the query.
 *
 * @param list news objects to persist; {@code null} or empty is a no-op
 * @return the row count reported by {@code BaseDao.executeUpdate}, or 0 when
 *         there is nothing to insert
 */
public int save(List<News> list) {
    // Nothing to insert: avoid sending "insert ... values" with no tuples.
    if (list == null || list.isEmpty()) {
        return 0;
    }

    // SQL prefix; one "(?,?,?)" tuple is appended per news object.
    StringBuilder sql = new StringBuilder("insert into news (title,body,date) values");
    Object[] params = new Object[list.size() * 3];
    int next = 0;
    for (News news : list) {
        if (next > 0) {
            sql.append(',');
        }
        sql.append("(?,?,?)");
        params[next++] = news.getTitle();
        params[next++] = news.getBody();
        params[next++] = news.getDate();
    }

    System.out.println(sql);
    return BaseDao.executeUpdate(sql.toString(), params);
}

/**
 * General-purpose JDBC helper: opens a connection from the settings held by
 * {@code ConfigManager}, runs parameterized statements and releases resources.
 *
 * <p>The connection, statement and result set are kept in static fields, so
 * each call overwrites the state of the previous one.
 */
public class BaseDao {
    // JDBC objects shared by all calls
    protected static Connection connection = null;
    protected static PreparedStatement ps = null;
    protected static ResultSet rs = null;

    /**
     * 01. Opens a database connection using the {@code jdbc.driver},
     * {@code jdbc.url}, {@code jdbc.userName} and {@code jdbc.password}
     * configuration values.
     *
     * @return {@code true} if the connection was opened, {@code false} if the
     *         driver class could not be loaded or the connection failed
     */
    public static boolean getConnection() {
        // The four settings required to connect
        String driver = ConfigManager.getValue("jdbc.driver");
        String url = ConfigManager.getValue("jdbc.url");
        String userName = ConfigManager.getValue("jdbc.userName");
        String password = ConfigManager.getValue("jdbc.password");

        try {
            Class.forName(driver); // load the driver
            connection = DriverManager.getConnection(url, userName, password);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            return false;
        } catch (SQLException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * 03. Runs an INSERT/UPDATE/DELETE statement, e.g.
     * {@code delete from user where id=? and name=?}, and then closes the
     * connection.
     *
     * @param sql statement text, optionally containing {@code ?} placeholders
     * @param params values bound to the placeholders, in order
     * @return the number of affected rows, or 0 if the connection or the
     *         statement failed
     */
    public static int executeUpdate(String sql, Object... params) {
        int rowNum = 0;
        if (getConnection()) { // a connection is required first
            try {
                ps = connection.prepareStatement(sql);
                bindParams(ps, params);
                rowNum = ps.executeUpdate();
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                closeConnection(); // release resources
            }
        }
        return rowNum;
    }

    /**
     * 04. Runs a SELECT statement, e.g.
     * {@code select * from user where id=? and name=?}.
     *
     * <p>The connection is left open so the returned result set can be read;
     * the caller must invoke {@link #closeConnection()} when done.
     *
     * @param sql query text, optionally containing {@code ?} placeholders
     * @param params values bound to the placeholders, in order
     * @return the result set, or {@code null} if the connection or the query
     *         failed
     */
    public static ResultSet executeQuery(String sql, Object... params) {
        // Reset first so a failure cannot return the result set of an earlier call.
        rs = null;
        if (getConnection()) { // a connection is required first
            try {
                ps = connection.prepareStatement(sql);
                bindParams(ps, params);
                rs = ps.executeQuery();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        return rs;
    }

    /**
     * 02. Releases the result set, statement and connection.
     *
     * <p>Every non-null resource is closed even if closing an earlier one
     * fails, so one failing {@code close()} cannot leak the others.
     *
     * @return {@code true} if every open resource closed without error
     */
    public static boolean closeConnection() {
        boolean allClosed = true;
        // Resources that were never created cannot be closed, hence the null checks.
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
                allClosed = false;
            }
            rs = null;
        }
        if (ps != null) {
            try {
                ps.close();
            } catch (SQLException e) {
                e.printStackTrace();
                allClosed = false;
            }
            ps = null;
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
                allClosed = false;
            }
            connection = null;
        }
        return allClosed;
    }

    /** Binds each value to its 1-based {@code ?} placeholder position. */
    private static void bindParams(PreparedStatement statement, Object[] params)
            throws SQLException {
        for (int i = 0; i < params.length; i++) {
            statement.setObject(i + 1, params[i]);
        }
    }

}

/**
 * Fuzzy search: returns the news rows whose title contains the given keyword.
 *
 * <p>Database resources are always released through
 * {@code BaseDao.closeConnection()} before returning, and SQL errors are
 * reported instead of being silently discarded.
 *
 * @param name keyword to look for inside the title
 * @return matching news objects; empty (never {@code null}) when nothing
 *         matches or the query fails
 */
public List<News> selectNews(String name) {
    List<News> list = new ArrayList<News>();
    String sql = "select * from news where title like ?";
    Object[] params = { "%" + name + "%" };
    try {
        ResultSet rs = BaseDao.executeQuery(sql, params);
        // executeQuery yields null when the connection or the query failed.
        if (rs != null) {
            // Walk the result set, one news object per row
            while (rs.next()) {
                News news = new News();
                news.setId(rs.getInt("id"));
                news.setTitle(rs.getString("title"));
                news.setBody(rs.getString("body"));
                news.setDate(rs.getString("date"));
                list.add(news);
            }
        }
    } catch (java.sql.SQLException e) {
        e.printStackTrace();
    } finally {
        // executeQuery leaves the connection open for reading; release it here.
        BaseDao.closeConnection();
    }
    return list;
}

/**
 * Singleton utility that loads {@code jdbc.properties} from the classpath and
 * exposes its values by key.
 *
 * <p>If the resource is missing or unreadable the problem is reported on
 * standard error and the manager stays usable with no properties loaded, so
 * {@link #getValue(String)} simply returns {@code null}.
 */
public class ConfigManager {

    // Declared before the instance: static initializers run in textual order
    // and the constructor below fills this object.
    private static final Properties properties = new Properties();

    // 01. The single instance, created eagerly during class initialization
    private static final ConfigManager manager = new ConfigManager();

    // 02. Private constructor: loads the configuration file once
    private ConfigManager() {
        // Classpath location of the configuration file
        String path = "jdbc.properties";
        InputStream stream = ConfigManager.class.getClassLoader()
                .getResourceAsStream(path);
        // getResourceAsStream returns null when the resource does not exist.
        if (stream == null) {
            System.err.println("ConfigManager: resource not found on classpath: " + path);
            return;
        }
        try {
            properties.load(stream);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                stream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * 03. Access point for the singleton.
     *
     * @return the single {@code ConfigManager} instance
     */
    public static ConfigManager getInstance() {
        return manager;
    }

    /**
     * Looks up a configuration value.
     *
     * @param key property name, e.g. {@code jdbc.url}
     * @return the configured value, or {@code null} if the key is absent
     */
    public static String getValue(String key) {
        return properties.getProperty(key);
    }

}

/*
 * jdbc.properties 文件(須放在 classpath 根目錄,由 ConfigManager 讀取)
 */

jdbc.url=jdbc\:mysql\://localhost\:3306/test
jdbc.userName=hhr
jdbc.password=hhr
jdbc.driver=com.mysql.jdbc.Driver

java實現爬蟲功能