本文最后更新于 2325 天前,其中的信息可能已经有所发展或是发生改变。
emmmm…好像没有什么要备注的,就是自己练手爬信息吧?
package com.htjf.main;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Practice crawler built on jsoup.
 *
 * <p>Requests the company listing pages {@code list_p1.html} .. {@code list_p100.html} of
 * www.ybzhan.cn, and for every {@code .companyList} entry appends one semicolon-separated line
 * to a CSV file: company name; main products; shop URL; company website; personalised URL.
 * The last three columns fall back to the shop link taken from the listing page when the
 * shop's {@code contactus.html} page does not provide them.
 */
public class HelloWordJSoup {

    /** Scheme and host prepended to the link targets found on the listing pages. */
    private static final String SITE_ROOT = "http://www.ybzhan.cn";

    /** Number of the last listing page requested; pages are numbered from 1. */
    private static final int LAST_PAGE = 100;

    /** Index of the personalised-URL text in the array returned by {@link #fetchContactTexts}. */
    private static final int PERSONALITY = 0;

    /** Index of the shop-URL text in the array returned by {@link #fetchContactTexts}. */
    private static final int SHOP = 1;

    /** Index of the company-website text in the array returned by {@link #fetchContactTexts}. */
    private static final int COMPANY = 2;

    /**
     * Labels searched for on the contact page, in the order they are tested.
     * The position of each label matches the index constants above.
     */
    private static final String[] CONTACT_LABELS = {"个 性 化", "商铺网址", "公司网站"};

    /**
     * Crawls every listing page and appends the rows collected from each page to the CSV file.
     *
     * <p>A listing page that cannot be fetched is reported on stderr and skipped; the crawl
     * continues with the next page.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        HelloWordJSoup crawler = new HelloWordJSoup();
        StringBuffer stringBuffer = new StringBuffer();
        for (int i = 1; i <= LAST_PAGE; i++) {
            String url = SITE_ROOT + "/Company/a_t0/list_p" + i + ".html";
            System.out.println(url);
            try {
                Document doc = Jsoup.connect(url).get();
                for (Element companyList : doc.select(".companyList")) {
                    appendCompanyRow(companyList, stringBuffer);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            // writerData also empties the buffer, so it can be reused for the next page.
            crawler.writerData(stringBuffer);
        }
    }

    /**
     * Appends the CSV line for one listing entry to {@code out}.
     *
     * <p>Entries without a {@code div.companyName a} link are skipped, because neither the
     * company name nor the shop URL can be determined for them. A failure to load the shop's
     * contact page is reported on stderr and the row is still written with the fallback URL.
     *
     * @param companyList one {@code .companyList} element of a listing page
     * @param out buffer receiving the line
     */
    private static void appendCompanyRow(Element companyList, StringBuffer out) {
        // Company name and shop link.
        Element companyNameDiv = companyList.select("div.companyName").first();
        Element link = companyNameDiv == null ? null : companyNameDiv.select("a").first();
        if (link == null) {
            return;
        }
        String shopUrl = SITE_ROOT + link.attr("href");
        String companyName = link.text();

        // Main products; left empty when the entry has no "dt > p" element.
        Element ps = companyList.select("dt > p").first();
        String mainProducts = ps == null ? "" : ps.text().replace("主营产品", "");

        // Contact page of the shop; links containing "Company/Detail" are not followed.
        String[] contactTexts = {"", "", ""};
        System.out.println(shopUrl);
        if (!shopUrl.contains("Company/Detail")) {
            try {
                contactTexts = fetchContactTexts(shopUrl);
            } catch (IOException e) {
                // Keep the row: the columns below fall back to shopUrl.
                e.printStackTrace();
            }
        }

        // Write the row.
        out.append(companyName).append(';');
        out.append(mainProducts).append(';');
        out.append(valueOrFallback(contactTexts[SHOP], "商铺网址:", shopUrl)).append(';');
        out.append(valueOrFallback(contactTexts[COMPANY], "公司网站:", shopUrl)).append(';');
        out.append(valueOrFallback(contactTexts[PERSONALITY], "个 性 化:", shopUrl));
        out.append(System.lineSeparator());
    }

    /**
     * Loads {@code shopUrl + "/contactus.html"} and collects the text of the elements that
     * mention one of {@link #CONTACT_LABELS}.
     *
     * <p>{@code <p>} elements are scanned first (a later match replaces an earlier one). Only
     * if at least one value is still missing are the {@code <dl>} elements scanned, and those
     * only fill values that are still blank.
     *
     * @param shopUrl URL of the shop, without a trailing path to the contact page
     * @return three strings indexed by {@link #PERSONALITY}, {@link #SHOP} and
     *         {@link #COMPANY}; an entry is {@code ""} when nothing was found
     * @throws IOException if the contact page cannot be fetched
     */
    private static String[] fetchContactTexts(String shopUrl) throws IOException {
        String[] found = {"", "", ""};
        Document contactusDoc = Jsoup.connect(shopUrl + "/contactus.html").get();

        for (Element element : contactusDoc.getElementsByTag("p")) {
            String text = element.text();
            int index = labelIndex(text);
            if (index >= 0) {
                found[index] = text;
            }
        }

        boolean incomplete = StringUtil.isBlank(found[PERSONALITY])
                || StringUtil.isBlank(found[SHOP])
                || StringUtil.isBlank(found[COMPANY]);
        if (incomplete) {
            for (Element element : contactusDoc.getElementsByTag("dl")) {
                String text = element.text();
                int index = labelIndex(text);
                if (index >= 0 && StringUtil.isBlank(found[index])) {
                    found[index] = text;
                }
            }
        }
        return found;
    }

    /**
     * Returns the index of the first entry of {@link #CONTACT_LABELS} contained in
     * {@code text}, or {@code -1} when none is.
     */
    private static int labelIndex(String text) {
        for (int i = 0; i < CONTACT_LABELS.length; i++) {
            if (text.contains(CONTACT_LABELS[i])) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns {@code labelledText} with {@code prefix} removed and trimmed, or
     * {@code fallback} when {@code labelledText} is blank.
     */
    private static String valueOrFallback(String labelledText, String prefix, String fallback) {
        if (StringUtil.isBlank(labelledText.trim())) {
            return fallback;
        }
        return labelledText.replace(prefix, "").trim();
    }

    /**
     * Appends the content of {@code stringBuffer} to the CSV file and then empties the buffer.
     *
     * <p>The parent directory is created when missing; the file itself is created by opening
     * it in append mode. I/O failures are reported on stderr and not propagated. The buffer is
     * emptied even when the write failed.
     *
     * @param stringBuffer text to append; cleared before this method returns
     */
    public void writerData(StringBuffer stringBuffer) {
        String fileName = "G:" + File.separator + "pushFile_test" + File.separator + "data.csv";
        File writeFile = new File(fileName); // target file path

        File parentDir = writeFile.getParentFile();
        if (parentDir != null && !parentDir.exists() && !parentDir.mkdirs()) {
            System.err.println("Could not create directory " + parentDir);
        }

        // try-with-resources closes the writer even when write/flush throws.
        try (FileWriter out = new FileWriter(writeFile, true)) {
            if (stringBuffer.length() > 0) {
                out.write(stringBuffer.toString());
            }
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
        stringBuffer.setLength(0);
    }
}
结束…
