emmmm...好像没有什么要备注的,就是自己练手爬信息吧?
package com.htjf.main;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Crawls the company listing pages of www.ybzhan.cn (pages 1 to 100) and
 * appends one semicolon-separated row per company to a local file.
 *
 * <p>Each row has the form
 * {@code companyName;mainProducts;shopUrl;companyUrl;personalityUrl}. The last
 * three fields are read from the shop's {@code contactus.html} page when it
 * can be fetched; any field that is missing falls back to the shop URL taken
 * from the listing page.
 */
public class HelloWordJSoup {

    private static final String SITE_ROOT = "http://www.ybzhan.cn";
    private static final int FIRST_PAGE = 1;
    private static final int LAST_PAGE = 100;

    // Labels searched for on the contact page. They are matched against the
    // page text at runtime and therefore must stay exactly as written.
    private static final String PERSONALITY_LABEL = "个 性 化";
    private static final String SHOP_LABEL = "商铺网址";
    private static final String COMPANY_LABEL = "公司网站";

    /**
     * Walks every listing page, collects the rows of that page in a buffer and
     * appends the buffer to the output file before moving on to the next page.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        HelloWordJSoup crawler = new HelloWordJSoup();
        for (int i = FIRST_PAGE; i <= LAST_PAGE; i++) {
            String url = SITE_ROOT + "/Company/a_t0/list_p" + i + ".html";
            System.out.println(url);

            StringBuffer rows = new StringBuffer();
            try {
                Document doc = Jsoup.connect(url).get();
                for (Element companyList : doc.select(".companyList")) {
                    appendCompanyRow(companyList, rows);
                }
            } catch (IOException e) {
                // A listing page that cannot be fetched is reported and skipped.
                e.printStackTrace();
            }
            // writerData also empties the buffer once it is done.
            crawler.writerData(rows);
        }
    }

    /**
     * Extracts one company from a listing entry and appends its row to {@code rows}.
     * Entries without a company-name link are skipped instead of failing the crawl.
     */
    private static void appendCompanyRow(Element companyList, StringBuffer rows) {
        // Company name and shop link.
        Element companyNameDiv = companyList.select("div.companyName").first();
        Element link = companyNameDiv == null ? null : companyNameDiv.select("a").first();
        if (link == null) {
            // first() returns null when nothing matched; nothing usable in this entry.
            return;
        }
        String shopUrl = resolveShopUrl(link.attr("href"));
        String companyName = link.text();

        // Main products.
        Element ps = companyList.select("dt > p").first();
        String mainProducts = ps == null ? "" : ps.text().replace("主营产品", "");

        // Contact page of the shop.
        System.out.println(shopUrl);
        ContactInfo info = fetchContactInfo(shopUrl);

        // Write the row.
        rows.append(companyName).append(';');
        rows.append(mainProducts).append(';');
        rows.append(fieldOrFallback(info.shopUrl, SHOP_LABEL + ":", shopUrl)).append(';');
        rows.append(fieldOrFallback(info.companyUrl, COMPANY_LABEL + ":", shopUrl)).append(';');
        rows.append(fieldOrFallback(info.personalityUrl, PERSONALITY_LABEL + ":", shopUrl));
        rows.append(System.lineSeparator());
    }

    /**
     * Builds the shop URL from the link's href. An href that is already
     * absolute is used as-is, because prefixing it with the site root could
     * never yield a valid URL.
     */
    private static String resolveShopUrl(String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        return SITE_ROOT + href;
    }

    /**
     * Fetches {@code shopUrl + "/contactus.html"} and reads the three contact
     * fields from it. URLs containing {@code Company/Detail} are not fetched.
     * A failed request is reported and yields empty fields, so that one
     * unreachable shop does not discard the rest of the listing page.
     */
    private static ContactInfo fetchContactInfo(String shopUrl) {
        ContactInfo info = new ContactInfo();
        if (StringUtil.isBlank(shopUrl) || shopUrl.contains("Company/Detail")) {
            return info;
        }
        try {
            Document contactusDoc = Jsoup.connect(shopUrl + "/contactus.html").get();
            // First pass: <p> elements, a later match replaces an earlier one.
            info.absorb(contactusDoc.getElementsByTag("p"), true);
            if (info.isIncomplete()) {
                // Second pass: <dl> elements only fill fields that are still blank.
                info.absorb(contactusDoc.getElementsByTag("dl"), false);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return info;
    }

    /**
     * Returns {@code raw} with {@code labelPrefix} removed and trimmed, or
     * {@code fallback} when {@code raw} is blank.
     */
    private static String fieldOrFallback(String raw, String labelPrefix, String fallback) {
        if (StringUtil.isBlank(raw.trim())) {
            return fallback;
        }
        return raw.replace(labelPrefix, "").trim();
    }

    /**
     * Appends the buffer content to {@code G:\pushFile_test\data.csv} (path
     * built with {@link File#separator}) and then empties the buffer, whether
     * or not the write succeeded.
     *
     * <p>The file is written with {@link FileWriter}, i.e. in the platform
     * default charset.
     *
     * @param stringBuffer rows to append; it is cleared before this method returns
     */
    public void writerData(StringBuffer stringBuffer) {
        String fileName = "G:" + File.separator + "pushFile_test" + File.separator + "data.csv";
        File writeFile = new File(fileName);

        // Make sure the target directory exists; the appending FileWriter
        // below creates the file itself when it is missing.
        File parent = writeFile.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            System.err.println("Could not create output directory: " + parent);
        }

        // try-with-resources closes (and thereby flushes) the writer even if write() throws.
        try (FileWriter out = new FileWriter(writeFile, true)) {
            if (stringBuffer.length() > 0) {
                out.write(stringBuffer.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        stringBuffer.setLength(0);
    }

    /** Raw text of the three contact-page entries; empty string when not found. */
    private static final class ContactInfo {
        private String personalityUrl = "";
        private String shopUrl = "";
        private String companyUrl = "";

        /** Returns true while at least one of the three fields is still blank. */
        boolean isIncomplete() {
            return StringUtil.isBlank(personalityUrl)
                    || StringUtil.isBlank(shopUrl)
                    || StringUtil.isBlank(companyUrl);
        }

        /**
         * Scans the elements for the three labels; each element is assigned to
         * the first label it contains, checked in the order personality, shop,
         * company. With {@code overwrite} a matching element always replaces
         * the stored value, otherwise only a blank field is filled.
         */
        void absorb(Elements elements, boolean overwrite) {
            for (Element element : elements) {
                String text = element.text();
                if (text.contains(PERSONALITY_LABEL)) {
                    if (overwrite || StringUtil.isBlank(personalityUrl)) {
                        personalityUrl = text;
                    }
                } else if (text.contains(SHOP_LABEL)) {
                    if (overwrite || StringUtil.isBlank(shopUrl)) {
                        shopUrl = text;
                    }
                } else if (text.contains(COMPANY_LABEL)) {
                    if (overwrite || StringUtil.isBlank(companyUrl)) {
                        companyUrl = text;
                    }
                }
            }
        }
    }
}
结束...