本文最后更新于 2834 天前,其中的信息可能已经有所发展或是发生改变。
最近一直在做域名筛选,因为之前一直在学javaweb,但是来到这个公司,一直是让我去截取数据。
所以自己也做下笔记吧..
先上图再说需求吧

需求:
如:把www.baidu.com 或者news.baidu.com,截取分布拆成
即,先去除www和后缀com/net/cn/com.cn….
然后输入结果为:.baidu.(根域名)news.(二级)news.baidu.(二级全域名)
实现步骤…
1.先把数据,即url除去www/后缀
2.使用java代码去跑,分析判断,如果遇到4级、5级域名,就重复跑(即把含有几级的数据,跑到只剩下根域名)
下面是主要代码吧。有点乱,做的过程我自己也很懵逼…
package com.glj.quchong;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Date;
import comglj.utils.CharUtil;
import comglj.utils.JDBCUtil;
public class TestJDBC {
public static void main(String[] args) {
try{
Class.forName("com.mysql.jdbc.Driver");
Connection conn = JDBCUtil.getConnection();
Statement s = conn.createStatement();
String sql = "select * FROM sanji";
ResultSet rs = s.executeQuery(sql);
while (rs.next()) {
String id = rs.getString(1);
String tac = rs.getString(2);
String host = rs.getString(3);
String is_reg = rs.getString(4);
String host_id = rs.getString(5);
String pass_time = "";
SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//设置日期格式
pass_time = df.format(new Date());
//判断第一个.不在开头and最后一个.不在域名最后面
if(host.indexOf(".")>0&&host.indexOf(".")+1<host.length()){
//一级域名
// System.out.println(host+"--"+host.substring(0,host.indexOf(".")+1));
String hostByyiji = host.substring(0,host.indexOf(".")+1);
String sqlByyiji = "insert into yiji (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "
+ "values('"+id+"','"+tac+"','"+hostByyiji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";
Statement s1 = conn.createStatement();
s1.execute(sqlByyiji);
//包含一级二级域名,再次去筛选截取
String hostByerji = host.substring(host.indexOf("."),host.length());
if(CharUtil.CharNumber(hostByerji)>2){
//二级及其以上
// System.out.println(hostByerji);
hostByerji = host.substring(host.indexOf(".")+1,host.length());
// System.out.println(host+"----"+host.substring(host.indexOf(".")+1,host.length()));
String sqlByerji = "insert into siji (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "
+ "values('"+id+"','"+tac+"','"+hostByerji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";
Statement sterji = conn.createStatement();
sterji.execute(sqlByerji);
}else{
//根域名
// System.out.println(hostByerji);
String sqlBygeng = "insert into geng (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "
+ "values('"+id+"','"+tac+"','"+hostByerji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";
Statement st1 = conn.createStatement();
st1.execute(sqlBygeng);
}
}else{
//根域名
// System.out.println(host);
String sqlBygeng = "insert into geng (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "
+ "values('"+id+"','"+tac+"','"+host+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";
Statement st1 = conn.createStatement();
st1.execute(sqlBygeng);
}
}
}catch(ClassNotFoundException | SQLException e){
e.printStackTrace();
}
}
}
下面代码是判断域名里面还有几个‘.’即可判断还有几级没有拆分
package comglj.utils;
public class CharUtil {
public static int CharNumber(String url){
//需要查找重复的字或者符号
char c = '.';
int i,index=-1,count=0;
for(i=0;i<url.length();i++)
if((url.indexOf(c,index+1))!=-1){
index=url.indexOf(c,index+1);
count++;
}
return count;
}
}
数据库类
package comglj.utils;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
public class JDBCUtil {
private static String url;
private static String username;
private static String password;
public JDBCUtil() {
super();
// TODO Auto-generated constructor stub
}
static {
try {
Class.forName("com.mysql.jdbc.Driver");
url = "jdbc:mysql://127.0.0.1:3306/work?characterEncoding=UTF-8";
username = "root";
password = "root";
} catch (Exception e) {
// TODO: handle exception
}
}
public static Connection getConnection() throws SQLException{
Connection con = DriverManager.getConnection(url,username,password);
return con;
}
public static void main(String[] args) {
try {
Connection conn = JDBCUtil.getConnection();
String sql = "select * from k2";
PreparedStatement ps = conn.prepareStatement(sql);
ResultSet rs = ps.executeQuery(sql);
while(rs.next()){
String name = rs.getString("NAME");
System.out.println(name);
}
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
最后在用sql语句 统计拆分出现的次数
SELECT MFR_NAM_ID as ID,TAC,RULE as 域名,count(1) as 次数 from x5
GROUP BY MFR_NAM_ID,TAC,RULE ORDER BY count(1) DESC
最近一直和博客友情,加个链接可否?
已经添加
链接已经添加好 谢谢