关于java截取多重域名的笔记…

/ 3评 / 0

最近一直在做域名筛选,因为之前一直在学javaweb,但是来到这个公司,一直是让我去截取数据。

所以自己也做下笔记吧..

先上图再说需求吧

需求:

如:把www.baidu.com 或者news.baidu.com,截取分布拆成

即,先去除www和后缀com/net/cn/com.cn....

然后输入结果为:.baidu.(根域名)news.(二级)news.baidu.(二级全域名)

实现步骤...

1.先把数据,即url除去www/后缀

2.使用java代码去跑,分析判断,如果遇到4级、5级域名,就重复跑(即把含有几级的数据,跑到只剩下根域名)

下面是主要代码吧。有点乱,做的过程我自己也很懵逼...

package com.glj.quchong;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Date;

import comglj.utils.CharUtil;
import comglj.utils.JDBCUtil;


public class TestJDBC {
	public static void main(String[] args) {
		try{
			Class.forName("com.mysql.jdbc.Driver");
			
			Connection conn = JDBCUtil.getConnection();
			Statement s = conn.createStatement();
			
			String sql = "select * FROM sanji";
			ResultSet rs = s.executeQuery(sql);
			while (rs.next()) {
				String id = rs.getString(1);
				String tac = rs.getString(2);
				String host = rs.getString(3);
				String is_reg = rs.getString(4);
				String host_id = rs.getString(5);
				String pass_time = "";
				SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//设置日期格式
				pass_time = df.format(new Date());
				//判断第一个.不在开头and最后一个.不在域名最后面
				if(host.indexOf(".")>0&&host.indexOf(".")+1<host.length()){
					//一级域名
//					System.out.println(host+"--"+host.substring(0,host.indexOf(".")+1)); 
					String hostByyiji = host.substring(0,host.indexOf(".")+1);
					String sqlByyiji = "insert into yiji (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "
							+ "values('"+id+"','"+tac+"','"+hostByyiji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";
					Statement s1 = conn.createStatement();
					s1.execute(sqlByyiji);
					
					//包含一级二级域名,再次去筛选截取
					String hostByerji = host.substring(host.indexOf("."),host.length());
					if(CharUtil.CharNumber(hostByerji)>2){
						//二级及其以上
//						System.out.println(hostByerji);
						hostByerji = host.substring(host.indexOf(".")+1,host.length());
//						System.out.println(host+"----"+host.substring(host.indexOf(".")+1,host.length()));
						String sqlByerji = "insert into siji (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "
								+ "values('"+id+"','"+tac+"','"+hostByerji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";
						Statement sterji = conn.createStatement();
						sterji.execute(sqlByerji);
					}else{
						//根域名
//						System.out.println(hostByerji);
						String sqlBygeng = "insert into geng (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "
								+ "values('"+id+"','"+tac+"','"+hostByerji+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";
						Statement st1 = conn.createStatement();
						st1.execute(sqlBygeng);
					}
				}else{
					//根域名
//					System.out.println(host); 
					String sqlBygeng = "insert into geng (ID,TAC,HOST,IS_REG,HOST_ID,PASS_TIME) "
							+ "values('"+id+"','"+tac+"','"+host+"','"+is_reg+"','"+host_id+"','"+pass_time+"')";
					Statement st1 = conn.createStatement();
					st1.execute(sqlBygeng);
				}
			}
		}catch(ClassNotFoundException | SQLException e){
			e.printStackTrace();
		}
	}
}	

下面代码是判断域名里面还有几个‘.’即可判断还有几级没有拆分

package comglj.utils;

public class CharUtil {
	public static int CharNumber(String url){
		//需要查找重复的字或者符号
		char c = '.';
		int i,index=-1,count=0;
		for(i=0;i<url.length();i++)
		if((url.indexOf(c,index+1))!=-1){
			index=url.indexOf(c,index+1);
			count++;
		}
		return count;
	}
}

数据库类

package comglj.utils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

public class JDBCUtil {
	private static String url;
	private static String username;
	private static String password;
	public JDBCUtil() {
		super();
		// TODO Auto-generated constructor stub
	}
	static {
		try {
			Class.forName("com.mysql.jdbc.Driver");
			url = "jdbc:mysql://127.0.0.1:3306/work?characterEncoding=UTF-8";
			username = "root";
			password = "root";
		} catch (Exception e) {
			// TODO: handle exception
		}
	}
	
	public static Connection getConnection() throws SQLException{
		Connection con = DriverManager.getConnection(url,username,password);
		return con;
	}
	public static void main(String[] args) {
		try {
			Connection conn = JDBCUtil.getConnection();
			String sql = "select * from k2";
			PreparedStatement ps = conn.prepareStatement(sql);
			ResultSet rs =  ps.executeQuery(sql);
			while(rs.next()){
				String name = rs.getString("NAME");
				System.out.println(name);
			}
		} catch (SQLException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		
	}
}

最后在用sql语句 统计拆分出现的次数

SELECT MFR_NAM_ID as ID,TAC,RULE as 域名,count(1) as 次数 from x5
GROUP BY MFR_NAM_ID,TAC,RULE ORDER BY count(1) DESC

 

3条回应:“关于java截取多重域名的笔记…”

  1. 鸟叔の窝说道:

    最近一直和博客友情,加个链接可否?

发表评论

您的电子邮箱地址不会被公开。 必填项已用*标注