配置安装linux自动化巡检工具步骤

/ 0评 / 0

1.需要配置两个文件

config.conf:

#gp-master信息
[gp_server_info]
host=127.0.0.1
port=29001
username=gpadmin
password=111

#tomcat-nginx信息
[tomcat_nginx_server_info]
host=127.0.0.1
port=29001
username=gpadmin
password=111

#维护页面信息
[nbd_server_info]
login_url=http://ip/nbdGD/bigdata/user/login
check_all_url=http://ip/nbdGD/bigdata/monitor/gp
service_status_url=http://ip/nbdGD/bigdata/monitor/congestion
mr_service_url=http://ip/nbdGD/bigdata/monitor/mrsender
username=admin
password=111

#CDH信息
[cdh_server_info]
host=127.0.0.1
port=57182
username=admin
password=1111
cluster_name=Cluster 1

server_info.json 服务器信息

type:用于区分服务器的用途类型,例如 gp 代表 GP 的 master 节点,mr 代表采集 MR 的服务器,kpi 代表采集 KPI 的服务器(取值使用小写,与下面的 JSON 示例保持一致)

[
    {
        "host": "127.0.0.1",
        "username": "root",
        "password": "111!@#",
        "port": 29001,
        "type": "gp"
    },
    {
        "host": "127.0.0.1",
        "username": "root",
        "password": "111!@#",
        "port": 29001,
        "type": "sdw"
    },
    {
        "host": "127.0.0.1",
        "username": "root",
        "password": "111!@#",
        "port": 29001,
        "type": "mr"
    },
    {
        "host": "127.0.0.1",
        "username": "root",
        "password": "111!@#",
        "port": 29001,
        "type": "kpi"
    }
]

2.主代码如下:

#!/usr/bin/python3
# -*- coding: UTF-8 -*-

import configparser
import datetime
import json
import logging
import os
import platform
import random
import re
import sys
import time
import paramiko
import requests

"""
全局函数区域...
"""
banner = """\033[1;34m
////////////////////////////////////////////////////////////////////
//                          _ooOoo_                               //
//                         o8888888o                              //
//                         88" . "88                              //
//                         (| ^_^ |)                              //
//                         O\  =  /O                              //
//                      ____/`---'\____                           //
//                    .'  \\|     |//  `.                         //
//                   /  \\|||  :  |||//  \                        //
//                  /  _||||| -:- |||||-  \                       //
//                  |   | \\\  -  /// |   |                       //
//                  | \_|  ''\---/''  |   |                       //
//                  \  .-\__  `-`  ___/-. /                       //
//                ___`. .'  /--.--\  `. . ___                     //
//              ."" '<  `.___\_<|><------')
                    for check in item['healthChecks']:
                        logging.info(check['name'] + ':' + cdh_health_status(check['summary']))
                    if item['name'] == 'hbase':
                        open_file_num = get_cdh_time_series(cookies, host, port)
                        if open_file_num >= 15000:
                            logging.info('HBASE打开总文件描述符数大于等于1.5W--建议重启hbase!!!!')
                        else:
                            logging.info('HBASE打开总文件描述符数:{}'.format(open_file_num))
                    logging.info('\n')
                    input("请确认{}服务状态,按回车键继续...".format(item['name']))
                    logging.info('\n')
            link = 'http://{0}:{1}/api/v19/cm/service'.format(host, port)
            req = requests.get(link, headers=headers, timeout=100)
            response = req.text
            data_json = json.loads(response)
            if data_json is not None:
                logging.info('------>' + data_json['displayName'] + ':' + cdh_health_status(data_json['healthSummary']) + '<------')
                for check in data_json['healthChecks']:
                    logging.info(check['name'] + ':' + cdh_health_status(check['summary']))
                    logging.info('\n')
            req.close()
        except Exception as e:
            logging.error('请求登录CDH失败...请手动登录CDH检查服务.....')
    else:
        logging.error('获取CDH--cookie失败...请手动登录CDH检查服务.....')


def main():
    """Run the interactive inspection from start to finish.

    Steps, in order:
      1. Load the server list via ``read_all_server_info()`` and wait for the
         operator to confirm it.
      2. For every server: connect, run the disk / memory / load checks and
         the collection-program check, and additionally the GP database
         check when the entry's ``type`` is ``'gp'``.
      3. Run the NBD daily-load check and the CDH cluster check.
      4. Connect to the host described by the ``tomcat_nginx_server_info``
         config section and run the tomcat/nginx check.
      5. Log every host that could not be reached.

    The function pauses on ``input()`` after each stage so the operator can
    review the logged output before continuing.
    """
    # Hosts for which connect() returned None; reported at the very end.
    false_server_info = []
    logging.info('开始获取服务器所有信息...')
    server_info = read_all_server_info()
    logging.info("\n###########################\n")
    time.sleep(1)
    input("请确认待巡检的服务器ip信息,按回车键继续。")
    for info in server_info:
        host = info['host']
        username = info['username']
        password = info['password']
        port = info['port']
        type_str = info['type']
        conn = connect(host, username, password, port)
        if conn is None:
            logging.info('请检查{}服务器:{}的连通性...'.format(type_str, host))
            false_server_info.append(host)
        else:
            # Close the connection even if a check raises or the operator
            # aborts at a prompt, so sessions are not leaked.
            try:
                df_exec_command(conn)
                free_exec_command(conn)
                load_stat_exec_command(conn)
                input("请确认{}服务器:{}的磁盘/内存/服务器负载使用信息,按回车键继续...".format(type_str, host))
                time.sleep(1)
                logging.info("\n")
                cj_exec_command(conn, type_str, host)
                # Only the GP master entry gets the database status check.
                if type_str == 'gp':
                    gp_exec_command(conn)
                    input("请确认GP数据库状态,按回车键继续...")
            finally:
                conn.close()
        logging.info("\n-------------------------------------------------------------------------\n")

    # Yesterday's database load status.
    get_nbd()
    input("请确认天粒度入库数据是否正常,按回车键继续...")
    # CDH monitoring.
    get_cdh_clusters_info()
    input("请确认CDH集群状态是否正常,按回车键继续...")
    logging.info("\n")

    server_info = read_config_info('tomcat_nginx_server_info')
    conn = connect(server_info['host'], server_info['username'], server_info['password'], server_info['port'])
    if conn is None:
        # Same handling as the per-server loop: connect() is treated there as
        # returning None on failure, so record the host instead of crashing
        # on conn.close() below.
        logging.info('请检查{}服务器:{}的连通性...'.format('tomcat_nginx', server_info['host']))
        false_server_info.append(server_info['host'])
    else:
        try:
            tomcat_nginx_exec_command(conn)
            input("请确认tomcat/nginx状态,按回车键继续...")
        finally:
            conn.close()
    logging.info("\n")
    logging.info('最后.请检查以下服务器的连通性:{}'.format(false_server_info))


# Script entry point: run the interactive inspection only when this file is
# executed directly, then log a completion marker.
if __name__ == '__main__':
    main()
    logging.info('end....')

3.配置python环境及需要安装的依赖

Python 使用的是 3.8 版本;为了快速安装,这里使用 Miniconda 安装包 Miniconda3-latest-Linux-x86_64.sh

依赖分别是ssh:

PyNaCl-1.4.0/paramiko-2.7.2/bcrypt-3.2.0

requests:

requests-2.22.0-py2.py3-none-any

1.首先创建一个python_dev账号:
# 新建账号
adduser python_dev
# 设置密码
passwd python_dev
# 密码
Rj#Wvk031lyCx2
2.安装python
[python_dev@sxhdp01datanode05 python]$ chmod +x Miniconda3-latest-Linux-x86_64.sh
[python_dev@sxhdp01datanode05 python]$ ./Miniconda3-latest-Linux-x86_64.sh


installation finished.
Do you wish the installer to initialize Miniconda3
by running conda init? [yes|no]
[no] >>> no  #这里我自己是不喜欢初始化的所以选择了no,如果想设置初始化可以是yes
3.安装依赖
# 解压
tar -zxvf  bcrypt-3.2.0.tar.gz
# 进入文件夹并安装依赖
cd bcrypt-3.2.0
~/miniconda3/bin/python setup.py install
# 查看是否安装成功
~/miniconda3/bin/pip list

#同理
tar -zxvf PyNaCl-1.4.0.tar.gz
tar -zxvf  paramiko-2.7.2.tar.gz
.....

# 安装 requests:因为是 whl 包,所以用 pip 安装
~/miniconda3/bin/pip install requests-2.22.0-py2.py3-none-any.whl 
4.手动运行执行
~/miniconda3/bin/python check_server.py

 

发表评论

您的电子邮箱地址不会被公开。 必填项已用*标注