#!/sf/vs/bin/python
# coding=utf-8
import argparse
import ast
import subprocess
import sys
import traceback
from libvs.volume.volume import Volume
from netaddr import IPNetwork
import requests
import fcntl
import errno
from libvs.utils.hostinfo import HostStatus
from zkapi.zk_op import zk_op
import logging
import os
import socket
import psutil
import time

# Timeout passed to requests.get() when querying the local host service.
REQUEST_TIMEOUT = 30
# Hostname of the machine this script is running on.
LOCALHOST = socket.gethostname()
# Absolute path of this script; used as the source when it is distributed.
file_path = os.path.abspath(__file__)
# Path the script is distributed to and executed from on the cluster hosts.
dst_file_path = '/sf/log/today/vs/fix_iscsi_ip.py'
current_path = os.path.dirname(file_path)
cur_file_name = os.path.basename(__file__)
# The log file lives next to the script and is named "<script name>.log".
log_path = os.path.join(current_path, cur_file_name + ".log")
# Root logger at INFO level; every record goes to the log file above.
log = logging.getLogger()
log.setLevel(logging.INFO)
formatter = logging.Formatter('[%(asctime)s] %(levelname)s '
                              '[%(pathname)s:%(lineno)d:%(funcName)s]'
                              ' %(message)s')
handle_file = logging.FileHandler(log_path, encoding='utf-8')
handle_file.setFormatter(formatter)
log.addHandler(handle_file)
# ZooKeeper directory that is listed for HA iSCSI "*.json" configurations.
ISCSI_CONF_PATH = '/iscsi/ha_iscsi'
# ZooKeeper node used as the cluster-wide single-instance marker.
zk_path = '/fix_iscsi_ip'
# Interfaces inspected in cluster mode.
iscsi_check_inf = ['eds_manager', 'eds_private']


class SingleProc(object):
    """
    Single-instance guard backed by an exclusive fcntl lock on a pid file.

    The lock file is opened lazily by the first lock() call and removed again
    by unlock().  Note from the original author: a pid file may remain on
    disk after this class has been used.

    Two usage styles are supported:

        with SingleProc('/path/xxx'):
            ...

    or

        lock = SingleProc('/path/xxx')
        lock.lock()
        ...
        lock.unlock()

    The first style is recommended.

    Internally the handle is ``None`` (file not opened yet), a file object
    (file opened), or ``-1`` (locking failed or the lock was released).
    """

    def __init__(self, pid_file):
        self._pidfile_path = os.path.realpath(pid_file)
        self.__pidfile = None

    def __del__(self):
        # Inspect the raw handle: the lazy ``_pidfile`` property would create
        # the file only so that unlock() could delete it again.
        if self._has_open_handle():
            self.unlock()

    def _has_open_handle(self):
        """Return True when a lock file object is currently held."""
        handle = self.__pidfile
        return handle is not None and handle != -1

    @property
    def _pidfile(self):
        # Open (and create/truncate) the lock file on first use.
        if self.__pidfile is None:
            self.__pidfile = open(self._pidfile_path, 'w')
        return self.__pidfile

    @_pidfile.setter
    def _pidfile(self, val):
        self.__pidfile = val

    def lock(self, block=False):
        """
        Take the exclusive lock.

        :param block: wait for the lock when True; fail immediately when
                      False (the default).
        :return: 0 on success, -1 when the file cannot be opened or locked.
        """
        # A previous failure/unlock left the -1 marker; start over.
        if self.__pidfile == -1:
            self.__pidfile = None

        if block:
            operation = fcntl.LOCK_EX
        else:
            operation = fcntl.LOCK_EX | fcntl.LOCK_NB
        try:
            fcntl.lockf(self._pidfile, operation)
        except (IOError, OSError):
            log.warning("another instance is running...")
            # Release the descriptor right away instead of waiting for GC.
            if self._has_open_handle():
                self.__pidfile.close()
            self.__pidfile = -1
            return -1
        return 0

    def unlock(self):
        """
        Remove the lock file, then release the lock and close the file.

        Does nothing when this instance never opened the lock file, so a
        file owned by another running instance is left untouched.

        :return: 0 on success (or nothing to do), -1 when unlocking fails.
        :raises OSError: when removing the file fails for a reason other
                         than the file already being gone.
        """
        if not self._has_open_handle():
            return 0

        pidfile_path = self._pidfile_path

        # Clean up the file first, then unlock (original ordering).
        try:
            os.unlink(pidfile_path)
        except OSError as e:
            # os.unlink raises OSError, which is not an IOError on Python 2.
            if e.errno != errno.ENOENT:
                log.error(
                    "error on clean pidfile {}:{}".format(pidfile_path, e))
                raise
        try:
            fcntl.lockf(self.__pidfile, fcntl.LOCK_UN)
            self.__pidfile.close()
            self.__pidfile = -1
        except (IOError, OSError):
            log.error("pidfile unlock fail")
            return -1
        return 0

    def __enter__(self):
        if self.lock() == 0:
            return self
        else:
            raise Exception('lock fail')

    def __exit__(self, exc_type, exc_value, traceback):
        self.unlock()


def create_cluster_instance():
    """
    Create the cluster-wide single-instance marker node in ZooKeeper.

    :return: 0 when the marker was created; 1 when it already exists, when
             creation raised, or when zk_create() reported failure.
    """
    zk_handle = zk_op()
    if zk_handle.zk_exist(zk_path):
        log.error(
            'If other hosts in the cluster are running the tool or '
            'confirm that other hosts are not running the script, '
            'the zk configuration remains (please manually delete '
            'the zk configuration if it remains,path: {})'.format(zk_path))
        return 1
    try:
        created = zk_handle.zk_create(zk_path)
    except Exception as ex:
        log.error('The tool is running on other hosts in the cluster, '
                  'so this host does not need to run it. err: {}'.format(ex))
        return 1
    if not created:
        # Previously this fell through and returned None, which the caller
        # treated as success even though no marker had been created.
        log.error('Failed to create cluster single instance identifier '
                  '{}'.format(zk_path))
        return 1
    log.info('Create a cluster single instance identifier. {}'.format(
        zk_path))
    return 0


def delete_cluster_instance():
    """
    Remove the cluster-wide single-instance marker node from ZooKeeper.

    :return: 0 when the marker is absent or was removed; 1 when removal
             raised or zk_rm() reported failure.
    """
    zk_handle = zk_op()
    if not zk_handle.zk_exist(zk_path):
        log.warning(
            'Cluster single instance identifier {} does not exist, '
            'exit directly.'.format(zk_path))
        return 0
    try:
        removed = zk_handle.zk_rm(zk_path)
    except Exception as ex:
        log.error(
            'Failed to delete cluster single instance identifier {}, '
            'please manually execute super_zkcli.py rm {}.err: {}'.format(
                zk_path, zk_path, ex))
        return 1
    if not removed:
        # Previously a falsy zk_rm() result returned None without any log.
        log.error(
            'Failed to delete cluster single instance identifier {}, '
            'please manually execute super_zkcli.py rm {}.err: {}'.format(
                zk_path, zk_path, 'zk_rm reported failure'))
        return 1
    log.info('Deleting cluster single instance identifier {} '
             'successfully'.format(zk_path))
    return 0


def get_ip_and_netmask(interfaces):
    """
    Collect the IPv4 addresses and netmasks of the given interfaces.

    :param interfaces: iterable of interface names.
    :return: dict mapping each name that psutil reports to a list of
             (address, netmask) tuples; unknown names are left out.
    """
    nic_table = psutil.net_if_addrs()
    return {
        name: [(entry.address, entry.netmask)
               for entry in nic_table[name]
               if entry.family == socket.AF_INET]
        for name in interfaces
        if name in nic_table
    }


def get_interface_src(interfaces):
    """
    Return the ``src`` address of every "proto kernel scope link" route that
    belongs to one of the given interfaces.

    Only lines of exactly this shape in the ``ip route`` output are used:
        10.174.0.0/15 dev eds_0 proto kernel scope link src 10.174.80.215

    :param interfaces: collection of interface names to match.
    :return: list of src addresses, or None when ``ip route`` cannot be run
             or exits with a non-zero status.
    """
    try:
        # check_output raises on a non-zero exit status (Popen/communicate
        # never did); universal_newlines yields text on Python 2 and 3.
        output = subprocess.check_output(['ip', 'route'],
                                         universal_newlines=True)
    except (subprocess.CalledProcessError, OSError) as e:
        log.error("ip r command execution failed: {}".format(e))
        return None

    expected_tail = ['proto', 'kernel', 'scope', 'link', 'src']
    res = []
    for route in output.splitlines():
        parts = route.split()
        if len(parts) != 9:
            continue
        if parts[1] == 'dev' and parts[2] in interfaces and \
                parts[3:8] == expected_tail:
            res.append(parts[8])
    return res


def get_local_ip():
    """
    Look up this host's management and storage-network IPs via HostStatus.

    :return: tuple (mgr_ip, stor_ip) for LOCALHOST.
    """
    status = HostStatus()
    return status.get_mgr_ip(LOCALHOST), status.get_stornet_ip(LOCALHOST)


def get_all_host():
    """
    Gather the hosts listed in the configuration of every volume.

    :return: set with the union of each volume's ``conf.content['hosts']``.
    """
    members = set()
    for volume in Volume.get_all_volumes():
        members.update(volume.conf.content['hosts'])
    return members


def get_iscsi_ip(interfaces):
    """
    Read the HA iSCSI configurations from ZooKeeper, grouped by interface.

    Every non-directory "*.json" node under ISCSI_CONF_PATH is read and
    parsed with ast.literal_eval; entries whose 'iface' is in ``interfaces``
    are kept.

    :param interfaces: collection of interface names of interest.
    :return: dict mapping interface name to a list of configuration entries,
             or None when the configuration directory cannot be listed.
    """
    res = {}
    zk = zk_op()
    try:
        # Built eagerly inside the try so that ZooKeeper errors are caught
        # here on Python 3 as well, where filter() would be lazy.
        iscsi_conf_name_list = [
            name for name in zk.listdir(ISCSI_CONF_PATH)
            if str(name).endswith('.json')
            # The "/" separator was missing, so the wrong path was tested.
            and not zk.isdir(ISCSI_CONF_PATH + "/" + name)
        ]
    except Exception as ex:
        # Original author's note: failing to get the file list means high
        # availability is being configured for the first time.
        log.error('get {} failed, err: {}'.format(ISCSI_CONF_PATH, ex))
        return None
    # Load the high-availability configurations.
    for conf_name in iscsi_conf_name_list:
        zk_file = zk.read(ISCSI_CONF_PATH + "/" + conf_name)
        log.info('get zk_iscsi_conf:{}'.format(zk_file))
        # NOTE(review): the content is parsed as a Python literal, so JSON
        # true/false/null would be rejected -- confirm the stored format.
        conf = ast.literal_eval(zk_file)
        if not conf:
            continue
        for iscsi_conf in conf:
            iface = iscsi_conf['iface']
            if iface in interfaces:
                res.setdefault(iface, []).append(iscsi_conf)
    return res


def cli_parser():
    """
    Build the command-line parser of this tool.

    Options: -c/--cluster (flag, help says to pass it when running on any
    host of the cluster block pool), -r/--run (flag, a fool-proofing
    switch) and -i/--interfaces (string, hidden from the help output).

    :return: the configured argparse.ArgumentParser.
    """
    option_table = (
        (("-c", "--cluster"),
         {"action": 'store_true',
          "help": "在集群块池任意主机执行时指定该参数"}),
        (("-r", "--run"),
         {"action": 'store_true', "help": "防呆"}),
        (("-i", "--interfaces"),
         {"type": str, "help": argparse.SUPPRESS}),
    )
    cli = argparse.ArgumentParser(
        description="接入IP复用私网/管理网修正路由工具")
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli


def get_local_ip_info(inf, mgr_ip):
    """
    Query the local host service for this host's IP and netmask on ``inf``.

    The first element of the response's "data" list is scanned for the
    network entry whose 'host_ip' equals ``mgr_ip``.

    :param inf: 'eds_manager' or 'eds_private'.
    :param mgr_ip: management IP identifying this host in the response.
    :return: tuple (ip, netmask); (None, None) when the request fails, the
             interface type is unknown, or no entry matches ``mgr_ip``.
    """
    # Original author's note: no need to check that the port matches; it is
    # guaranteed at creation time that only the same port can share a subnet.
    urls = {
        'eds_manager': 'http://127.0.0.1:7100/hosts/interfaces/manager',
        'eds_private':
            'http://127.0.0.1:7100/hosts/interfaces/storage_private',
    }
    try:
        if inf not in urls:
            raise Exception('unknown inf type {}'.format(inf))
        rsp_url = requests.get(urls[inf], timeout=REQUEST_TIMEOUT)
        rsp_url.raise_for_status()
        all_interfaces = rsp_url.json()["data"]
        for host_info in all_interfaces[0]['networks']:
            if host_info['host_ip'] == mgr_ip:
                return host_info['ip'], host_info['netmask']
    except Exception as ex:
        log.error("get local {} ip failed, err: {}".format(inf, ex))
        return None, None
    # No entry matched: return the same "not found" pair instead of falling
    # through with None, which callers cannot unpack into two values.
    log.warning("get local {} ip failed, err: no entry for host {}".format(
        inf, mgr_ip))
    return None, None


def distribute_script(src, dst):
    """
    Invoke vs_cluster_cmd.sh in "d" mode with ``src`` and ``dst``.

    :param src: source path handed to the command.
    :param dst: destination path handed to the command.
    :return: 0 when the command succeeds, 1 on any failure (logged).
    """
    argv = ["/sf/vs/bin/vs_cluster_cmd.sh", "d"]
    argv.extend((src, dst))

    try:
        # Run the command; stderr is merged into the captured output.
        subprocess.check_output(argv, stderr=subprocess.STDOUT)
    except Exception as ex:
        log.error('run cmd({}) faild, err: {}'.format(argv, ex))
        return 1
    else:
        return 0


def remote_run_fix_iscsi_ip(hostname, dst_file_path, inf):
    """
    Run the distributed script on ``hostname`` over ssh with "-r -i <inf>".

    :param hostname: host to run the script on.
    :param dst_file_path: path of the script on that host.
    :param inf: iterable of interface names, joined with commas for "-i".
    :return: 0 when the remote command succeeds, 1 on any failure (logged).
    """
    iface_arg = ','.join(inf)
    argv = ["ssh", hostname, '/sf/vs/bin/python', dst_file_path]
    argv += ['-r', '-i', iface_arg]
    try:
        # Run the command; stderr is merged into the captured output.
        subprocess.check_output(argv, stderr=subprocess.STDOUT)
    except Exception as ex:
        log.error('run cmd({}) faild, err: {}'.format(argv, ex))
        return 1
    else:
        return 0


def compare_ip_and_mask(ip1, mask1, ip2, mask2):
    """
    Tell whether the IPNetwork built from ip1/mask1 equals the one built
    from ip2/mask2.

    :return: result of the IPNetwork equality test; False when either
             network cannot be built (the error is logged).
    """
    try:
        # Build both network objects and compare them in one step.
        same_network = (IPNetwork(ip1 + '/' + mask1) ==
                        IPNetwork(ip2 + '/' + mask2))
    except Exception as ex:
        log.error('compare ip mask failed, err: {}'.format(ex))
        return False
    return same_network


def delete_ip(interface, ip_address):
    """
    Remove ``ip_address`` from ``interface`` with "ip addr del".

    :return: 0 when the command succeeds, 1 when it exits non-zero (logged).
    """
    argv = ["ip", "addr", "del", ip_address, "dev", interface]
    try:
        subprocess.check_call(argv)
    except subprocess.CalledProcessError as e:
        log.error("Delete IP address failed: {}".format(e))
        return 1
    else:
        log.info("IP address {} has been removed from interface {}".format(
            ip_address, interface))
        return 0


def check_ip(interface, ip_address):
    """
    Check whether ``ip_address`` is configured on ``interface``.

    Only the address field of the "inet"/"inet6" lines printed by
    "ip addr show" is compared, and the comparison is exact.  The former
    substring test also matched longer addresses (10.0.0.1 vs 10.0.0.10)
    and unrelated fields of the output.

    :param interface: interface name.
    :param ip_address: address, optionally with a "/mask" suffix that is
                       ignored.
    :return: True when the address is present, False otherwise or when the
             command fails (logged).
    """
    wanted = ip_address.split('/')[0]
    try:
        # universal_newlines gives text output on both Python 2 and 3.
        output = subprocess.check_output(
            ["ip", "addr", "show", interface], universal_newlines=True)
    except (subprocess.CalledProcessError, OSError) as e:
        log.error("Checking IP address failed: {}".format(e))
        return False
    for line in output.splitlines():
        fields = line.split()
        if len(fields) < 2 or fields[0] not in ('inet', 'inet6'):
            continue
        if fields[1].split('/')[0] == wanted:
            return True
    return False


def monitor_ip(interface, ip_address, check_interval, max_checks=3):
    """
    Poll until ``ip_address`` shows up on ``interface`` again.

    :param interface: interface name.
    :param ip_address: address to look for.
    :param check_interval: seconds to sleep after each unsuccessful check.
    :param max_checks: number of checks to perform (default 3, the value
                       that used to be hard-coded).
    :return: 0 as soon as the address is found, 1 when every check failed.
    """
    for _ in range(max_checks):
        if check_ip(interface, ip_address):
            return 0
        # Sleep after every miss, including the last one, as before.
        time.sleep(check_interval)
    return 1


def _run_cluster_fix():
    """Cluster mode: find the affected interfaces and fix every host."""
    # Cluster-wide single instance.
    if create_cluster_instance():
        return 1
    try:
        # Look for access IPs that reuse the management/private port and
        # are in the same subnet as that port.
        iscsi_conf_by_inf = get_iscsi_ip(iscsi_check_inf)
        if iscsi_conf_by_inf is None:
            raise Exception('get iscsi ip failed')
        mgr_ip, _ = get_local_ip()
        if mgr_ip is None:
            raise Exception(
                '{} There is no management IP or private IP'.format(
                    LOCALHOST))
        need_fix_inf = set()
        for inf, inf_iscsi_conf in iscsi_conf_by_inf.items():
            # Tolerate a bare None result as well as (None, None).
            local_ip, local_mask = (
                get_local_ip_info(inf, mgr_ip) or (None, None))
            if local_mask is None:
                continue
            for iscsi_conf in inf_iscsi_conf:
                if compare_ip_and_mask(local_ip, local_mask,
                                       iscsi_conf['access_ip'],
                                       iscsi_conf['vip_mask']):
                    need_fix_inf.add(inf)
                    break
        if not need_fix_inf:
            log.info('There is no access to the IP multiplexing '
                     'management network and private network port, '
                     'and they are in the same network segment')
            return 0
        # Distribute this script to dst_file_path on the cluster hosts.
        if distribute_script(file_path, dst_file_path):
            return 1
        # On every host run: fix_iscsi_ip.py -r -i <interfaces to fix>
        failed_hosts = []
        for hostname in get_all_host():
            if remote_run_fix_iscsi_ip(hostname, dst_file_path,
                                       sorted(need_fix_inf)):
                print('{} fix failed'.format(hostname))
                failed_hosts.append(hostname)
            else:
                print('{} fix success'.format(hostname))
        if failed_hosts:
            # Report partial failure through the exit status as well.
            log.error('fix failed on hosts: {}'.format(failed_hosts))
            return 1
        return 0
    except Exception as ex:
        log.error(
            "run failed： {}, stack: {}".format(ex, traceback.format_exc()))
        return 1
    finally:
        delete_cluster_instance()


def _run_local_fix(interfaces_arg):
    """Local mode: drop reused access IPs on this host and check routes."""
    interfaces = []
    if interfaces_arg:
        interfaces = [name.strip() for name in interfaces_arg.split(',')
                      if name.strip()]
    if not interfaces:
        # Validate before taking the lock so nothing is held on early exit.
        log.warning("非集群模式执行时，需要指定网口！")
        return 1

    # Host-local single instance.
    single_instant = SingleProc("/var/lock/fix_iscsi_ip.lock")
    if single_instant.lock() == -1:
        log.warning("fix iscsi ip exist, exit")
        return 1
    try:
        # Management / private network IPs of this host.
        mgr_ip, stor_ip = get_local_ip()
        if mgr_ip is None or stor_ip is None:
            raise Exception('{} does not have a management network '
                            'or private network IP'.format(LOCALHOST))
        # iSCSI access IP configuration.
        iscsi_conf_by_inf = get_iscsi_ip(interfaces)
        if iscsi_conf_by_inf is None:
            raise Exception('get iscsi ip failed')
        inf_ips = get_ip_and_netmask(interfaces)
        vips = []
        for inf in interfaces:
            # An interface without HA configuration has nothing to fix.
            access_confs = iscsi_conf_by_inf.get(inf, [])
            if not access_confs:
                continue
            local_ip, local_mask = (
                get_local_ip_info(inf, mgr_ip) or (None, None))
            if local_ip is None or local_mask is None:
                continue
            ips = []
            for access_conf in access_confs:
                if compare_ip_and_mask(local_ip, local_mask,
                                       access_conf['access_ip'],
                                       access_conf['vip_mask']):
                    ips.extend(access_conf['vip_pool'])
                    if access_conf['access_ip'] not in ips:
                        ips.append(access_conf['access_ip'])
            if not ips:
                continue
            # Delete the access IP and wait for it to float back.
            for inf_ip, inf_mask in inf_ips.get(inf, []):
                if inf_ip not in ips:
                    continue
                delete_ip(inf, inf_ip + '/' + inf_mask)
                if monitor_ip(inf, inf_ip, 1):
                    log.warning(
                        'access ip {} has not come back on {} yet'.format(
                            inf_ip, inf))
                vips.append(inf_ip)
        # Check the routes: an access IP used as route src is a problem.
        src_ips = get_interface_src(interfaces)
        if src_ips is None:
            log.error('get route src ip failed, interfaces: {}'.format(
                interfaces))
            return 1
        if vips and not set(vips).isdisjoint(src_ips):
            log.error(
                'route exist problem,vips: {} src_ips: {}'.format(
                    vips, src_ips))
            return 1
        return 0
    except Exception as ex:
        log.error(
            "run failed： {}, stack: {}".format(ex, traceback.format_exc()))
        return 1
    finally:
        # Release the lock explicitly instead of relying on __del__; never
        # let a cleanup error replace the result computed above.
        try:
            single_instant.unlock()
        except Exception as ex:
            log.warning('release single instance lock failed: {}'.format(ex))


def main():
    """
    Entry point of the tool.

    Prints the help and returns 1 unless -r/--run is given.  With
    -c/--cluster the cluster-wide fix is run; otherwise the local fix is
    run on the interfaces passed with -i/--interfaces.

    :return: process exit status, 0 on success and 1 on failure.
    """
    parser = cli_parser()
    args = parser.parse_args()
    # Without any argument args.run is False, so this covers that case too.
    if not args.run:
        parser.print_help()
        return 1

    if args.cluster:
        return _run_cluster_fix()
    return _run_local_fix(args.interfaces)


# When executed as a script, run main() and use its result as exit status.
if __name__ == "__main__":
    sys.exit(main())
