#!/sf/vs/bin/python
# -*- coding:utf-8 -*-

"""
## 显示集群内故障文件信息
"""
import re
import json
import pylib.utils.utiltools as common


def _brick_dir_from_ls_line(line, file_path, include_tfile):
    """Extract the brick directory from one ``ls -al`` output line.

    Returns the directory that precedes ``file_path`` in the listed path
    (without trailing slashes), or None when the line does not describe a
    usable copy of the file.
    """
    brick_root = '/sf/data/vs/local/'
    fields = line.split() if line else []
    if not fields:
        # Empty or whitespace-only line: nothing to parse.
        return None
    if not include_tfile and fields[0] == '---------T':
        return None

    # Locate the path by its fixed prefix instead of taking the last
    # whitespace-separated token, so file names containing spaces survive.
    start = line.find(brick_root)
    if start < 0 or (start > 0 and not line[start - 1].isspace()):
        return None
    full_path = line[start:].rstrip('\r\n')
    if not full_path.endswith(file_path):
        return None

    # Strip the file name as a suffix only; a global replace would also eat
    # matching text inside the brick directory itself.
    brick_dir = full_path[:len(full_path) - len(file_path)].rstrip('/')
    return brick_dir or None


# Find the replicate groups on which a file's copies are stored (2.x layout)
def get_file_replicate_2x(file_path, hosts, replicate, include_tfile=False):
    """Return the replicate groups whose bricks hold a copy of ``file_path``.

    Runs ``ls -al`` for the file under ``/sf/data/vs/local/*/*/`` on every
    host, derives the brick directory of each hit, and selects every
    replicate group that owns at least one of those directories.

    Args:
        file_path: File path appended to each brick directory.
        hosts: Hosts to query through ``common.remote_cli``.
        replicate: Mapping of replicate id to a list of brick dicts; each
            brick dict is read through its ``'path'`` key.
        include_tfile: When False, listing lines whose first column is
            ``---------T`` are ignored.

    Returns:
        dict mapping replicate id to its brick list; empty when no copy was
        located. Hosts whose command fails with ``common.CmdError`` are
        logged and skipped.
    """
    local_paths = set()
    recovery_replicate = {}
    cmdline = '/bin/ls -al /sf/data/vs/local/*/*/"{}"'.format(file_path)
    for host in hosts:
        try:
            result = common.remote_cli(host, cmdline, True)
            for line in result:
                brick_dir = _brick_dir_from_ls_line(line, file_path, include_tfile)
                if brick_dir:
                    local_paths.add(brick_dir)
        except common.CmdError as e:
            common.logger.warn('got except: {}'.format(e))
            continue

    if not local_paths:
        return recovery_replicate

    for rep_id, bricks in replicate.items():
        if any(brick['path'] in local_paths for brick in bricks):
            recovery_replicate[rep_id] = bricks
    return recovery_replicate


def get_file_replicate_3x(file_path, replicate, online_bricks):
    """Return the data bricks that hold ``file_path`` (3.x layout).

    Every online brick of ``replicate[common.LEFT_TREE_INDEX]`` is asked for
    the file's gfid and route. The answer reported by the most bricks wins
    (ties go to the answer seen first), and its route ids are resolved to
    brick dicts.

    Args:
        file_path: File path relative to the brick directory.
        replicate: Indexable by ``common.LEFT_TREE_INDEX`` and
            ``common.RIGHT_TREE_INDEX``; each entry is a list of brick dicts
            with ``'host'``, ``'path'`` and ``'id'`` keys.
        online_bricks: Mapping of brick path to a status string; only
            bricks mapped to ``'y'`` are queried.

    Returns:
        ``{gfid: [brick, ...]}`` on success, ``{}`` when the gfid/route
        could not be read or not every route id could be resolved.

    Raises:
        common.CmdError: For remote command failures other than
            "No such file or directory".
    """
    # Each entry is [gfid_and_route, number_of_bricks_reporting_it]. The
    # gfid/route answers are dicts (unhashable), hence a list, not a Counter.
    candidates = []
    for brick in replicate[common.LEFT_TREE_INDEX]:
        if online_bricks.get(brick['path']) != 'y':
            continue

        try:
            first_gfid = get_left_tree_gfid_and_route(brick['host'], brick['path'], file_path)
        except common.CmdError as e:
            # The file simply is not on this brick; any other failure is real.
            if 'No such file or directory' in str(e):
                continue
            raise

        if not first_gfid:
            common.logger.warn('failed to get first_gfid, file_path: {}'.format(file_path))
            continue

        for entry in candidates:
            if entry[0] == first_gfid:
                entry[1] += 1
                break
        else:
            candidates.append([first_gfid, 1])

    if not candidates:
        common.logger.error('failed to get first_gfid_in_replicate, file_path: {}'.format(file_path))
        return {}

    # max() returns the first maximal entry, i.e. the earliest answer among
    # those reported by the largest number of bricks.
    first_gfid = max(candidates, key=lambda entry: entry[1])[0]
    gfid = first_gfid['gfid']
    rt_ids = first_gfid['route'].get('rt')
    if not rt_ids:
        # Guard before iterating: a route without 'rt' must yield the same
        # logged failure as an unresolved route, not a TypeError.
        common.logger.error('failed to get file_path: {}, bricks: {}'.format(file_path, []))
        return {}

    # This specific gfid is resolved against the left tree, every other gfid
    # against the right tree.
    if gfid == '00000000-0000-0000-0000-000000000001':
        tree_bricks = replicate[common.LEFT_TREE_INDEX]
    else:
        tree_bricks = replicate[common.RIGHT_TREE_INDEX]

    data_bricks = []
    for rt_id in rt_ids:
        for data_brick in tree_bricks:
            if data_brick.get('id') == rt_id:
                data_bricks.append(data_brick)
                break

    if len(data_bricks) != len(rt_ids):
        common.logger.error('failed to get file_path: {}, bricks: {}'.format(file_path, data_bricks))
        return {}
    return {gfid: data_bricks}


def get_file_size_2x(version, file_path, hosts, replicate, online_bricks):
    """Read a file's size from the ``user.glusterfs.bd`` extended attribute.

    The bricks holding the file are located with the 3.x or 2.x lookup
    depending on ``version``; each online brick is then queried with
    ``getfattr`` until one reports a size.

    Args:
        version: Cluster version, compared against ``common.VS_VERSION_3_0``.
        file_path: File path relative to the brick directory.
        hosts: Hosts passed to the 2.x lookup.
        replicate: Replicate description passed to the lookup helpers.
        online_bricks: Mapping of brick path to status; only ``'y'`` bricks
            are queried.

    Returns:
        The size as an int, or 0 when no brick reported one.
    """
    is_3x = version >= common.VS_VERSION_3_0
    if is_3x:
        located = get_file_replicate_3x(file_path, replicate, online_bricks)
    else:
        located = get_file_replicate_2x(file_path, hosts, replicate)

    if not located:
        return 0

    # Tried in this order on every line: "lv2:<n>" first, then "lv:<n>".
    # Example value: user.glusterfs.bd="lv2:134217728"
    size_patterns = (r'lv2:(\d+)', r'lv:(\d+)')

    for rep_id, bricks in located.items():
        for brick in bricks:
            if online_bricks.get(brick['path']) != 'y':
                continue
            try:
                if is_3x:
                    # On 3.x the lookup key is the gfid, used as the file name.
                    cmdline = '/sf/vs/bin/getfattr -n user.glusterfs.bd {}/{}'.format(brick['path'], rep_id)
                else:
                    cmdline = '/sf/vs/bin/getfattr -n user.glusterfs.bd {}/"{}"'.format(brick['path'], file_path)
                output = common.remote_cli(brick['host'], cmdline, True)
                for line in output:
                    for pattern in size_patterns:
                        found = re.search(pattern, line)
                        if found and line.split('=')[0] == 'user.glusterfs.bd':
                            return int(found.group(1))
            except common.CmdError as e:
                common.logger.warn('got except: {}'.format(str(e)))
                continue
    return 0


def print_fault_files_2x(version, hosts, replicate, online_bricks, split_brain_files, split_brain_vms):
    """Print the split-brain files and virtual machines, one per line.

    Each file is followed by its human-readable size when
    ``get_file_size_2x`` returns a non-zero value. Nothing is printed for a
    section whose message ends up empty.

    Args:
        version, hosts, replicate, online_bricks: Forwarded to
            ``get_file_size_2x``.
        split_brain_files: Iterable of file paths (may be empty/None).
        split_brain_vms: Mapping of VM id to VM name (may be empty/None).
    """
    file_rows = []
    for path in split_brain_files or []:
        size = get_file_size_2x(version, path, hosts, replicate, online_bricks)
        # Only show a size when one was actually obtained.
        tail = ' ({}) '.format(common.to_human_readable(size)) if size else ' '
        file_rows.append(path + tail)

    vm_rows = []
    if split_brain_vms:
        vm_rows = ['{} ({}) '.format(vm_id, vm_name)
                   for vm_id, vm_name in split_brain_vms.items()]

    file_msg_print = '\n'.join(file_rows)
    vm_msg_print = '\n'.join(vm_rows)

    if file_msg_print:
        print(common.Colored().red('\n找到脑裂文件:'))
        print(common.Colored().cyan('{}'.format(file_msg_print)))
    if vm_msg_print:
        print(common.Colored().red('\n找到脑裂虚拟机:'))
        print(common.Colored().cyan('{}'.format(vm_msg_print)))


def get_fault_files_2x(version, volume_name):
    """Collect split-brain files and the virtual machines they map to.

    Returns:
        ``(files, vms)`` where ``files`` is whatever
        ``common.get_split_brain_files`` returned and ``vms`` is the result
        of ``common.files_path_to_vms_name(files)``, or ``[]`` when there are
        no files.
    """
    files = common.get_split_brain_files(volume_name, version)
    vms = common.files_path_to_vms_name(files) if files else []
    return files, vms


def show_fault_files_2x(version):
    """Report split-brain files and virtual machines on a pre-3.0 cluster.

    Returns:
        0 on success, -1 when the volume name or the online brick list
        cannot be obtained.
    """
    (volume_name, hosts, _replicate_num,
     _has_arbiter, _bricks, replicate) = common.get_vs_cluster_info()
    if not volume_name:
        common.logger.error('failed to supported, cannot find volume name')
        return -1

    online_bricks = common.get_online_bricks(volume_name)
    if not online_bricks:
        common.logger.error('failed to supported, cannot find online_bricks')
        return -1

    common.logger.info('try to show_fault_files_2x')

    # Collect first, then print only when there is something to show.
    files, vms = get_fault_files_2x(version, volume_name)
    if files or vms:
        print_fault_files_2x(version, hosts, replicate, online_bricks, files, vms)
    return 0


# Check whether all data replicas of a route are offline
def is_all_data_child_offline(route_info, offline_bricks):
    """Return True when every checked route id is listed in ``offline_bricks``.

    Only the entries at positions ``0 .. len(route_info) // 2`` (inclusive)
    are checked; later positions are ignored. An empty ``route_info``
    yields True.
    """
    last_checked = len(route_info) // 2
    return not any(
        position <= last_checked and route_id not in offline_bricks
        for position, route_id in enumerate(route_info)
    )


# Map shards back to the files and virtual machines they belong to
def get_files_and_vms_from_shards(shards, all_shards):
    """Resolve shard gfids to file paths and the matching VM information.

    Args:
        shards: Shard gfids to resolve.
        all_shards: Full shard list handed to
            ``rpcservice.route_gfid_to_file_path`` for the lookup.

    Returns:
        ``(files, vms_info)``: the de-duplicated file paths (order not
        guaranteed) and the result of ``common.files_path_to_vms_name`` for
        them; ``([], [])`` when either input is empty or nothing resolved.
    """
    import pylib.rpcservice as rpcservice

    if not shards or not all_shards:
        return [], []

    resolved = [rpcservice.route_gfid_to_file_path(gfid, all_shards) for gfid in shards]
    paths = [path for path in resolved if path]
    if not paths:
        return [], []

    # Several shards can belong to the same file, so drop duplicates.
    unique_files = list(set(paths))
    return unique_files, common.files_path_to_vms_name(unique_files)


def get_left_tree_gfid_and_route(host, brick_path, file_path):
    """Read a file's gfid and parsed route from its extended attributes.

    Dumps the xattrs of ``<brick_path>/<file_path>`` on ``host`` in hex,
    formats ``trusted.gfid`` as a dashed 8-4-4-4-12 string and decodes
    ``trusted.route.info`` with ``vs_route_tool`` (run through
    ``common.cli``), whose output is parsed as JSON.

    Args:
        host: Host on which the brick lives.
        brick_path: Brick directory on that host.
        file_path: File path relative to the brick directory.

    Returns:
        ``{'gfid': <str>, 'route': <parsed JSON>}`` when both attributes
        were read successfully, otherwise ``{}``.

    Raises:
        Whatever ``common.remote_cli`` / ``common.cli`` raise; callers in
        this file catch ``common.CmdError``.
    """
    gfid = ''
    route = {}
    cmdline = '/sf/vs/bin/getfattr -d -m. -e hex {}/"{}"'.format(brick_path, file_path)
    xattr_lines = common.remote_cli(host, cmdline, True)
    for line in xattr_lines:
        name, _, value = line.partition('=')
        value = value.strip()
        if not value:
            continue

        if name == 'trusted.gfid':
            # Expected form: "0x" followed by 32 hex digits. A shorter value
            # would otherwise produce a bogus but truthy gfid such as "----".
            if len(value) < 34:
                common.logger.warn('unexpected trusted.gfid value: {}'.format(value))
                continue
            gfid = '{}-{}-{}-{}-{}'.format(value[2:10], value[10:14], value[14:18],
                                           value[18:22], value[22:34])
        elif name == 'trusted.route.info':
            parse_cmd = '/sf/vs/sbin/vs_route_tool --cmd=parse --rtstr={}'.format(value)
            # Kept in its own variable so the list being iterated is not rebound.
            raw_route = common.cli(parse_cmd, False)
            try:
                route = json.loads(raw_route.decode('utf-8').strip())
            except ValueError as e:
                # Treat unparsable tool output as "route unknown" so callers
                # get the ordinary {} failure value instead of a traceback.
                common.logger.warn('failed to parse route info: {}'.format(e))
                route = {}

    if gfid and route:
        return {'gfid': gfid, 'route': route}
    return {}


def get_online_brick_by_rt(rt_id, replicate, online_bricks):
    """Return the first online right-tree brick whose id equals ``rt_id``.

    A brick counts as online when ``online_bricks`` maps its path to ``'y'``.
    Returns ``{}`` when no such brick exists.
    """
    candidates = (
        brick for brick in replicate[common.RIGHT_TREE_INDEX]
        if brick.get('id') == rt_id and online_bricks.get(brick['path']) == 'y'
    )
    return next(candidates, {})


def get_file_parm_3x(first_gfid, data_brick):
    """Read a file's shard parameters and size from one data brick.

    For a brick flagged ``'arbiter'`` the attributes come from
    ``common.vs_getfattr_2x``; otherwise the brick process id is looked up
    with ``ps`` and the attributes are read through ``efs_dbg``.

    Args:
        first_gfid: Gfid of the file to inspect.
        data_brick: Brick dict with ``'host'`` and ``'path'`` keys and an
            optional ``'arbiter'`` flag.

    Returns:
        ``(stripe_width, stripe_size, shard_size, file_size)``; all zeros
        when ``trusted.shard.size`` or ``trusted.shard.param`` is missing.
    """
    def _scan_xattrs(lines):
        # Pick the two hex values out of "name=value" lines; a later
        # occurrence of a name overrides an earlier one.
        size_hex, parm_hex = '', ''
        for entry in lines:
            if not entry:
                continue
            parts = entry.split('=')
            if parts[0] == 'trusted.shard.size':
                size_hex = parts[1].strip()
            elif parts[0] == 'trusted.shard.param':
                parm_hex = parts[1].strip()
        return size_hex, parm_hex

    host = data_brick['host']
    path = data_brick['path']
    file_size_hex, shard_parm_hex = '', ''

    if data_brick.get('arbiter'):
        xattr_dump = common.vs_getfattr_2x(host, path, first_gfid)
        file_size_hex, shard_parm_hex = _scan_xattrs(xattr_dump.split('\n'))
    else:
        cmdline = '/bin/ps aux | grep {} | /bin/grep -v supervise | /bin/grep -v grep'.format(path)
        common.logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
        ps_fields = common.remote_cli(host, cmdline, False).split()
        # The second whitespace-separated field of the ps output is the pid.
        if len(ps_fields) > 1 and ps_fields[1].isdigit():
            cmdline = '/sf/vs/sbin/efs_dbg -p {} -c "{}"'.format(
                ps_fields[1], 'inode xattr {}'.format(first_gfid))
            common.logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
            file_size_hex, shard_parm_hex = _scan_xattrs(common.remote_cli(host, cmdline, True))

    if not (file_size_hex and shard_parm_hex):
        common.logger.error('failed to get first_gfid: {}, file_size_hex or shard_parm_hex'.format(first_gfid))
        return 0, 0, 0, 0

    # Hex strings start with "0x"; fields are sliced by hex-digit offsets.
    file_size = int(file_size_hex[2:18], 16)  # first 8 bytes
    # Top four bits of the first 32-bit field are masked off; the remainder
    # is multiplied by 1 MiB.
    shard_size = (int(shard_parm_hex[2:10], 16) & (~0xF0000000)) * 1024 * 1024
    stripe_size = int(shard_parm_hex[10:18], 16)
    stripe_width = int(shard_parm_hex[18:26], 16)

    common.logger.info(
        'got file first_gfid: {}, stripe_width: {}, stripe_size: {}, shard_size: {}, file_size: {}'.format(
            first_gfid, stripe_width, stripe_size, shard_size, file_size))
    return stripe_width, stripe_size, shard_size, file_size


def get_file_size_3x(file_path, replicate, online_bricks):
    """Return the size of ``file_path`` on a 3.x cluster, or 0 if unknown.

    Each online brick of ``replicate[common.LEFT_TREE_INDEX]`` is asked for
    the file's gfid and route; the route ids are resolved to online
    right-tree bricks, which are queried for the size until one reports a
    non-zero value.

    Args:
        file_path: File path relative to the brick directory.
        replicate: Replicate description indexable by
            ``common.LEFT_TREE_INDEX`` / ``common.RIGHT_TREE_INDEX``.
        online_bricks: Mapping of brick path to status; only ``'y'`` bricks
            are used.

    Returns:
        The file size as an int, or 0 when no brick could provide it.
    """
    for brick in replicate[common.LEFT_TREE_INDEX]:
        if online_bricks.get(brick['path']) != 'y':
            continue

        try:
            first_gfid = get_left_tree_gfid_and_route(brick['host'], brick['path'], file_path)
        except common.CmdError as e:
            # A failing brick (e.g. the file is absent there) must not abort
            # the whole report; try the next one, as the 2.x path does.
            common.logger.warn('got except: {}'.format(str(e)))
            continue

        if not first_gfid:
            common.logger.warn('failed to get first_gfid, file_path: {}'.format(file_path))
            continue

        gfid = first_gfid.get('gfid')
        rts = first_gfid.get('route').get('rt')
        if not rts:
            common.logger.error('failed to get rt, gfid: {}'.format(gfid))
            continue

        for rt_id in rts:
            data_brick = get_online_brick_by_rt(rt_id, replicate, online_bricks)
            if not data_brick:
                common.logger.warn('data_brick id: {} is offline'.format(rt_id))
                continue
            try:
                # get_file_parm_3x returns
                # (stripe_width, stripe_size, shard_size, file_size).
                file_size = get_file_parm_3x(gfid, data_brick)[3]
            except common.CmdError as e:
                common.logger.warn('got except: {}'.format(str(e)))
                continue
            if file_size:
                return file_size
            # Size not obtained from this replica: fall through and try the
            # remaining ones instead of giving up with 0.
    return 0


def get_split_brain_shards_3x():
    """Return the result of ``rpcservice.route_list_all_err_shards()``."""
    # Imported lazily, as everywhere else in this file.
    import pylib.rpcservice as rpcservice

    return rpcservice.route_list_all_err_shards()


def get_split_brain_files_3x(all_shards):
    """Return ``(files, vms)`` for the shards reported as split-brain.

    Yields ``([], [])`` when no erroneous shard is reported.
    """
    err_shards = get_split_brain_shards_3x()
    if not err_shards:
        return [], []
    return get_files_and_vms_from_shards(err_shards, all_shards)


def print_split_brain_files_3x(version, hosts, replicate, online_bricks, split_brain_files, split_brain_vms):
    """Print the split-brain files and virtual machines of a 3.x cluster.

    Each file is printed on its own line, followed by its human-readable
    size when one could be obtained. No size lookup is attempted for paths
    containing ``bcfg``, ``conf`` or ``.storage_id``. On a two-host cluster
    (``common.vs_is_two_host``) the 2.x size lookup is used, otherwise the
    3.x one.

    Args:
        version, hosts, replicate, online_bricks: Forwarded to the size
            lookup helpers.
        split_brain_files: Iterable of file paths (may be empty/None).
        split_brain_vms: Mapping of VM id to VM name; any falsy value
            (``{}``, ``[]``, ``None``) means "no VMs".
    """
    # Paths containing any of these markers are listed without a size.
    no_size_markers = ('bcfg', 'conf', '.storage_id')

    file_rows = []
    for path in split_brain_files or []:
        file_size = 0
        if not any(marker in path for marker in no_size_markers):
            if common.vs_is_two_host(hosts):
                # Two hosts: use the 2.x way of reading the file size.
                file_size = get_file_size_2x(version, path, hosts, replicate, online_bricks)
            else:
                file_size = get_file_size_3x(path, replicate, online_bricks)

        if file_size:
            file_rows.append(path + ' ({}) '.format(common.to_human_readable(file_size)))
        else:
            file_rows.append(path + ' ')

    # Sibling helpers use [] as the "no VMs" value, so guard before .items()
    # exactly as the 2.x printer does.
    vm_rows = []
    if split_brain_vms:
        vm_rows = ['{} ({}) '.format(vm_id, vm_name)
                   for vm_id, vm_name in split_brain_vms.items()]

    file_msg_print = '\n'.join(file_rows)
    vm_msg_print = '\n'.join(vm_rows)

    if file_msg_print:
        print(common.Colored().red('\n找到脑裂文件:'))
        print(common.Colored().cyan('{}'.format(file_msg_print)))
    if vm_msg_print:
        print(common.Colored().red('\n找到脑裂虚拟机:'))
        print(common.Colored().cyan('{}'.format(vm_msg_print)))


def get_offline_files_3x(all_shards, offline_bricks):
    """Find shards whose checked replicas are all offline, plus their files/VMs.

    Shards are only examined when at least two bricks are offline. A shard
    qualifies when its ``'route_info'`` is a list and
    ``is_all_data_child_offline`` accepts it.

    Args:
        all_shards: Shard dicts with ``'route_info'`` and ``'gfid'`` keys.
        offline_bricks: Ids of the offline bricks.

    Returns:
        ``(offline_shards, offline_files, offline_vms)``; the last two are
        empty lists when no shard qualifies.
    """
    offline_shards = []  # gfids of shards whose data replicas are all offline

    # With fewer than two offline bricks no shard is examined at all.
    if offline_bricks and len(offline_bricks) >= 2:
        for shard in all_shards:
            route_info = shard.get('route_info')
            if isinstance(route_info, list) and is_all_data_child_offline(route_info, offline_bricks):
                offline_shards.append(shard['gfid'])

    if not offline_shards:
        return offline_shards, [], []

    offline_files, offline_vms = get_files_and_vms_from_shards(offline_shards, all_shards)
    return offline_shards, offline_files, offline_vms


def print_offline_files_3x(offline_shards, offline_files, offline_vms):
    """Print offline shards, files and virtual machines.

    Shards are printed four per line; files and VMs one per line. A section
    is skipped when it has nothing to show.

    Args:
        offline_shards: List of shard gfid strings.
        offline_files: List of file paths.
        offline_vms: Mapping of VM id to VM name (may be empty).
    """
    shard_msg_print = ''
    if offline_shards:
        cells = [gfid + ' ' for gfid in offline_shards]
        # Group the cells into rows of four.
        rows = [''.join(cells[start:start + 4]) for start in range(0, len(cells), 4)]
        shard_msg_print = '\n'.join(rows)

    file_msg_print = ''
    if offline_files:
        file_msg_print = '\n'.join(path + ' ' for path in offline_files)

    vm_msg_print = ''
    if offline_vms:
        vm_msg_print = '\n'.join('{} ({}) '.format(vm_id, vm_name)
                                 for vm_id, vm_name in offline_vms.items())

    sections = (
        ('\n找到数据副本都离线的分片:', shard_msg_print),
        ('\n找到数据副本都离线的文件:', file_msg_print),
        ('\n找到数据副本都离线的虚拟机:', vm_msg_print),
    )
    for title, body in sections:
        if body:
            print(common.Colored().red(title))
            print(common.Colored().cyan('{}'.format(body)))


def show_fault_files_3x(version):
    """Report split-brain and offline files on a 3.x cluster.

    Prints the split-brain files/VMs first, then the shards, files and VMs
    found by ``get_offline_files_3x`` for the offline bricks.

    Returns:
        0 on success, -1 when the volume name, the online bricks or the
        shard list cannot be obtained.
    """
    (volume_name, hosts, _replicate_num,
     _has_arbiter, _bricks, replicate) = common.get_vs_cluster_info()
    if not volume_name:
        common.logger.error('failed to supported, cannot find volume name')
        return -1

    online_bricks = common.get_online_bricks(volume_name)
    if not online_bricks:
        common.logger.error('failed to supported, cannot find online_bricks')
        return -1

    import pylib.rpcservice as rpcservice
    all_shards = rpcservice.route_list_all_shards(volume_name)
    if not all_shards:
        common.logger.error('failed to supported, cannot find all_shards')
        return -1

    common.logger.info('try to show_fault_files_3x')

    # Split-brain files and virtual machines.
    sb_files, sb_vms = get_split_brain_files_3x(all_shards)
    if sb_files or sb_vms:
        print_split_brain_files_3x(version, hosts, replicate, online_bricks, sb_files, sb_vms)

    # Offline bricks: every client that has a number and is not NORMAL.
    offline_bricks = [
        int(client.get('no'))
        for client in rpcservice.get_client_status()
        if client.get('no') is not None and client.get('status') != 'NORMAL'
    ]
    if not offline_bricks:
        # Without offline bricks there is nothing further to report.
        common.logger.info('offline_bricks is null')
        return 0

    # Offline shards with their files and virtual machines.
    shards, files, vms = get_offline_files_3x(all_shards, offline_bricks)
    if shards:
        print_offline_files_3x(shards, files, vms)

    return 0


def show_fault_files():
    """Dispatch to the 2.x or 3.x fault-file report based on the version.

    Returns:
        -1 when ``common.is_vs_version_valid`` rejects the version,
        otherwise the return value of the selected report function.
    """
    version = common.get_vs_version()
    if not common.is_vs_version_valid(version):
        return -1

    # Versions below 3.0 take the 2.x path, everything else the 3.x path.
    report = show_fault_files_2x if version < common.VS_VERSION_3_0 else show_fault_files_3x
    return report(version)


def _show_fault_files():
    """Run ``show_fault_files`` and print a failure notice on a truthy result.

    Returns:
        The return value of ``show_fault_files``.
    """
    status = show_fault_files()
    if status:
        print(common.Colored().red('执行失败'))
    return status
