#!/sf/vs/bin/python
# -*- coding:utf-8 -*-

"""
## 文件恢复工具
"""

import os
import re
import sys
import uuid
import time
import json
import platform
import traceback
import pylib.utils.utiltools as common


def check_and_get_recovery_dir(version, volume_name, file_path, file_size, available_size, force_save_local):
    """Ask the operator to confirm the recovery and pick the backup directory.

    A summary of the file and the volume is shown first.  If the file maps to
    a virtual machine that is currently running the recovery is refused.
    Afterwards the operator either accepts the default backup directory
    (inside the volume mount path) or types another existing directory.

    Args:
        version: storage version, forwarded to ``common.get_vs_mount_path``.
        volume_name: name of the storage volume.
        file_path: file to recover; also used to look up the owning VM.
        file_size: size of the file, shown via ``common.to_human_readable``.
        available_size: free space of the volume, shown the same way.
        force_save_local: when true the default directory is not offered and
            the operator has to type a directory path.

    Returns:
        The directory the backup should be written to.

    Raises:
        common.CmdError: the owning VM is running, or stdin reached EOF
            before a valid answer was given.
    """
    readline = '文件: {} (大小 {}), 存储卷: {} (可用 {})\n'.format(
        file_path, common.to_human_readable(file_size),
        volume_name, common.to_human_readable(available_size))
    vms_info = common.files_path_to_vms_name([file_path])
    if not vms_info:
        readline += '请确认文件, 是否没有业务访问？\n'
    else:
        vmid, vm_name = next(iter(vms_info.items()))
        if common.check_if_vmid_running(vmid):
            print(common.Colored().red('检查到虚拟机vmid: {}，正在运行，退出修复'.format(vmid)))
            err_msg = 'failed to supported, vmid: {} is running'.format(vmid)
            raise common.CmdError(err_msg)
        readline += '请确认文件所属虚拟机: {}, 是否没有业务访问？\n'.format(vm_name)
    readline += '确认完成后, 输入\'y\'继续，\'n\'退出'
    # NOTE(review): presumably aborts when the operator answers 'n' - confirm in common.
    common.check_terminal_input(readline)

    recovery_dir = os.path.join(common.get_vs_mount_path(volume_name, version), common.vsfire_recovery_dir)
    if force_save_local:
        readline = '请输入外置存储目录路径 (路径要求为绝对路径，且路径存在)'
    else:
        readline = '备份路径: {}, \n输入\'y\'继续, 或者输入外置存储目录路径替换该路径 (路径要求为绝对路径，且路径存在)'.format(recovery_dir)
    while True:
        print(common.Colored().fuchsia(readline))
        raw_line = sys.stdin.readline()
        if not raw_line:
            # readline() returns '' only at EOF; without this check the loop
            # would print the prompt forever once stdin is closed.
            raise common.CmdError('failed to read recovery dir, stdin reached EOF')
        path_input = raw_line.strip()
        if not force_save_local and path_input.lower() == 'y':
            break
        elif path_input.startswith('/') and os.path.isdir(path_input):
            # The replacement must be an absolute path to an existing directory
            recovery_dir = path_input
            break
        else:
            print(common.Colored().red('输入新的路径不合法，请重新输入'))
    return recovery_dir

def recovery_data_2x(host, brick_path, gfid, output_path):
    """Copy every logical volume belonging to ``gfid`` into ``output_path``.

    The LVs are listed on ``host`` with ``lvs`` (filtered by the gfid and the
    volume group taken from ``brick_path``) and copied one after another with
    ``dd``; each LV is written at the MiB offset where the previous one ended.
    A progress line is printed after every LV.

    Args:
        host: host on which ``lvs`` and ``dd`` are executed.
        brick_path: brick path; its 6th '/'-separated component is used as VG.
        gfid: file gfid, with or without '-' separators.
        output_path: destination file handed to ``dd of=``.

    Raises:
        common.CmdError: when the fault injection point fires; errors raised
            by ``common.remote_cli`` propagate unchanged.
    """
    if common.fault_point_result():
        raise common.CmdError('common.fault_point_result')

    # Strip the '-' separators from the gfid
    gfid = gfid.replace('-', '')
    vg = brick_path.split('/')[5]
    cmdline = "/sbin/lvs --unit b --nosuffix | /bin/grep {} | /bin/grep {} | /usr/bin/sort -t '.' -k 2 -b -n".format(
            gfid, vg)
    common.logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
    result = common.remote_cli(host, cmdline, True)

    # Parse each matching lvs row exactly once: (lv name, vg name, size in bytes)
    lv_rows = []
    for line in result:
        if gfid in line:
            fields = line.split()
            lv_rows.append((fields[0].strip(), fields[1].strip(), int(fields[3].strip())))
    total_size = sum(row[2] for row in lv_rows)

    seek = 0  # dd output offset, counted in 1 MiB blocks
    start_time = time.time()

    # Copy the data
    for lv, lv_vg, size_bytes in lv_rows:
        size = size_bytes // (1024 * 1024)
        cmdline = '/bin/dd if=/dev/{}/{} of={} bs=1M seek={} iflag=direct oflag=direct conv=notrunc'.\
            format(lv_vg, lv, output_path, seek)
        common.logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
        seek += size
        common.remote_cli(host, cmdline, True)

        # Print the progress after each LV
        dd_time = time.time()
        elapsed = dd_time - start_time
        copied = seek * 1024 * 1024
        # A copy that finishes within the timer resolution must not divide by zero
        speed = (size * 1024 * 1024.0) / elapsed if elapsed > 0 else 0.0
        remaining_time = (total_size - copied) / speed if speed > 0 else float('inf')
        percent = 100.0 * copied / total_size if total_size > 0 else 100.0
        common.print_with_clear('文件总大小: {}，已拷贝: {} ({:.2f} %), 速率: {}/s, 预计剩余时间: {:.2f} 秒'.
                                format(common.to_human_readable(total_size),
                                       common.to_human_readable(copied),
                                       percent,
                                       common.to_human_readable(speed), remaining_time))
        start_time = dd_time
    print('\n')

def file_recovery_bd_xattr_has_bad(xattr):
    """Tell whether the extended attributes carry the BAD marker.

    ``xattr`` is a list of xattr lines (or None/empty).  The marker is
    considered present when any line matches ``trusted.file_status``.
    """
    if not xattr:
        return False
    return any(re.search(r"trusted.file_status", entry) for entry in xattr)


def file_recovery_bd_xattr_get_gfid(xattr):
    """Extract the gfid from xattr lines and format it with dashes.

    Args:
        xattr: list of ``key=value`` lines, or None/empty.

    Returns:
        The gfid formatted as 8-4-4-4-12 characters taken from the value of
        the ``trusted.gfid`` line (the leading two characters, i.e. the
        ``0x`` prefix, are skipped), or None when no such line exists.
    """
    if not xattr:
        # Same guard as the sibling xattr helpers: an offline replica has no xattr
        return None

    for line in xattr:
        parts = line.split('=')
        # A key without '=value' carries no gfid; skip it instead of raising IndexError
        if len(parts) < 2 or parts[0] != 'trusted.gfid':
            continue
        gfid_hex = parts[1]
        return '{}-{}-{}-{}-{}'.format(gfid_hex[2:10], gfid_hex[10:14],
                                       gfid_hex[14:18], gfid_hex[18:22], gfid_hex[22:34])
    return None


def file_recovery_bd_xattr_has_changelog(xattr):
    """Tell whether the extended attributes contain a pending accusation.

    A line counts when it names a ``vs_vol_rep2-client-`` or
    ``vs_vol_rep2-vclnt-`` attribute and its value differs from the all-zero
    changelog.  None or an empty list yields False.
    """
    if not xattr:
        return False

    clean_changelog = '0x000000000000000000000000'
    for entry in xattr:
        names_changelog = re.search(r"vs_vol_rep2-client-", entry) or re.search(r"vs_vol_rep2-vclnt-", entry)
        if not names_changelog:
            continue
        if entry.split('=')[1] != clean_changelog:
            return True
    return False


def file_recovery_bd_xattr_get_accused(brick_id, xattr):
    """Return how strongly the given brick is accused in one xattr dump.

    Args:
        brick_id: 1-based brick id; the attribute looked up is
            ``vs_vol_rep2-client-<brick_id - 1>``.
        xattr: list of ``key=value`` lines, or None/empty.

    Returns:
        The first 10 characters of the changelog value (``0x`` plus 8 hex
        digits, the data accusation counter) parsed as an integer, or 0 when
        the attribute is not present.
    """
    if not xattr:
        return 0

    client_id = 'vs_vol_rep2-client-{}'.format(brick_id - 1)
    for line in xattr:
        key, sep, changelog = line.partition('=')
        # Compare against the end of the key: a plain substring/regex search
        # would let 'client-1' also match 'client-10' ... 'client-19'.
        if sep and key.strip().endswith(client_id):
            return int(changelog[0:10], 16)
    return 0


def file_recovery_bd_select_child_2x(file_path, bricks, online_bricks):
    """Pick the replica (0 or 1) of a replicate group that should be backed up.

    The decision is taken in this order:
      1. no replica is missing, BAD or accused        -> -1 (nothing to do)
      2. both data replicas (0 and 1) are unavailable -> -1
      3. exactly one data replica is unavailable      -> the other one
      4. gfid missing or different on the two replicas -> -1
      5. a replica carries the BAD marker             -> that replica
      6. the group has exactly two bricks             -> the replica with the
         smaller ``stat -c "%Y"`` value (replica 1 on a tie)
      7. otherwise                                    -> the replica with the
         larger accusation value (replica 1 on a tie)

    NOTE(review): the previous header comment listed "largest accusation"
    before the two-brick rule and said a tie selects replica 0; the code has
    always behaved as documented above - confirm which one is intended.

    Args:
        file_path: path of the file relative to the brick root.
        bricks: brick dicts of the group (keys used: 'path', 'host', 'id').
        online_bricks: mapping brick path -> 'y' when the brick is online.

    Returns:
        0 or 1 for the selected replica, -1 when no replica can be selected.

    Raises:
        common.CmdError: a remote command failed for a reason other than
            "No such file or directory".
    """
    stats = []  # per-replica stat value, None when unavailable
    xattrs = []  # per-replica xattr lines, None when unavailable
    childs = [0, 0]  # accusation value per data replica; there are always exactly two

    for brick in bricks:
        if online_bricks.get(brick['path']) != 'y':
            stats.append(None)
            xattrs.append(None)
            continue

        try:
            cmdline = '/sf/bin/busybox/stat -c "%Y" "{}"'.format(os.path.join(brick['path'], file_path))
            stat_result = common.remote_cli(brick['host'], cmdline, False).split('\n')
            stat_value = int(stat_result[0])
            common.logger.info('cmdline: {}\ngot stat: {}'.format(cmdline, stat_value))
            xattr_result = common.vs_getfattr_2x(brick['host'], brick['path'], file_path)
            common.logger.info('host: {}, path: {}, file: {}\n{}'.format(brick['host'], brick['path'], file_path, xattr_result))
            xattr_lines = xattr_result.split('\n')

            # Append both together so the two lists always stay aligned
            stats.append(stat_value)
            xattrs.append(xattr_lines)
        except common.CmdError as e:
            if 'No such file or directory' not in str(e):
                raise
            # The file does not exist on this brick: treat it like an offline replica
            stats.append(None)
            xattrs.append(None)

    # Recovery is only allowed when a replica is missing, BAD or accused
    need_recovery = any(
        not xattr or file_recovery_bd_xattr_has_bad(xattr) or file_recovery_bd_xattr_has_changelog(xattr)
        for xattr in xattrs)
    if not need_recovery:
        common.logger.error('failed to select child, file_path: {} has no bad or changelog in data_bricks: {}'.
                            format(file_path, bricks))
        print(common.Colored().red('检查副本状态正常，不需要修复'))
        return -1

    if not xattrs[0] and not xattrs[1]:
        common.logger.error('failed to select child, file_path: {} data brick all offline'.format(file_path))
        return -1

    # Replica 0 unavailable: back up replica 1
    if not xattrs[0]:
        return 1

    # Replica 1 unavailable: back up replica 0
    if not xattrs[1]:
        return 0

    gfid_0 = file_recovery_bd_xattr_get_gfid(xattrs[0])
    gfid_1 = file_recovery_bd_xattr_get_gfid(xattrs[1])
    if not gfid_0 or not gfid_1 or gfid_0 != gfid_1:
        # With both data replicas available the gfids have to be identical
        common.logger.error('failed to select child, gfid_0: {} not equal gfid_1: {}'.format(gfid_0, gfid_1))
        return -1

    # A replica marked BAD is selected right away
    for child_id in (0, 1):
        if file_recovery_bd_xattr_has_bad(xattrs[child_id]):
            return child_id

    # Two-brick group: select the replica with the smaller stat value
    if len(bricks) == 2:
        if stats[0] < stats[1]:
            return 0
        else:
            return 1

    # Collect, for each data replica, the largest accusation any brick holds against it
    for child_id in (0, 1):
        brick_id = bricks[child_id]['id']
        for xattr in xattrs:
            accused = file_recovery_bd_xattr_get_accused(brick_id, xattr)
            childs[child_id] = max(childs[child_id], accused)

    # Select the replica that is accused the most
    if childs[0] > childs[1]:
        return 0
    return 1


def file_recovery_bd_2x(version, file_path, recovery_dir, volume_name, hosts, replicate, online_bricks):
    """Back up the bad replica of a BD file and re-enable the other replica.

    Steps: locate the replicate group(s) holding the file (the operator has to
    choose when there are several), select the replica to back up, flush
    write cache / tier data for it, copy its LVs to
    ``<recovery_dir>/file_backup/<gfid>.qcow2`` and finally, when the other
    data replica is online, restore its changelog state.

    Args:
        version: storage version; ``>= common.VS_VERSION_3_0`` switches to the
            3.x lookups where the replicate key is the gfid.
        file_path: file path relative to the volume root.
        recovery_dir: directory that receives the ``file_backup`` folder.
        volume_name: name of the storage volume.
        hosts: host list, used by the 2.x replicate lookup.
        replicate: replicate layout handed to the lookup helpers.
        online_bricks: mapping brick path -> 'y' when the brick is online.

    Returns:
        0 on success, -1 on any failure (details are written to the log).
    """
    # Find the replicate group(s) containing the file; there may be several
    if version >= common.VS_VERSION_3_0:
        from modules.file_recovery.show_fault_files import get_file_replicate_3x
        recovery_replicate = get_file_replicate_3x(file_path, replicate, online_bricks)
    else:
        from modules.file_recovery.show_fault_files import get_file_replicate_2x
        recovery_replicate = get_file_replicate_2x(file_path, hosts, replicate)
    if not recovery_replicate:
        common.logger.error('failed to find {} in replicate'.format(file_path))
        return -1

    rep_id_chosen = -1
    color = common.Colored()

    # The file exists in several replicate groups: one has to be chosen by hand
    # NOTE(review): input is matched as an int key, but on 3.x the keys are
    # gfids - confirm the manual selection can succeed there.
    if common.fault_point_result() or len(recovery_replicate) > 1:
        while True:
            print(color.cyan('当前存在多个复制组，请联系研发协助，选择一个需要恢复的复制组编号'))
            for rep_id, bricks_info in recovery_replicate.items():
                print(color.cyan('复制组号: {}, 副本列表: {}'.format(rep_id, bricks_info)))
            print(color.fuchsia('请输入需要修复的复制组编号数字'))
            raw_line = sys.stdin.readline()
            if not raw_line:
                # '' means EOF; looping again would never terminate
                common.logger.error('failed to recovery, stdin reached EOF while choosing replicate')
                return -1
            rep_id_input = raw_line.strip('\n')
            if rep_id_input.isdigit() and recovery_replicate.get(int(rep_id_input)):
                rep_id_chosen = int(rep_id_input)
                break
            else:
                print(color.red('输入字符不是合法的复制组编号，请重新输入'))
    elif len(recovery_replicate) == 1:
        rep_id_chosen = list(recovery_replicate)[0]

    if not recovery_replicate or not recovery_replicate.get(rep_id_chosen):
        common.logger.error('failed to recovery, rep_id_chosen: {} is invalid'.format(rep_id_chosen))
        return -1

    bricks_chosen = recovery_replicate.get(rep_id_chosen)
    is_3x = version >= common.VS_VERSION_3_0

    # Automatically select the replica that gets backed up
    if is_3x:
        # On 3.x the key of the replicate group is the gfid
        gfid = rep_id_chosen
        if not gfid:
            common.logger.error('failed to get gfid, file_path: {}'.format(file_path))
            return -1
        child_id_chosen = file_recovery_bd_select_child_2x(gfid, bricks_chosen, online_bricks)
    else:
        gfid = None
        child_id_chosen = file_recovery_bd_select_child_2x(file_path, bricks_chosen, online_bricks)

    # Validate the selection before it is used as a list index: -1 would
    # silently address the last brick of the group.
    if child_id_chosen < 0:
        common.logger.error('failed to recovery, child_id_chosen: {} is invalid'.format(child_id_chosen))
        return -1

    if not is_3x:
        gfid = common.vs_get_gfid_2x(bricks_chosen[child_id_chosen]['host'],
                                     bricks_chosen[child_id_chosen]['path'], file_path)
        if not gfid:
            common.logger.error('failed to get gfid, file_path: {}'.format(file_path))
            return -1

    # Make sure the backup directory exists
    nfs_recovery_dir = os.path.join(recovery_dir, 'file_backup')
    if not os.path.exists(nfs_recovery_dir):
        cmdline = '/bin/mkdir -p {}'.format(nfs_recovery_dir)
        common.logger.info('try to cmdline: {}'.format(cmdline))
        common.cli(cmdline)

    # A stale backup of the same gfid is replaced
    output_path = '{}/{}.qcow2'.format(nfs_recovery_dir, gfid)
    if os.path.exists(output_path):
        os.remove(output_path)

    print(color.cyan('选择副本: {} 执行备份, 备份路径: {}'.format(child_id_chosen, output_path)))
    common.logger.info('try to select replicate_id: {}, child_id: {} to backup'.format(rep_id_chosen, child_id_chosen))

    host = bricks_chosen[child_id_chosen]['host']
    brick_path = bricks_chosen[child_id_chosen]['path']

    wcache_needs_reset = False  # true while the forced write-cache setting is active
    try:
        if is_3x:
            # On 3.x the file is addressed by its gfid path
            filename = '.glusterfs/{}/{}/{}'.format(gfid[0:2], gfid[2:4], gfid)
        else:
            filename = file_path

        # Flush the write cache
        if common.is_wcc_dirty_2x(host, brick_path, filename):
            wcache_needs_reset = True
            common.force_clean_wcache_2x(volume_name, host, brick_path, filename)

        # Flush the tier
        if common.is_tier_dirty_2x(host, brick_path, filename):
            common.force_clean_tier_2x(host, brick_path, filename)

        # Copy the data
        recovery_data_2x(host, brick_path, gfid, output_path)

        # Restore the write-cache setting before touching the changelogs
        if wcache_needs_reset:
            common.reset_clean_wcache_2x(volume_name)
            wcache_needs_reset = False

        # The other data replica is the good one (data replica ids are only 0 and 1)
        if child_id_chosen in (0, 1):
            good_child_id = 1 - child_id_chosen
        else:
            good_child_id = -1

        # Restore the changelog state of the good replica when it is online
        if good_child_id != -1 and online_bricks.get(bricks_chosen[good_child_id]['path']) == 'y':
            print(color.cyan('启用副本: {} 为正常副本'.format(good_child_id)))
            common.logger.info('try to enable child_id: {}'.format(good_child_id))
            if is_3x:
                # Raw strings: '\d' is a regex class, not a string escape
                client_ids = [r'vs_vol_rep\d-vclnt-2', r'vs_vol_rep\d-vclnt-3']
            else:
                client_ids = ['vs_vol_rep2-client-{}'.format(brick['id'] - 1) for brick in bricks_chosen]
            common.enable_replica_changelogs_2x(filename, good_child_id, client_ids, bricks_chosen, online_bricks)
        else:
            print(color.cyan('有数据副本离线，不启用副本'))

        return 0
    except Exception:
        # KeyboardInterrupt / SystemExit are deliberately not swallowed here;
        # the finally clause still restores the write-cache setting for them.
        common.logger.error('failed to recovery file_path: {}, got except:{}'.format(file_path, traceback.format_exc()))
        return -1
    finally:
        if wcache_needs_reset:
            common.reset_clean_wcache_2x(volume_name)


def file_recovery_filename_2x(version, hosts, file_path, volume_name, bricks, replicate, online_bricks):
    """Recover one file, addressed by its absolute path inside the volume.

    The work happens while holding the per-file vsfire lock.  Files that
    report a size greater than 0 are handled as BD files by
    ``file_recovery_bd_2x``; other files are repaired with the
    ``vsts-fix-vmcfg-splitbrain2x.sh`` script (not supported on 3.x).

    Args:
        version: storage version.
        hosts: cluster host list.
        file_path: path of the file, must be a ``str`` starting with '/'.
        volume_name: name of the storage volume.
        bricks: brick list (not used by this function).
        replicate: replicate layout.
        online_bricks: mapping brick path -> 'y' when the brick is online.

    Returns:
        0 on success, -1 (or the failing helper's result) otherwise.
    """
    if not isinstance(file_path, str) or not file_path.startswith('/'):
        # The name is a file path and therefore has to start with '/'
        common.logger.error('failed to supported, filename: {} is not path'.format(file_path))
        return -1

    common.logger.info('try to recovery file_path: {}'.format(file_path))
    # Drop the leading '/'
    file_path = file_path[1:]

    force_save_local = False  # set when the default directory inside the volume must not be offered
    # The recovery runs while holding the lock
    lock_file = common.get_vsfire_lock_file(file_path)
    with common.VsfireFlock(lock_file):
        from modules.file_recovery.show_fault_files import get_file_size_2x
        file_size = get_file_size_2x(version, file_path, hosts, replicate, online_bricks)
        mount_path = common.get_vs_mount_path(volume_name, version)
        statvfs = os.statvfs(mount_path)
        available_size = statvfs.f_bavail * statvfs.f_frsize
        # The volume needs at least 16 GiB of free space on top of the file size
        if file_size > available_size - (16*1024*1024*1024):
            common.logger.warn('failed to recovery, file_size: {}, available_size: {}'.
                                format(common.to_human_readable(file_size), common.to_human_readable(available_size)))
            force_save_local = True

        recovery_dir = check_and_get_recovery_dir(version, volume_name, file_path, file_size, available_size, force_save_local)
        result = 0
        print(common.Colored().cyan('开始修复文件: {}'.format(file_path)))
        if file_size > 0:
            # An LV size greater than 0 means this is a BD file
            result = file_recovery_bd_2x(version, file_path, recovery_dir, volume_name, hosts, replicate, online_bricks)
        else:
            # Non-BD files are not supported on 3.x
            if version >= common.VS_VERSION_3_0:
                common.logger.error('failed to supported, filename: {}'.format(file_path))
                return -1

            # Format the message itself, not the already coloured string
            print(common.Colored().cyan('修复文件: {} 的日志输出：'
                                        '/sf/log/today/vs/scripts/vsts-fix-vmcfg-splitbrain2x.sh.log'.format(file_path)))
            script_path = os.path.join(os.path.dirname(__file__), 'vsts-fix-vmcfg-splitbrain2x.sh')
            cmdline = '/bin/bash {} {}/"{}"'.format(script_path, mount_path, file_path)
            common.logger.info('try to cmdline: {}'.format(cmdline))
            common.cli(cmdline)
            # Temporary lists left behind by the script
            tmp_files = ['/root/vsfire_recovery_FEC61AC2_bad_file.list',
                         '/root/vsfire_recovery_FEC61AC2_bad_noqcow2_file.list',
                         '/root/vsfire_recovery_FEC61AC2_posixfile.list']
            common.logger.info('success to cmdline: {} and try to rm files: {}'.format(cmdline, tmp_files))
            for tmp_file in tmp_files:
                if os.path.exists(tmp_file):
                    os.remove(tmp_file)
        if not result:
            print(common.Colored().cyan('文件: {}，修复完成'.format(file_path)))
        else:
            print(common.Colored().red('文件: {}，修复失败'.format(file_path)))
        return result


def file_recovery_vmid_2x(version, hosts, vmid, split_brain_files, volume_name, bricks, replicate, online_bricks):
    """Recover every split-brain file found in the directory of one VM.

    Each entry of the VM directory is turned into a volume-relative path (the
    mount path is removed) and recovered when that path is listed in
    ``split_brain_files``.

    Returns:
        0 when every attempted recovery succeeded, -1 when at least one failed.
    """
    vm_dir_path = common.vs_get_vm_path_byvmid(vmid)
    print(common.Colored().cyan('开始修复虚拟机路径: {}'.format(vm_dir_path)))

    status = 0
    for name in os.listdir(vm_dir_path):
        absolute_path = os.path.join(vm_dir_path, name)
        relative_path = absolute_path.replace(common.get_vs_mount_path(volume_name, version), '')
        if relative_path in split_brain_files:
            failed = file_recovery_filename_2x(version, hosts, relative_path, volume_name, bricks, replicate,
                                               online_bricks)
            if failed:
                status = -1
        else:
            # Not listed as split-brain, so there is nothing to repair for it
            common.logger.info('file_path: {} not need recovery'.format(relative_path))
    return status


def file_recovery_2x(version, filename):
    """Entry point of the recovery: dispatch on what ``filename`` holds.

    * empty value     -> scan for split-brain VMs and recover all of them
    * int (not bool)  -> treated as a VMID, recover that VM if it is split-brain
    * anything else   -> treated as a file path (must start with '/')

    Returns:
        0 on success, -1 on failure.
    """
    volume_name, hosts, replicate_num, has_arbiter, bricks, replicate = common.get_vs_cluster_info()
    if not volume_name:
        common.logger.error('failed to supported, cannot find volume name')
        return -1

    if replicate_num != 2:
        common.logger.error('failed to supported, replicate_num: {}'.format(replicate_num))
        return -1

    online_bricks = common.get_online_bricks(volume_name)
    if common.fault_point_result() or not online_bricks:
        # At least one brick has to be online
        common.logger.error('failed to supported, cannot find online_bricks')
        return -1

    if not filename:
        # No argument: handle every split-brain VM automatically
        common.check_terminal_input('是否开始扫描所有脑裂虚拟机，输入\'y\'继续，\'n\'退出')

        # Collect the split-brain files and VMs
        from modules.file_recovery.show_fault_files import get_fault_files_2x
        split_brain_files, split_brain_vms = get_fault_files_2x(version, volume_name)

        # Show them to the operator
        from modules.file_recovery.show_fault_files import print_fault_files_2x
        if not split_brain_vms:
            print(common.Colored().red('找不到脑裂虚拟机'))
            common.logger.error('failed to find split-brain vmids')
            return -1
        print_fault_files_2x(version, hosts, replicate, online_bricks, split_brain_files, split_brain_vms)

        common.check_terminal_input('是否开始修复所有脑裂虚拟机，输入\'y\'继续，\'n\'退出')

        overall = 0
        for current_vmid, _ in split_brain_vms.items():
            failed = file_recovery_vmid_2x(version, hosts, current_vmid, split_brain_files,
                                           volume_name, bricks, replicate, online_bricks)
            if failed:
                print(common.Colored().red('修复虚拟机vmid: {}，失败'.format(current_vmid)))
                overall = -1
        return overall

    if isinstance(filename, bool) or not isinstance(filename, int):
        # Everything that is not a number is taken as a file path starting with '/'
        return file_recovery_filename_2x(version, hosts, filename, volume_name, bricks, replicate, online_bricks)

    # A number is a VMID: collect the split-brain files and VMs first
    from modules.file_recovery.show_fault_files import get_fault_files_2x
    split_brain_files, split_brain_vms = get_fault_files_2x(version, volume_name)
    if not split_brain_vms:
        print(common.Colored().red('找不到脑裂虚拟机'))
        common.logger.error('failed to find split-brain vmids')
        return -1

    for current_vmid, _ in split_brain_vms.items():
        if int(current_vmid) != filename:
            continue
        return file_recovery_vmid_2x(version, hosts, current_vmid, split_brain_files,
                                     volume_name, bricks, replicate, online_bricks)

    print(common.Colored().red('虚拟机vmid: {}，访问正常，不需要修复'.format(filename)))
    common.logger.error('failed to recovery vmid: {}'.format(filename))
    return -1


def get_online_brick_by_rt(rt_id, replicate, online_bricks):
    """Return the online brick with id ``rt_id`` from the right-tree list.

    The bricks are read from ``replicate[common.RIGHT_TREE_INDEX]``; a brick is
    online when ``online_bricks`` maps its path to 'y'.  An empty dict is
    returned when no such brick exists.
    """
    candidates = replicate[common.RIGHT_TREE_INDEX]
    for candidate in candidates:
        if candidate.get('id') != rt_id:
            continue
        if online_bricks.get(candidate['path']) == 'y':
            return candidate
    return {}


def file_recovery_efs_xattr_normal(xattr):
    """Return True when the xattr lines show neither a BAD marker nor an accusation.

    A line matching ``trusted.file_status`` is the BAD marker; a line matching
    ``vs_vol_rep<digit>-vclnt`` whose value is not the all-zero changelog is an
    accusation.
    """
    clean_changelog = '0x000000000000000000000000'
    for entry in xattr:
        # BAD marker present
        if re.search(r"trusted.file_status", entry) is not None:
            return False
        # Only changelog attributes are inspected for accusations
        if not re.search(r"vs_vol_rep\d-vclnt", entry):
            continue
        if entry.split('=')[1] != clean_changelog:
            return False
    return True


def get_shard_right_tree_brick(gfid, route, replicate, online_bricks):
    """Choose the data brick from which a shard is going to be copied.

    The xattrs of every online brick listed in ``route['rt']`` are read.  When
    none of them shows a BAD marker or an accusation, or only one data replica
    delivered xattrs, the first online data brick is returned.  Otherwise the
    xattrs are printed and the operator has to pick a replica by index.

    Args:
        gfid: gfid of the shard.
        route: dict whose 'rt' entry lists the brick ids of the shard.
        replicate: replicate layout used to resolve brick ids.
        online_bricks: mapping brick path -> 'y' when the brick is online.

    Returns:
        The chosen brick dict; ``{}`` when the route has no 'rt' entry; None
        when no data brick is online, when the chosen replica delivered no
        xattrs, or when stdin reaches EOF during the manual selection.

    Raises:
        common.CmdError: when the fault injection point fires.
    """
    rts = route.get('rt')
    if not rts:
        common.logger.error('failed to get rt, gfid: {}'.format(gfid))
        return {}

    first_brick_online = None
    data_online_brick_count = 0
    need_chosen = False  # manual selection is needed with several replicas and an accusation
    xattrs = []
    xattr_bricks = []
    for rt_id in rts:
        brick = get_online_brick_by_rt(rt_id, replicate, online_bricks)
        if not brick:
            # The brick is offline, continue with the next one
            common.logger.warn('brick id: {} is offline'.format(rt_id))
            continue

        if common.fault_point_result():
            raise common.CmdError('common.fault_point_result')

        is_arbiter = brick.get('arbiter')
        if not is_arbiter:
            # Data replica
            if not first_brick_online:
                first_brick_online = brick
            result = common.vs_getfattr_3x(brick['host'], brick['path'], gfid)
        else:
            # Arbiter replica
            result = common.vs_getfattr_2x(brick['host'], brick['path'], gfid)

        if result:
            if not file_recovery_efs_xattr_normal(result.split('\n')):
                # BAD marker or accusation found: the source has to be picked by hand
                need_chosen = True
            if not is_arbiter:
                # Count the data replicas that delivered xattrs
                data_online_brick_count += 1

            xattrs.append(result)
            xattr_bricks.append(brick)

    # With a single data replica online there is nothing to choose from
    if need_chosen and data_online_brick_count == 1:
        need_chosen = False

    # No online data brick at all
    if not first_brick_online:
        common.logger.error('failed to get online data-brick, gfid: {}'.format(gfid))
        return None

    # Without manual selection the first online data replica is used
    if not need_chosen:
        return first_brick_online

    # Manual selection of a data replica
    color = common.Colored()
    brick_id_chosen = -1
    while True:
        print(color.cyan('\ngfid: {} 存在多个副本，需要手动选择一个数据副本用于恢复数据'.format(gfid)))
        for index, rt_id in enumerate(rts):
            brick_online = False
            for brick_index, brick in enumerate(xattr_bricks):
                if brick.get('id') == rt_id:
                    # Found in the xattr list, so the brick is online
                    brick_online = True
                    print(color.cyan('id: {}, host: {}，path: {}'.format(index, brick['host'], brick['path'])))
                    print(color.cyan('{}'.format(xattrs[brick_index])))
                    break
            if not brick_online:
                print(color.cyan('id: {}'.format(index)))
                print(color.red('无法获取指控信息，副本可能已经离线\n'))
        print(color.fuchsia('请联系研发协助，输入一个需要恢复数据的副本编号 \'0\'或\'1\'或\'2\''))
        raw_line = sys.stdin.readline()
        if not raw_line:
            # '' means EOF; asking again would loop forever
            common.logger.error('failed to choose brick, stdin reached EOF, gfid: {}'.format(gfid))
            return None
        child_id_input = raw_line.strip('\n')
        # NOTE(review): the upper bound len(rts) // 2 is kept as found - confirm
        # it matches the intended set of selectable data replicas.
        if child_id_input.isdigit() and int(child_id_input) <= len(rts) // 2:
            brick_id_chosen = rts[int(child_id_input)]
            break
        else:
            print(color.red('输入字符不是合法的副本编号，请重新输入'))

    # Return the brick of the chosen replica if it delivered xattrs
    for brick in xattr_bricks:
        if brick.get('id') == brick_id_chosen:
            return brick

    return None


def kickout_for_all_shards(volume_name, shard_gfid, brick):
    """Flush write cache and tier data of one shard on the given brick.

    Each flush is only triggered when the corresponding ``is_*_dirty_3x``
    check reports dirty data.
    """
    node, data_path = brick['host'], brick['path']

    wcc_dirty = common.is_wcc_dirty_3x(shard_gfid, node, data_path)
    if wcc_dirty:
        common.force_clean_wcache_3x(volume_name, shard_gfid, node, data_path)

    tier_dirty = common.is_tier_dirty_3x(shard_gfid, node, data_path)
    if tier_dirty:
        common.force_clean_tier_3x(shard_gfid, node, data_path)


def copy_data_for_all_shards(first_gfid, shard_gfid, shard_idx, brick, efs_mount_magic, recovery_dir):
    """Copy one shard with ``dd`` into ``<recovery_dir>/shard_backup``.

    The source is ``/mnt/<vg>_<VSFIRE_MAGIC>_<efs_mount_magic>/<shard_gfid>``
    on the brick's host, the target is named ``<first_gfid>_<shard_idx>``.

    Raises:
        common.CmdError: when the fault injection point fires.
    """
    node = brick['host']
    volume_group = brick['path'].split('/')[5]  # the VG is the 6th path component
    efs_dir = '/mnt/{}_{}_{}'.format(volume_group, common.VSFIRE_MAGIC, efs_mount_magic)
    backup_dir = '{}/shard_backup'.format(recovery_dir)

    # Create the backup directory for the shard data on first use
    if not os.path.exists(backup_dir):
        mkdir_cmd = '/bin/mkdir -p {}'.format(backup_dir)
        common.logger.info('try to cmdline: {}'.format(mkdir_cmd))
        common.cli(mkdir_cmd, False)

    if common.fault_point_result():
        raise common.CmdError('common.fault_point_result')

    # Copy the shard out of the efs mount into the backup directory
    source = '{}/{}'.format(efs_dir, shard_gfid)
    target = '{}/{}_{}'.format(backup_dir, first_gfid, shard_idx)
    dd_cmd = '/bin/dd if={} of={} bs=4M oflag=direct'.format(source, target)
    common.logger.info('try to host: {}, cmdline: {}'.format(node, dd_cmd))
    print(common.Colored().cyan('\n备份分片, 在主机: {}, 从 {} 到 {} '.format(node, source, target)))
    common.remote_cli(node, dd_cmd, False)


def clean_temp_shards(first_gfid, recovery_dir):
    """Remove the backed-up shards of ``first_gfid`` from the shard backup directory.

    Runs ``rm -f`` on every path starting with
    ``<recovery_dir>/shard_backup/<first_gfid>``.

    Raises:
        common.CmdError: when the fault injection point fires.
    """
    if common.fault_point_result():
        raise common.CmdError('common.fault_point_result')

    shard_prefix = '{}/shard_backup/{}'.format(recovery_dir, first_gfid)
    rm_cmd = '/bin/rm -f {}*'.format(shard_prefix)
    common.logger.info('try to cmdline: {}'.format(rm_cmd))
    common.cli(rm_cmd, False)


def stripe_file_data_all_empty(file_datas):
    """Return True when every chunk in ``file_datas`` is empty (or the list is empty)."""
    return not any(file_datas)


def stripe_join_for_shard(stripe_entries, stripe_size, shard_size, offset, nfs_mount_shard_dir, recovery_file_path,
                          file_size, flush_data_size=4*1048576, update_interval=3):
    """Interleave the shards of one stripe group into the recovery file.

    The shards are read round-robin in chunks of ``stripe_size`` and the chunks
    are written sequentially to ``recovery_file_path`` starting at ``offset``.
    Short chunks are zero-padded to a full stripe as long as the padding still
    fits into the file, and nothing is ever written beyond ``file_size``.
    If any shard of the group is missing the whole group is skipped.

    Args:
        stripe_entries: shard file names of the stripe group, in stripe order.
        stripe_size: number of bytes read from each shard per round.
        shard_size: size of a shard (not used by this function).
        offset: byte offset in the recovery file where the group starts.
        nfs_mount_shard_dir: directory containing the shard files.
        recovery_file_path: existing file that receives the data.
        file_size: total size of the recovery file in bytes.
        flush_data_size: amount of written data after which the file is synced.
        update_interval: minimum number of seconds between progress lines.
    """
    shard_paths = [os.path.join(nfs_mount_shard_dir, entry) for entry in stripe_entries]
    for shard_path in shard_paths:
        if not os.path.exists(shard_path):
            # One shard is missing: treat the whole stripe group as absent
            common.logger.warn("failed to find shard_path: {}".format(shard_path))
            return

    file_handles = []
    flush_data_len = 0  # bytes written since the last sync
    loop_write_len = 0  # bytes written since the last progress line
    start_loop_time = time.time()
    # Binary mode: the shards are read as bytes and must be written unchanged
    with open(recovery_file_path, 'r+b') as target_handle:
        try:
            target_handle.seek(offset)
            for shard_path in shard_paths:
                file_handles.append(open(shard_path, 'rb'))
            while True:
                # Read one full stripe, one chunk per shard
                file_datas = [file_handle.read(stripe_size) for file_handle in file_handles]

                # Only when every chunk is empty has the group been read completely
                if stripe_file_data_all_empty(file_datas):
                    common.logger.info("stripe_entries: {} write offset: {} completely".format(stripe_entries, offset))
                    return

                # Write the chunks back to back, in stripe order
                for data in file_datas:
                    write_data_len = len(data)
                    if write_data_len < stripe_size:  # short chunk: pad with zeros
                        padding_size = stripe_size - write_data_len
                        if padding_size <= file_size - offset:  # the padding must fit into the file
                            data += b'\x00' * padding_size
                            write_data_len += padding_size
                    if write_data_len + offset > file_size:  # never run past the end of the file
                        write_data_len = file_size - offset
                    if write_data_len > 0:
                        # Write only the clamped length; writing the whole
                        # buffer would grow the file beyond file_size.
                        target_handle.write(data[:write_data_len])
                        offset += write_data_len
                        loop_write_len += write_data_len
                        flush_data_len += write_data_len
                        # Sync once enough data has accumulated
                        if flush_data_len >= flush_data_size:
                            # fsync only covers data already handed to the OS
                            target_handle.flush()
                            os.fsync(target_handle.fileno())
                            flush_data_len = 0

                    loop_write_time = time.time()
                    if loop_write_time - start_loop_time >= update_interval:
                        loop_speed = loop_write_len / (loop_write_time - start_loop_time)
                        remaining_time = (file_size - offset) / loop_speed if loop_speed > 0 else float('inf')
                        percent = 100.0 * offset / file_size if file_size > 0 else 100.0
                        common.print_with_clear('文件总大小: {}, 已拷贝: {} ({:.2f} %), 速率: {}/s, 预计剩余时间: {:.2f} 秒'.
                                                format(common.to_human_readable(file_size),
                                                       common.to_human_readable(offset),
                                                       percent,
                                                       common.to_human_readable(loop_speed), remaining_time))
                        start_loop_time = loop_write_time
                        loop_write_len = 0

                    # The whole file length has been written
                    if file_size <= offset:
                        common.logger.info("stripe_entries: {} write offset: {} completely".format(stripe_entries, offset))
                        return
        finally:
            for handle in file_handles:
                handle.close()
            target_handle.flush()
            os.fsync(target_handle.fileno())


def stripe_join_for_all_shards(first_gfid, stripe_width, stripe_size, shard_size, file_size, recovery_dir):
    """Rebuild ``<recovery_dir>/file_backup/<first_gfid>.qcow2`` from backed-up shards.

    The target file is (re)created with ``file_size`` bytes.  The shards named
    ``<first_gfid>_<index>`` in ``<recovery_dir>/shard_backup`` are then joined
    stripe group by stripe group (``stripe_width`` consecutive indices each).

    Raises:
        common.CmdError: no shard was found, or the number of shards is not a
            multiple of ``stripe_width``.
    """
    file_backup_dir = '{}/file_backup'.format(recovery_dir)
    if not os.path.exists(file_backup_dir):
        common.logger.info('try to mkdir: {}'.format(file_backup_dir))
        os.mkdir(file_backup_dir)

    # Recovered files are always given the qcow2 suffix
    recovery_file_path = os.path.join(file_backup_dir, first_gfid) + '.qcow2'
    if os.path.exists(recovery_file_path):
        common.logger.warn('recovery_file_path: {} is exist, and try to remove and recreate'.format(recovery_file_path))
        # An existing file is deleted and created again
        os.remove(recovery_file_path)

    with open(recovery_file_path, 'a+') as created:
        common.logger.info('try to create and truncate file: {} to size: {}'.format(recovery_file_path, file_size))
        created.truncate(file_size)

    nfs_mount_shard_dir = '{}/shard_backup'.format(recovery_dir)
    sorted_entries = [name for name in os.listdir(nfs_mount_shard_dir) if name.split('_')[0] == first_gfid]
    sorted_entries.sort(key=lambda name: int(name.split('_')[1]))
    if not sorted_entries or len(sorted_entries) % stripe_width != 0:
        cmd_error = 'failed to first_gfid: {}, entries num: {} is invalid'.format(first_gfid, len(sorted_entries))
        raise common.CmdError(cmd_error)

    print(common.Colored().cyan('\n恢复文件: {}, 拷贝分片: {}'.format(recovery_file_path, sorted_entries)))
    last_shard_idx = int(sorted_entries[-1].split('_')[1])  # index of the last shard

    group_no = 0
    group = []
    for shard_idx in range(last_shard_idx + 1):
        # Names are generated, not listed: a stripe group in the middle may not exist
        group.append('{}_{}'.format(first_gfid, shard_idx))
        if len(group) != stripe_width:
            continue
        group_offset = group_no * stripe_width * shard_size
        stripe_join_for_shard(group, stripe_size, shard_size, group_offset, nfs_mount_shard_dir,
                              recovery_file_path, file_size)
        group = []
        group_no += 1
    print(common.Colored().cyan('\n恢复文件: {} 完成，手动验证文件数据正确后，需要删除该文件'.format(recovery_file_path)))


def file_recovery_filename_3x(file_path, volume_name, replicate, online_bricks):
    if not isinstance(file_path, str) or not file_path.startswith('/'):
        # filename表示文件路径，一定是'/'开头
        common.logger.error('failed to supported, filename: {} is not path'.format(file_path))
        return -1

    common.logger.info('try to recovery file_path: {}'.format(file_path))
    # 去掉开头的'/'
    file_path = file_path[1:]
    color = common.Colored()

    force_save_local = False #是否只能拷贝到本地存储
    # 锁内执行文件修复
    lock_file = common.get_vsfire_lock_file(file_path)
    with common.VsfireFlock(lock_file) as lock:
        from modules.file_recovery.show_fault_files import get_file_size_3x
        file_size = get_file_size_3x(file_path, replicate, online_bricks)
        statvfs = os.statvfs(common.get_vs_mount_path(volume_name))
        available_size = statvfs.f_bavail * statvfs.f_frsize
        # 文件大小需要大于2倍卷可用大小
        if file_size >= available_size / 2:
            common.logger.warn('failed to recovery, file_size: {}, available_size: {}'.
                                format(common.to_human_readable(file_size), common.to_human_readable(available_size)))
            force_save_local = True

        # 获取恢复路径 (输入的版本号，只要不是vs2.6以下就可以)
        recovery_dir = check_and_get_recovery_dir(common.VS_VERSION_3_0, volume_name, file_path, file_size, available_size, force_save_local)

        print color.cyan('开始修复文件: {}'.format(file_path))
        # 获取文件所有的分片及路由列表
        shards_chosen = common.get_file_shards_3x(file_path, replicate, online_bricks)
        if not shards_chosen:
            common.logger.error('failed to get chosen shards, file_path: {}'.format(file_path))
            return -1

        common.logger.info('success to get shards_chosen: {}'.format(shards_chosen))
        print color.cyan('恢复分片列表:')
        print color.cyan(', '.join(shard['gfid'] for shard in shards_chosen))

        # 获取可以拷贝的数据副本
        bricks_chosen = []
        for shard in shards_chosen:
            gfid = shard['gfid']
            route = shard['route']
            # 获取右子树，可用于恢复数据的brick
            brick_chosen = get_shard_right_tree_brick(gfid, route, replicate, online_bricks)
            if not brick_chosen:
                common.logger.error('failed to get chosen brick, gfid: {}'.format(gfid))
                return -1
            bricks_chosen.append(brick_chosen)

        common.logger.info('success to get bricks_chosen: {}'.format(bricks_chosen))
        print color.cyan('恢复分片拷贝的副本:')
        for brick in bricks_chosen:
            print color.cyan('host: {}, brick_path: {}'.format(brick['host'], brick['path']))

        if len(shards_chosen) != len(bricks_chosen):
            common.logger.error('shards_chosen len: {} not equal bricks_chosen len: {}'.
                                format(len(shards_chosen), len(bricks_chosen)))
            return -1

        efs_mount_magic = common.calculate_str_md5(file_path)
        try:
            # 回刷所有brick的缓存
            for shard_idx in range(len(bricks_chosen)):
                kickout_for_all_shards(volume_name, shards_chosen[shard_idx]['gfid'], bricks_chosen[shard_idx])

            # 将所有的efs挂载出来
            for shard_idx in range(len(bricks_chosen)):
                common.mount_efs_path(bricks_chosen[shard_idx], efs_mount_magic)

            # 等待3秒，等待所有的efs都挂载完成
            # time.sleep(3)

            # 拷贝所有的选择的分片到vs存储内
            for shard_idx in range(len(bricks_chosen)):
                real_shard_idx = shards_chosen[shard_idx]['shard_idx']  # 获取shard真实的idx
                copy_data_for_all_shards(shards_chosen[0]['gfid'], shards_chosen[shard_idx]['gfid'],
                                         real_shard_idx, bricks_chosen[shard_idx], efs_mount_magic, recovery_dir)
            # 等待3秒，等待所有远端nfs挂载点flush完成数据
            time.sleep(3)

            # 获取分片条带信息
            from modules.file_recovery.show_fault_files import get_file_parm_3x
            (stripe_width, stripe_size, shard_size, file_size) = \
                get_file_parm_3x(shards_chosen[0]['gfid'], bricks_chosen[0])
            print color.cyan('获取文件条带信息: 条带宽度: {}, 条带大小: {}, 分片大小: {}, 文件大小: {}'.
                             format(stripe_width, stripe_size, shard_size, file_size))

            # 基于条带策略，将拷贝后的分片数据合并成文件数据
            stripe_join_for_all_shards(shards_chosen[0]['gfid'], stripe_width, stripe_size, shard_size, file_size, recovery_dir)

            # 卸载所有efs本地挂载点
            for shard_idx in range(len(bricks_chosen)):
                common.umount_efs_path(bricks_chosen[shard_idx], efs_mount_magic)

            # 清理分片备份目录
            clean_temp_shards(shards_chosen[0]['gfid'], recovery_dir)

            print color.cyan('文件: {}，修复完成'.format(file_path))
            return 0
        except:
            common.logger.error('failed to recovery file: {}, got except:{}'.format(file_path, traceback.format_exc()))

        # 卸载所有efs本地挂载点
        for shard_idx in range(len(bricks_chosen)):
            common.umount_efs_path(bricks_chosen[shard_idx], efs_mount_magic)
        # 清理分片备份目录
        clean_temp_shards(shards_chosen[0]['gfid'], recovery_dir)
    print color.red('文件: {}，修复失败'.format(file_path))
    return -1


def file_recovery_vmid_3x(vmid, split_brain_files, volume_name, replicate, online_bricks):
    result = 0

    # 没有脑裂文件，不需要修复
    if not split_brain_files:
        common.logger.info('failed to find split_brain_files')
        return 0

    vm_dir_path = common.vs_get_vm_path_byvmid(vmid)
    print common.Colored().cyan('开始修复虚拟机路径: {}'.format(vm_dir_path))
    nfs_vm_dir_path = ''
    if volume_name in vm_dir_path:
        # 去掉卷前缀
        nfs_vm_dir_path = vm_dir_path.replace(common.get_vs_mount_path(volume_name), '')
    else:
        composite_volumes = common.get_composite_volumes()
        for composite_volume in composite_volumes:
            if composite_volume in vm_dir_path:
                # 去掉卷前缀
                nfs_vm_dir_path = vm_dir_path.replace(common.get_vs_mount_path(composite_volume), '')
                # 复用卷场景，转化成相对主卷的路径
                nfs_vm_dir_path = '/.volume/{}{}'.format(composite_volume, nfs_vm_dir_path)
                break

    if not nfs_vm_dir_path:
        common.logger.error('failed to get nfs vm dir path')
        return -1

    filenames = common.vs_listdir_3x(nfs_vm_dir_path, replicate, online_bricks)
    for filename in filenames:
        file_path = os.path.join(nfs_vm_dir_path, filename)
        if file_path not in split_brain_files:
            # 当前文件不在脑裂文件中，说明当前文件不脑裂，不需要修复
            common.logger.info('file_path: {} not need recovery'.format(file_path))
            continue

        if file_recovery_filename_3x(file_path, volume_name, replicate, online_bricks):
            result = -1
    return result


def file_recovery_3x(version, filename):
    volume_name, hosts, replicate_num, has_arbiter, bricks, replicate = common.get_vs_cluster_info()
    if not volume_name:
        common.logger.error('failed to supported, cannot find volume name')
        return -1

    # 两主机环境，使用2.x的恢复方法
    if common.vs_is_two_host(hosts):
        return file_recovery_2x(version, filename)

    # 非三主机或没有开启仲裁的卷不支持
    if not common.vs_has_efs(version, hosts):
        # 至少要支持3主机
        common.logger.error('failed to supported, numbers of hosts is {}'.format(len(hosts)))
        return -1

    online_bricks = common.get_online_bricks(volume_name)
    if common.fault_point_result() or not online_bricks:
        # 至少有一个在线brick
        common.logger.error('failed to supported, cannot find online_bricks')
        return -1
    
    # VS3.1以内的版本，环境中的efs_standalone有内存泄漏问题，增加提示需要使用vsfire内重新编译的
    if version < common.VS_VERSION_3_1 and platform.machine().startswith("x86"):
        cmdline = 'vs_cluster_cmd.sh d {} /sf/vs/sbin/efs_standalone'.format(os.path.join(os.path.dirname(__file__), 'efs_standalone'))
        readline = '注意: 当前版本的efs_standalone工具存在内存泄漏. \n请手动执行命令: {} 完成工具替换(分发注意并发问题)\n'.format(cmdline)
        # common.check_terminal_input(readline)
        print common.Colored().fuchsia('{}'.format(readline))

    result = 0
    if not filename:
        # 如果输入参数为空，表示自动处理所有脑裂虚拟机
        readline = '是否开始扫描所有脑裂虚拟机，输入\'y\'继续，\'n\'退出'
        common.check_terminal_input(readline)

        import pylib.rpcservice as rpcservice
        all_shards = rpcservice.route_list_all_shards(volume_name)
        if not all_shards:
            common.logger.error('failed to supported, cannot find all_shards')
            return -1

        # 获取脑裂文件与虚拟机信息
        from modules.file_recovery.show_fault_files import get_split_brain_files_3x
        split_brain_files, split_brain_vms = get_split_brain_files_3x(all_shards)

        # 打印脑裂文件与虚拟机信息
        from modules.file_recovery.show_fault_files import print_split_brain_files_3x
        if split_brain_vms:
            print_split_brain_files_3x(version, hosts, replicate, online_bricks, split_brain_files, split_brain_vms)
        else:
            print common.Colored().red('找不到脑裂虚拟机')
            common.logger.error('failed to find split-brain vmids')
            return -1

        readline = '是否开始修复所有脑裂虚拟机，输入\'y\'继续，\'n\'退出'
        common.check_terminal_input(readline)

        for split_brain_vmid, vm_name in split_brain_vms.items():
            if file_recovery_vmid_3x(split_brain_vmid, split_brain_files, volume_name, replicate, online_bricks):
                print common.Colored().red('修复虚拟机vmid: {}，失败'.format(split_brain_vmid))
                result = -1

        return result
    elif isinstance(filename, int) and not isinstance(filename, bool):
        # 如果有值，且是数字，表示虚拟机VMID

        import pylib.rpcservice as rpcservice
        all_shards = rpcservice.route_list_all_shards(volume_name)
        if not all_shards:
            common.logger.error('failed to supported, cannot find all_shards')
            return -1

        # 获取脑裂文件与虚拟机信息
        from modules.file_recovery.show_fault_files import get_split_brain_files_3x
        split_brain_files, split_brain_vms = get_split_brain_files_3x(all_shards)
        if not split_brain_vms:
            print common.Colored().red('找不到脑裂虚拟机')
            common.logger.error('failed to find split-brain vmids')
            return -1
        for split_brain_vmid, vm_name in split_brain_vms.items():
            if int(split_brain_vmid) == filename:
                return file_recovery_vmid_3x(split_brain_vmid, split_brain_files, volume_name, replicate, online_bricks)

        print common.Colored().red('虚拟机vmid: {}，访问正常，不需要修复'.format(filename))
        common.logger.error('failed to recovery vmid: {}'.format(filename))
        return -1
    else:
        # 默认是文件路径，文件路径必须以'/'开头
        return file_recovery_filename_3x(filename, volume_name, replicate, online_bricks)


def _file_recovery(filename):
    version = common.get_vs_version()
    if not common.is_vs_version_valid(version):
        print common.Colored().red('执行失败')
        return -1

    if version < common.VS_VERSION_3_0:
        ret = file_recovery_2x(version, filename)
    else:
        ret = file_recovery_3x(version, filename)

    if ret:
        print common.Colored().red('执行失败')
    else:
        print common.Colored().cyan('执行成功')
    return ret
