#!/usr/bin/env python
# -- coding: utf-8 --
# 调整代码结构
# 将此文件内接口为纯粹数据处理，不应加任何控制台提示信息
import os
import subprocess
import sys
import time

from pylib.utils import color
from shard_data_repair import _brickid2brickinfo, _get_cluster_info_3, Brick, _get_cluster_info_2, \
    _remove_xattr

from pylib.fault_recover.shard_data_repair import _get_brick_meta_route, _all_vmid, _vmid2gfid, _gfids_in_file, \
    _get_good_routes, _getfattr_from_meta, _create_and_fill_arbiter, _setfattr_to_meta, _set_route_to_ebd, \
    _modify_cache_route_info, _gfid2route, _brickid2brickinfo, _get_cluster_info_3
from pylib.utils.utiltools import CmdError, remote_cli, logger, cli

# Temporary mount point used on the brick hosts while an EFS volume is
# scanned or repaired by the vs3.x bad-sector helpers in this file.
mnt_dir = '/mnt/efs_badblocks_heal'
# Directory in which the dd dumps of the blocks being repaired are stored.
badblocks_dir = '/root/badblocks'


def _vs_getattr_from_hdd(brickid, gfid):
    """Dump the extended attributes of *gfid* as stored under the brick path.

    :param brickid: id of the brick to query; resolved with
        ``_brickid2brickinfo`` to obtain the host and the path.
    :param gfid: file name (gfid) looked up directly under the brick path.
    :return: the value returned by ``remote_cli(..., split=True)``, or
        ``None`` when a ``subprocess.CalledProcessError`` was raised (the
        failure is logged, not re-raised).  Any other exception propagates.
    """
    # Bound before the try block so the error handler can always format it,
    # even when the failure happens before the command string is built.
    cmd = None
    try:
        info = _brickid2brickinfo(brickid)
        cmd = '/sf/vs/bin/getfattr -d -m . -e hex \"{}/{}\" 2>/dev/null'.format(info['path'], gfid)
        return remote_cli(info['host'], cmd, split=True)
    except subprocess.CalledProcessError as e:
        logger.error("exec {} failed, retcode:{} output:{}".format(cmd, e.returncode, e.output))
    return None


def _vs_getattr_from_ebd(brickid, gfid):
    """Dump the extended attributes of *gfid* through ``efs_dbg``.

    :param brickid: id of the brick to query; resolved with
        ``_brickid2brickinfo`` to obtain the host and the pid passed to
        ``efs_dbg -p``.
    :param gfid: gfid handed to the ``inode xattr`` debug command.
    :return: the value returned by ``remote_cli(..., split=True)``, or
        ``None`` when a ``subprocess.CalledProcessError`` was raised (the
        failure is logged, not re-raised).  Any other exception propagates.
    """
    # Bound before the try block so the error handler can always format it,
    # even when the failure happens before the command string is built.
    cmd = None
    try:
        info = _brickid2brickinfo(brickid)
        cmd = "/sf/vs/sbin/efs_dbg -p {} -c 'inode xattr {}'".format(info['pid'], gfid)
        return remote_cli(info['host'], cmd, split=True)
    except subprocess.CalledProcessError as e:
        logger.error("exec {} failed, retcode:{} output:{}".format(cmd, e.returncode, e.output))
    return None


def _set_bad(brickid, gfid):
    """Mark the data replica of *gfid* on brick *brickid* as bad.

    Sets ``trusted.file_status=bad`` with ``efs_dbg`` on the brick's host and
    returns whatever ``remote_cli`` returns for that command.
    """
    brick = _brickid2brickinfo(brickid)
    set_cmd = "/sf/vs/sbin/efs_dbg -p {} -c 'inode xattr set {} trusted.file_status=bad'".format(
        brick['pid'], gfid)
    return remote_cli(brick['host'], set_cmd)


def _remove_bad(brickid, gfid):
    """Forcibly remove the ``trusted.file_status`` (bad) mark of a file.

    :param brickid: id of the brick (disk) that holds the shard.
    :param gfid: gfid of the file.
    :return: the result of the removal command; ``False`` when the brick id
        is unknown or the brick is neither a data nor an arbiter brick.
    """
    _, cluster_info = _get_cluster_info_3()
    temp_brick = None
    for node in cluster_info:
        if node.id == brickid:
            temp_brick = node
            break
    if temp_brick is None:
        print(color.red("找不到对应brick，请重新输入brick id!"))
        # Nothing to operate on: stop here instead of dereferencing None below.
        return False

    info = _brickid2brickinfo(brickid)

    if temp_brick.type == "arbiter":
        # Arbiter bricks keep the file under .glusterfs/<aa>/<bb>/<gfid>.
        path = temp_brick.path
        path += "/.glusterfs/" + gfid[0:2] + "/" + gfid[2:4] + "/" + gfid
        ret = _remove_xattr(_host=temp_brick.hostname, _remove="trusted.file_status", _file=path)
    elif temp_brick.type == "data":
        cmd = "/sf/vs/sbin/efs_dbg -p {} -c 'inode xattr remove {} trusted.file_status'".format(info['pid'], gfid)
        ret = remote_cli(info['host'], cmd)
    else:
        print("错误的磁盘类型，请选择数据盘或者仲裁数据盘")
        ret = False
    return ret


def _get_set_bad_time(gfid):
    # """
    # gfid查找对应最近log,example:
    # /sf/log/today/vs/log/glusterfs/glusterfs_nfs.log:71426:[2022-04-15 17:02:20.207917]
    #   E [afr-self-heal-data.c:2826:afr_sh_data_set_bad]
    #  0-f1e3bb31_vs_vol_rep3-replicate-1: <gfid:a72b30bc-e19a-4960-981b-ffa1ddd3fba3>
    #  sh_id:3196593,
    #  gfid = a72b30bc-e19a-4960-981b-ffa1ddd3fba3,
    #   set bad to index = 1
    # :return:
    # """

    def get_last_log(_cmd):
        _last_log = None
        p = os.popen(_cmd)
        while True:
            log_info = p.readline()
            if not log_info:
                break
            _last_log = log_info
        return _last_log

    cmd = r"grep 'set bad to index = 1' /sf/log/*/vs/log/glusterfs/glusterfs_nfs.log|grep {}".format(gfid)
    last_log = get_last_log(cmd)
    if not last_log or gfid not in last_log:
        # 最近没有打bad记录就到历史log里去找
        cmd = r"zgrep 'set bad to index = 1' /sf/log/[1-9]\.tar\.gz|grep {}".format(gfid)
        last_log = get_last_log(cmd)

    if not last_log or gfid not in last_log:
        cmd = r"zgrep 'set bad to index = 1' /sf/log/[1-9][1-9]\.tar\.gz|grep {}".format(gfid)
        last_log = get_last_log(cmd)

    if not last_log or gfid not in last_log:
        last_log = "没有找到gfid:{}的历史打bad日志".format(gfid)
    return last_log


def _is_wcc_dirty(host, brickpath, path):
    """Check the ``user.glusterfs.wcache`` xattr of *path* under *brickpath*.

    Runs ``getfattr`` on *host* and returns True as soon as a
    ``user.glusterfs.wcache`` entry with a value other than
    ``0x0000000000000000`` is seen.  A ``CmdError`` is logged and reported
    as False.
    """
    all_zero = '0x0000000000000000'
    try:
        getfattr_cmd = '/sf/vs/bin/getfattr -d -m. -e hex {0:s}'.format(os.path.join(brickpath, path))
        # NOTE(review): assumes remote_cli yields one output line per item — confirm.
        for entry in remote_cli(host, getfattr_cmd):
            key_value = entry.split('=')
            if key_value[0] != 'user.glusterfs.wcache':
                continue
            if key_value[1] != all_zero:
                return True
    except CmdError as err:
        logger.error("Failed to getfattr, reason: %s", str(err))
    return False


def _is_tier_dirty_2(host, brickpath, path):
    """Check the ``user.glusterfs.tier_status`` xattr of *path* (vs2.x variant).

    Runs ``getfattr`` on *host* and returns True as soon as a
    ``user.glusterfs.tier_status`` entry with a value other than
    ``0x0000000000000000`` is seen.  Any exception is logged and reported
    as False.
    """
    all_zero = '0x0000000000000000'
    try:
        getfattr_cmd = '/sf/vs/bin/getfattr -d -m. -e hex {0:s}'.format(os.path.join(brickpath, path))
        # NOTE(review): assumes remote_cli yields one output line per item — confirm.
        for entry in remote_cli(host, getfattr_cmd):
            key_value = entry.split('=')
            if key_value[0] != 'user.glusterfs.tier_status':
                continue
            if key_value[1] != all_zero:
                return True
    except Exception as err:
        logger.error("Failed to getfattr, reason: %s", str(err))
    return False


def _is_tier_dirty_3(host, brickpath, path):
    """Check the ``user.glusterfs.tier_status`` xattr of *path* (vs3.x variant).

    Runs ``getfattr`` on *host* and returns True as soon as a
    ``user.glusterfs.tier_status`` entry with a value other than
    ``0x0000000000000000`` is seen.  Only ``CmdError`` is caught; it is
    logged and reported as False.
    """
    all_zero = '0x0000000000000000'
    try:
        getfattr_cmd = '/sf/vs/bin/getfattr -d -m. -e hex {0:s}'.format(os.path.join(brickpath, path))
        # NOTE(review): assumes remote_cli yields one output line per item — confirm.
        for entry in remote_cli(host, getfattr_cmd):
            key_value = entry.split('=')
            if key_value[0] != 'user.glusterfs.tier_status':
                continue
            if key_value[1] != all_zero:
                return True
    except CmdError as err:
        logger.error("Failed to getfattr, reason: %s", str(err))
    return False


def restore_wcache_config(volume):
    """Reset the two write-cache options that ``_force_clean_wcache`` sets.

    :param volume: volume name passed to ``gluster v reset``.
    :return: 0 when both resets ran, -1 as soon as one raises ``CmdError``
        (the error is logged and the remaining reset is skipped).
    """
    reset_templates = (
        'gluster v reset {} performance.wcc-wbforce',
        'gluster v reset {} performance.wcc-delay-time',
    )
    try:
        for template in reset_templates:
            cli(template.format(volume))
    except CmdError as err:
        logger.error("Failed to restore wcache config, reason: %s", str(err))
        return -1
    return 0


def restore_tier_config(volume):
    """Reset ``performance.st-bypass`` on *volume*.

    :return: 0 on success, -1 when the command raises ``CmdError`` (logged).
    """
    reset_cmd = 'gluster v reset {} performance.st-bypass'.format(volume)
    try:
        cli(reset_cmd)
    except CmdError as err:
        logger.error("Failed to restore tier config, reason: %s", str(err))
        return -1
    else:
        return 0


def _force_clean_wcache(volume, host, brickpath, path):
    """Force the write cache of *path* to be flushed and wait until it is clean.

    Turns ``performance.wcc-wbforce`` on and sets
    ``performance.wcc-delay-time`` to 0 on *volume*, then polls
    ``_is_wcc_dirty`` every 3 seconds until it reports clean.  There is no
    timeout on that wait.

    The two options are restored with ``restore_wcache_config`` on every exit
    path, including exceptions other than ``CmdError`` (for example an
    operator interrupt during the wait), which previously left the forced
    settings in place.

    :return: 0 once the file is clean, -1 when setting the options raised
        ``CmdError`` (logged).
    """
    try:
        cmdline = '/sf/vs/glusterfs/sbin/gluster v set {} performance.wcc-wbforce on'.format(volume)
        cli(cmdline)
        cmdline = '/sf/vs/glusterfs/sbin/gluster v set {} performance.wcc-delay-time 0'.format(volume)
        cli(cmdline)
        while _is_wcc_dirty(host, brickpath, path):
            time.sleep(3)
        return 0
    except CmdError as e:
        logger.error("Failed to set wcc-wbforce, reason: %s", str(e))
        return -1
    finally:
        restore_wcache_config(volume)


def _force_clean_tier_2(volume, host, brickpath, gfid, path):
    """Kick *gfid* out of the tier on *host* until ``_is_tier_dirty_2`` is False.

    The kickout command is re-issued before every check and its failures are
    only logged: all that matters is that the following check succeeds.  The
    loop has no timeout and the function always returns 0.  *volume* is
    accepted but not used.
    """
    still_dirty = True
    while still_dirty:
        try:
            kickout_cmd = '/sf/vs/bin/vs_tier_cli.py -c kickout -a brick_id={},gfid={}'.format(brickpath, gfid)
            remote_cli(host, kickout_cmd)
        except Exception as err:
            logger.warn("Failed to kickout brickpath: %s, reason: %s", brickpath, str(err))

        still_dirty = _is_tier_dirty_2(host, brickpath, path)
        if still_dirty:
            time.sleep(3)
    return 0


def _force_clean_tier_3(volume, host, brickpath, gfid):
    """Kick *gfid* out of the tier on *host* until ``_is_tier_dirty_3`` is False.

    The kickout command is re-issued before every check; a ``CmdError`` from
    it is only logged, because all that matters is that the following check
    succeeds.  The gfid is passed to ``_is_tier_dirty_3`` as the path.  The
    loop has no timeout and the function always returns 0.  *volume* is
    accepted but not used.
    """
    kickout_cmd = '/sf/vs/bin/vs_tier_cli.py -c kickout -a brick_id={},gfid={}'.format(brickpath, gfid)
    while True:
        try:
            remote_cli(host, kickout_cmd)
        except CmdError as err:
            logger.warn("Failed to kickout brickpath: %s, reason: %s", brickpath, str(err))

        if _is_tier_dirty_3(host, brickpath, gfid):
            time.sleep(3)
            continue
        return 0


def set_bad_sectors():
    """Placeholder: does nothing and returns None."""
    return None


def _get_disk_bad_sectors_2(host, brick_path):
    """vs2.x: scan *brick_path* on *host* with ``badblocks`` (4096-byte blocks).

    Waits 5 seconds, runs the scan and returns the command output split on
    whitespace.

    NOTE(review): the command redirects stdout to /tmp/badblocks_vs2x.log, so
    what ``remote_cli`` returns here depends on how it captures output —
    confirm that the returned list really contains the bad-block offsets.
    """
    time.sleep(5)
    scan_cmd = 'badblocks -v -b 4096 {} >/tmp/badblocks_vs2x.log'.format(brick_path)
    print(color.cyan("扫描挂载路径的坏道：{}".format(scan_cmd)))
    scan_output = remote_cli(host, scan_cmd)
    print(color.cyan("坏道偏移：{}".format(scan_output)))
    return scan_output.split()


def _get_disk_bad_sectors_3(host, brick_path, gfid):
    """vs3.x: scan the file *gfid* of a brick for bad blocks.

    Steps, all executed on *host*:

    1. mount the brick's ``lv_efs`` read-only on ``mnt_dir``; the volume
       group name is the 6th ``/``-separated component of *brick_path*;
    2. run ``badblocks`` (4096-byte blocks) on ``<mnt_dir>/<gfid>``;
    3. unmount ``mnt_dir`` — now also when the scan raises, so a failed scan
       no longer leaves the temporary mount behind.

    Mounting is not synchronous, hence the fixed 5 second waits (marked as a
    temporary measure by the original author).

    NOTE(review): the scan redirects stdout to /tmp/badblocks.log, so what
    ``remote_cli`` returns depends on how it captures output — confirm that
    the returned list really contains the bad-block offsets.

    :return: the scan output split on whitespace.
    """
    cmd = '/sf/vs/sbin/efs_standalone -i ulvm -p /dev/{}/lv_efs {} -o readonly' \
        .format(brick_path.split('/')[5], mnt_dir)
    print(color.cyan("对应主机进行只读挂载：{}".format(cmd)))
    remote_cli(host, cmd)

    try:
        # The scan cannot start right after the mount command returns.
        time.sleep(5)
        cmd = 'badblocks -v -b 4096 -c 1 -s {}/{} > /tmp/badblocks.log'.format(mnt_dir, gfid)
        print(color.cyan("扫描挂载路径的坏道：{}".format(cmd)))
        offsets = remote_cli(host, cmd)
        print(color.cyan("坏道偏移：{}".format(offsets)))
    finally:
        time.sleep(5)
        print(color.cyan("去掉临时挂载点"))
        remote_cli(host, "umount {}".format(mnt_dir))

    return offsets.split()


def clear_wcahe_3(volume, source, dest, gfid):
    """Force the write cache of *gfid* to be flushed on two bricks.

    :param volume: volume whose write-cache options are toggled.
    :param source: id of the first brick; looked up in ``_get_cluster_info_3``.
    :param dest: id of the second brick.
    :param gfid: file to flush; used as the path relative to each brick.
    :return: the non-zero code of ``_force_clean_wcache`` on failure,
        otherwise None.
    """
    _, bricks = _get_cluster_info_3()
    # Default-constructed bricks are used when an id is not found.
    src_brick = Brick()
    dst_brick = Brick()
    for brick in bricks:
        if brick.id == source:
            src_brick = brick
        elif brick.id == dest:
            dst_brick = brick

    print(color.cyan('1. 强制清空写缓存数据'))
    # Source first, then destination.  The host is always read from
    # ``hostname``, like every other Brick access in this file; the source
    # flush used to read ``.host``.
    for brick in (src_brick, dst_brick):
        if _is_wcc_dirty(brick.hostname, brick.path, gfid):
            ret = _force_clean_wcache(volume, brick.hostname, brick.path, gfid)
            if ret != 0:
                print(color.red('*' * 20 + '[失败]' + '*' * 20))
                return ret
    print(color.cyan('*' * 20 + '[成功]' + '*' * 20))


def clear_tier_3(volume, source, dest, gfid):
    """Force the tier data of *gfid* to be flushed on two bricks.

    The bricks with ids *source* and *dest* are looked up in
    ``_get_cluster_info_3`` (a default ``Brick()`` is used for an unknown
    id) and cleaned in that order.  Returns the non-zero code of
    ``_force_clean_tier_3`` on failure, otherwise None.
    """
    _, all_bricks = _get_cluster_info_3()
    src_brick, dst_brick = Brick(), Brick()
    for candidate in all_bricks:
        if candidate.id == source:
            src_brick = candidate
            continue
        if candidate.id == dest:
            dst_brick = candidate

    print(color.cyan('2. 强制清空分层数据'))
    for target in (src_brick, dst_brick):
        if not _is_tier_dirty_3(target.hostname, target.path, gfid):
            continue
        ret = _force_clean_tier_3(volume, target.hostname, target.path, gfid)
        if ret != 0:
            print(color.red('*' * 20 + '[失败]' + '*' * 20))
            return ret
    print(color.cyan('*' * 20 + '[成功]' + '*' * 20))


def try_badblocks_heal_2(gfid, src_host, src_path, dst_host, dst_path, path,
                         dev, offset, length, badblocks_dir, force):
    """vs2.x: copy one block from the source replica over a bad block of *dev*.

    The block at ``offset`` (in units of *length* bytes) of the destination
    device *dev* is rewritten with the matching data of the source replica:

    1. the ``SHARD:`` lines of the destination file are walked to convert the
       device-relative offset into a file-relative byte offset;
    2. the ``SHARD:`` lines of the source file are walked to find the source
       logical volume that holds that byte offset;
    3. the block is read with ``dd`` on the source host, copied to the
       destination host with ``scp`` and, after confirmation, written with
       ``dd`` onto *dev*.

    NOTE(review): the layout of the ``SHARD:`` lines is not visible here; the
    code compares colon field ``[-4]`` with the shard number taken from *dev*
    and multiplies field ``[-2]`` by 128 MiB — confirm against the metadata
    format.

    :param gfid: gfid of the file, used to name the temporary dump file.
    :param src_host: host of the replica data is read from.
    :param src_path: brick path of that replica.
    :param dst_host: host of the replica that owns *dev*.
    :param dst_path: brick path of that replica.
    :param path: file path relative to the brick paths.
    :param dev: destination device; its name is parsed with ``split('.')`` /
        ``split('_')`` to get the shard number and the gfid part.
    :param offset: block index on *dev*, in units of *length*.
    :param length: block size in bytes, used as dd ``bs``.
    :param badblocks_dir: directory holding the temporary dump file on both
        hosts.
    :param force: when true the interactive confirmation is skipped.
    :return: 0 on success, -1 on any failure (logged).
    """
    shard_unit = 128 * 1024 * 1024
    try:
        src_dev = ''
        src_offset = offset * length
        cmd = '/bin/cat {}'.format(os.path.join(dst_path, path))
        result = remote_cli(dst_host, cmd)
        for line in result:
            fields = line.split(':')
            if fields[0] != 'SHARD':
                continue
            # Stop once the shard that backs the destination device is reached.
            if int(fields[-4]) == int(dev.split('.')[1]):
                break
            src_offset += int(fields[-2]) * shard_unit

        cmd = '/bin/cat {}'.format(os.path.join(src_path, path))
        result = remote_cli(src_host, cmd)
        for line in result:
            fields = line.split(':')
            if fields[0] != 'SHARD':
                continue
            shard_bytes = int(fields[-2]) * shard_unit
            if src_offset >= shard_bytes:
                src_offset -= shard_bytes
                continue
            src_vg_gfid = '{}_{}.{}'.format(src_path.split('/')[5],
                                            dev.split('_')[1].split('.')[0], int(fields[-4]))
            cmd = "/sbin/lvs --unit b --nosuffix | /bin/grep '{}' | /usr/bin/sort -t '.' -k 2 -b -n" \
                .format(src_vg_gfid)
            result = remote_cli(src_host, cmd)
            src_dev = '/dev/{}/{}'.format(result[0].split()[1], result[0].split()[0])
            break

        if not src_dev:
            # No source shard covers the offset; refuse to run dd with an
            # empty input file.
            logger.error("Failed to heal badblocks, reason: %s",
                         "no source device found for {} on {}".format(dev, src_host))
            return -1

        # Floor division keeps dd's skip= an integer on Python 3 as well.
        src_offset = src_offset // length
        tmp_file = '{}/{}_{}_{}'.format(badblocks_dir, gfid, offset, length)
        # Read the block from the source replica.
        cmd = '/bin/dd if={} of={} iflag=direct bs={} count=1 skip={} conv=notrunc' \
            .format(src_dev, tmp_file, length, src_offset)
        print(color.cyan('{} {}'.format(src_host, cmd)))
        remote_cli(src_host, cmd)

        # Copy the dump to the destination host.
        cmd = '/usr/bin/scp -r {} root@{}:{}'.format(tmp_file, dst_host, badblocks_dir)
        remote_cli(src_host, cmd)

        cmd = '/bin/dd of={} if={} oflag=direct bs={} count=1 seek={} conv=notrunc' \
            .format(dev, tmp_file, length, offset)
        print(color.cyan('{} {}'.format(dst_host, cmd)))

        # The write is destructive: require an explicit "y" unless forced.
        while not force:
            print(color.cyan('输入y确认是否坏道修复！！！'))
            step = sys.stdin.readline().strip('\n')
            if step.lower() == 'y':
                break
            else:
                print(color.red('输入字符错误，请重新输入'))

        remote_cli(dst_host, cmd)
        return 0
    except Exception as e:
        logger.error("Failed to heal badblocks, reason: %s", str(e))
    return -1


def try_badblocks_heal_3(gfid, src_brick_host, dst_brick_host, offset, length):
    """vs3.x: copy one block of *gfid* from the source host to the destination.

    The block at index *offset* (dd ``bs`` = *length*) is read from
    ``<mnt_dir>/<gfid>`` on *src_brick_host*, copied with ``scp`` into
    ``badblocks_dir`` on *dst_brick_host* and, once the operator answers
    ``y``, written to the same position of ``<mnt_dir>/<gfid>`` there.

    :return: 0 when the block was written or the operator answered ``n``;
        -1 when a command raised ``CmdError`` (logged).
    """
    try:
        dump_file = '{}/{}_{}_{}'.format(badblocks_dir, gfid, offset, length)

        # Read the block on the source host.
        read_cmd = '/bin/dd if={}/{} of={} iflag=direct bs={} count=1 skip={} conv=notrunc' \
            .format(mnt_dir, gfid, dump_file, length, offset)
        print(color.cyan('{} {}'.format(src_brick_host, read_cmd)))
        remote_cli(src_brick_host, read_cmd)

        # Ship the dump to the destination host.
        copy_cmd = '/usr/bin/scp -r {} root@{}:{}'.format(dump_file, dst_brick_host, badblocks_dir)
        remote_cli(src_brick_host, copy_cmd)

        write_cmd = '/bin/dd of={}/{} if={} oflag=direct bs={} count=1 seek={} conv=notrunc' \
            .format(mnt_dir, gfid, dump_file, length, offset)
        print(color.cyan('{} {}'.format(dst_brick_host, write_cmd)))

        # Keep asking until the operator answers y (go on) or n (give up).
        answer = None
        while answer != 'y':
            print(color.cyan('数据拷贝完成后，输入y确认进行坏道修复！！！'))
            answer = sys.stdin.readline().strip('\n').lower()
            if answer == 'n':
                print(color.red('放弃坏道修复'))
                return 0
            if answer != 'y':
                print(color.red('输入字符错误，请重新输入'))

        remote_cli(dst_brick_host, write_cmd)
        return 0
    except CmdError as err:
        logger.error("Failed to heal badblocks, reason: %s", str(err))
    return -1


def _fix_bad_sectors_init(src_host, dst_host):
    """Create the working directories on both hosts.

    ``mnt_dir`` (temporary mount point) is created first, then
    ``badblocks_dir`` (dump files); each on *src_host* and then on *dst_host*.
    """
    for directory in (mnt_dir, badblocks_dir):
        mkdir_cmd = '/bin/mkdir -p {}'.format(directory)
        for target_host in (src_host, dst_host):
            remote_cli(target_host, mkdir_cmd)


def _fix_disk_bad_sectors_2(dev, offsets, length=512):
    """vs2.x: repair the logical bad blocks of *dev* from the other replica.

    :param dev: device path; the volume group is ``dev.split('/')[2]`` and
        the gfid is parsed from ``dev.split('/')[3]`` (presumably
        ``/dev/<vg>/<prefix>_<32 hex gfid>...`` — confirm against callers).
    :param offsets: block indexes on *dev* to repair, in units of *length*.
    :param length: block size in bytes handed to ``try_badblocks_heal_2``.
    :return: -1 (or the non-zero code of a failed step) on failure, None on
        success.
    """
    # Walk every replica group to find the one that holds the device.
    _, _, _, replicate, _ = _get_cluster_info_2()
    dst_vg = dev.split('/')[2]
    src_host = ''
    dst_host = ''
    src_path = ''
    dst_path = ''
    gfid = dev.split('/')[3].split('_')[1]
    gfid = '{}-{}-{}-{}-{}'.format(gfid[0:8], gfid[8:12], gfid[12:16], gfid[16:20], gfid[20:32])
    path = '.glusterfs/{}/{}/{}'.format(gfid[0:2], gfid[2:4], gfid)
    for v in replicate.values():
        # Only replicas 0 and 1 are considered.  The one whose path contains
        # the device's volume group is the destination, the other the source.
        if dst_vg in v[0].split(':')[2]:
            dst_entry, src_entry = v[0], v[1]
        elif dst_vg in v[1].split(':')[2]:
            dst_entry, src_entry = v[1], v[0]
        else:
            continue
        src_host = src_entry.split(':')[1]
        src_path = src_entry.split(':')[2]
        dst_host = dst_entry.split(':')[1]
        dst_path = dst_entry.split(':')[2]
        break

    if src_path == '' or dst_path == '':
        logger.error('Failed to get src path or dst path')
        return -1

    # Make sure the directory that stores the dump files exists on both hosts.
    badblocks_dir = '/root/badblocks'
    cmd = '/bin/mkdir -p {}'.format(badblocks_dir)
    remote_cli(src_host, cmd)
    remote_cli(dst_host, cmd)

    print(color.cyan('VS2.X 场景下修复指定位置的逻辑坏道'))
    print(color.cyan('开始坏道修复, 文件：{}, 修复大小：{}'.format(dev, length)))
    print(color.cyan('src host: {} path: {}'.format(src_host, src_path)))
    print(color.cyan('dst host: {} path: {}'.format(dst_host, dst_path)))
    print(color.cyan('坏道偏移 {}'.format(offsets)))
    try:
        cmdline = '/sf/vs/glusterfs/sbin/gluster vol list'
        volume = cli(cmdline)[0]
    except Exception as e:
        logger.error("Failed to get volume, reason: %s", str(e))
        return -1

    if volume == '':
        # ``.format`` used to be applied to the return value of logger.error.
        logger.error("Failed to get volume: {}".format(volume))
        return -1

    print(color.cyan('1. 强制清空写缓存数据'))
    if _is_wcc_dirty(src_host, src_path, path):
        ret = _force_clean_wcache(volume, src_host, src_path, path)
        if ret != 0:
            print(color.red('*' * 20 + ' [失败]' + '*' * 20))
            return ret
    print(color.cyan('*' * 20 + ' [成功]' + '*' * 20))

    print(color.cyan('2. 强制清空分层数据'))
    if _is_tier_dirty_2(src_host, src_path, path):
        ret = _force_clean_tier_2(volume, src_host, src_path, gfid, path)
        if ret != 0:
            print(color.red('*' * 20 + ' [失败]' + '*' * 20))
            return ret
    print(color.cyan('*' * 20 + ' [成功]' + '*' * 20))

    print(color.cyan('3. 开始坏道修复'))
    for offset in offsets:
        ret = try_badblocks_heal_2(gfid, src_host, src_path, dst_host, dst_path, path,
                                   dev, offset, length, badblocks_dir, force=False)
        if ret:
            print(color.red('*' * 20 + ' [失败]' + '*' * 20))
            return ret
    print(color.cyan('*' * 20 + ' [成功]' + '*' * 20))
    print(color.cyan('坏道修复完成！！！'))


def _fix_disk_bad_sectors_3(gfid, src_brick, dst_brick, length=512):
    """vs3.x: mount both bricks and heal every recorded bad block of *gfid*.

    :param gfid: file whose blocks are copied.
    :param src_brick: brick info dict; ``host``, ``path``,
        ``bad_sector_blocks`` and ``bad_block_offsets`` are read.
    :param dst_brick: brick info dict; ``host`` and ``path`` are read.
    :param length: block size in bytes handed to ``try_badblocks_heal_3``.
    :return: the non-zero code of a failed ``try_badblocks_heal_3`` call,
        otherwise None.

    Both temporary mounts are now removed in a ``finally`` block, so a
    command that raises half-way no longer leaves ``/mnt/efs_badblocks_heal``
    mounted on the hosts.

    NOTE(review), deliberately left unchanged because this path writes to
    disk:

    * the guard reads ``bad_sector_blocks`` while the loop reads
      ``bad_block_offsets``; ``_badsector_repair_v3`` only sets the latter,
      so called from there this function always stops at the guard;
    * ``_badsector_repair_v3`` passes the brick that HAS bad blocks as
      *src_brick*, and ``try_badblocks_heal_3`` reads from its first host
      argument — the copy direction looks inverted; confirm before arming;
    * both bricks are mounted ``-o readonly`` although the procedure below
      asks for a read-write mount of the brick being repaired;
    * the offsets come from a ``badblocks -b 4096`` scan while *length*
      defaults to 512.
    """
    if not src_brick.get('bad_sector_blocks'):
        print(color.red("没有输入偏移！无法修复"))
        return

    # 3.1 Flush the tier data.
    # The VM with bad sectors must be shut down and the cached data of the
    # shard file flushed on both replicas.  Afterwards the brick of the
    # bad-sector disk has to be killed and kept down.  Before killing it,
    # check with the rpc tool which other VMs would be affected; those must
    # be shut down as well.  The tier state can be inspected with
    # `/sf/vs/bin/vs_rpc_tool --cmd file_pathinfo --file <gfid> | grep tier`
    # (this function does not run it).
    print(color.red("请确认brick src和dst的所有虚拟机都移走"))
    time.sleep(5)

    # 3.2 Stop the brick service of the bad-sector disk.
    # Move glusterfsd away on the host of the good replica (bad-sector disk)
    # and kill the brick: `kill -SIGSTOP <supervise of the brick>`,
    # `kill -9 <brick>`.  The brick must be gone because efs does not allow
    # several writable clients and the repair writes through the mount point.
    print(color.red("即将关闭坏道副本所对应glusterfsd服务"))
    time.sleep(5)

    # 3.3 Mount both replicas.
    # Intended procedure: the replica without bad sectors read-only, the one
    # with bad sectors read-write (unmount first, it was mounted before).
    print(color.red("确保修复副本和源端都已经挂载两个挂载点"))

    mount_template = '/sf/vs/sbin/efs_standalone -i ulvm -p /dev/{}/lv_efs /mnt/efs_badblocks_heal -o readonly'
    umount_cmd = '/bin/umount /mnt/efs_badblocks_heal'
    mounted_hosts = []
    try:
        for brick in (src_brick, dst_brick):
            cmd = mount_template.format(brick.get('path').split('/')[5])
            remote_cli(brick.get('host'), cmd)
            mounted_hosts.append(brick.get('host'))
            time.sleep(5)

        # 3.4 Read the data at each offset (ideally compare the md5 of the
        #     surrounding data on both replicas first):
        #     `dd if=<mount>/<file> of=<dump> bs=4096 count=1 skip=<n> iflag=direct`
        # 3.5 Send the dump to the host of the replica being repaired.
        # 3.6 Write it onto the bad block.  This is the critical step: a
        #     wrong parameter destroys data, so check the arguments repeatedly:
        #     `/bin/dd if=<dump> of=<mount>/<file> oflag=direct bs=4096 count=1 seek=<n> conv=notrunc`
        for offset in src_brick.get('bad_block_offsets'):
            ret = try_badblocks_heal_3(gfid, src_brick.get('host'), dst_brick.get('host'), offset, length)
            if ret:
                print(color.red('*' * 20 + '[失败]'))
                return ret
    finally:
        # 4 Restore the disk service: unmount first, otherwise the brick
        #   finds the temporary mount still present when it comes back.
        for mounted_host in mounted_hosts:
            remote_cli(mounted_host, umount_cmd)

    # 3.7 Whether the bad blocks are really repaired can be checked by
    #     reading them again (or by another badblocks scan).
    print(color.cyan('*' * 20 + '[成功]'))
    # Then move glusterfsd back and let /sf/vs/bin/vs_brick_dog.sh restart
    # the brick; it is healthy once the process is up.
    print(color.cyan('请自行修复glusterfsd进程文件，并调用/sf/vs/bin/vs_brick_dog.sh拉起brick'))
    time.sleep(5)
    # If only the affected VMs were shut down and there is live traffic, a
    # data sync follows; wait for it before continuing.

def _badsector_repair_v2(dev, offset=""):
    """vs2.x only: repair a good replica that has bad sectors.

    The data comes from the replica that is marked bad but has no bad
    sectors.  The operator is asked for a file with one block offset per
    line; blank lines are skipped.

    :param dev: device to repair, handed to ``_fix_disk_bad_sectors_2``.
    :param offset: optional extra offset repaired in addition to the ones
        read from the file; the empty string means none.
    :return: the result of ``_fix_disk_bad_sectors_2``.
    :raises ValueError: when an offset is not an integer.
    """
    print("VS2.x请自行使用以下命令查看坏道盘brick日志，无法读取文件数量:")
    # Raw literal: "\[" is not a valid escape sequence in a normal string.
    print(r"""grep "E \[" /sf/vs/glusterfs/var/log/glusterfs/bricks/ -nr|grep -v open|grep ret|awk -F' ' '{print $7}'|sort|uniq""")

    offsets = []
    if offset != '':
        offsets.append(int(offset))

    while True:
        print(color.cyan('请输入需要坏道修复的偏移文件，并输入回车结束:'))
        offset_path = sys.stdin.readline().strip('\n')
        if not os.path.exists(offset_path):
            print(color.red('输入路径错误，请重新输入'))
        else:
            break
    with open(offset_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines such as a trailing newline at end of file.
                continue
            offsets.append(int(line))

    ret = _fix_disk_bad_sectors_2(dev, offsets)

    return ret


def _badsector_repair_v3(gfid, volume):
    """vs3.x only: repair a good replica that has bad sectors.

    The data comes from the replica that is marked bad but has no bad
    sectors.  The flow is interactive: confirm, scan the first two replicas
    of *gfid* for bad blocks, confirm again, flush write cache and tier data
    on both bricks, then call ``_fix_disk_bad_sectors_3``.

    :param gfid: file to repair.
    :param volume: volume name; asked for interactively when empty.
    :return: the list of brick info dicts when the flow ran to the end, the
        non-zero code of a failed flush step, or None when the operator
        declined, nothing (or everything) needs repair, or fewer than two
        replicas were found.

    NOTE(review): the brick WITH bad blocks is handed over as ``src_brick``,
    while ``try_badblocks_heal_3`` reads from its source host — confirm the
    intended copy direction.  The result of ``_fix_disk_bad_sectors_3`` is
    not checked.
    """
    print("!!! 在进行坏道扫描时，可能触发卡慢盘识别错误，误判拔盘，请先确认当前版本没有正在运行卡慢盘检测服务 !!!")
    print("在进行唯一好副本修复坏道时,需要使用到坏副本的存储数据，修复后可能产生数据丢失，是否继续修复，yes/no?")
    if sys.stdin.readline().strip('\n') != "yes":
        return

    routes = _gfid2route(gfid)
    bricks = [_brickid2brickinfo(route) for route in routes]
    if len(bricks) < 2:
        # The repair needs two replicas; indexing bricks[1] would raise.
        logger.error("Failed to repair bad sectors, reason: %s",
                     "gfid {} has fewer than two routes".format(gfid))
        return

    _fix_bad_sectors_init(bricks[0]['host'], bricks[1]['host'])

    # Scan the bricks of the first two replicas for bad blocks.
    blockoffset0 = bricks[0]['bad_block_offsets'] = _get_disk_bad_sectors_3(bricks[0]['host'], bricks[0]['path'], gfid)
    blockoffset1 = bricks[1]['bad_block_offsets'] = _get_disk_bad_sectors_3(bricks[1]['host'], bricks[1]['path'], gfid)
    if not blockoffset0 and not blockoffset1:
        print(color.green("没有需要修复的坏道, brick信息:\n{}\n".format(bricks)))
        return

    if blockoffset0 and blockoffset1:
        print(color.green("两个副本都有坏道，请自行修复!!!"))
        return

    print("数据副本坏道情况为：\n{}\n另一个数据副本坏道为：\n{}\n".format(blockoffset0, blockoffset1))

    while True:
        print(color.cyan('扫描的坏道记录位于/tmp/badblocks.log，是否进行修复？yes/no'))
        step = sys.stdin.readline().strip('\n')
        if step.lower() == "yes":
            break
        elif step.lower() == "no":
            print(color.red('放弃修复,直接退出'))
            return
        else:
            print(color.red('输入字符错误，请重新输入'))

    # From here on exactly one replica has bad blocks; it becomes src_brick.
    if blockoffset0 and not blockoffset1:
        src_brick = bricks[0]
        dst_brick = bricks[1]
    else:
        src_brick = bricks[1]
        dst_brick = bricks[0]

    while not volume:
        print(color.cyan('请输入卷名：'))
        volume = sys.stdin.readline().strip('\n')
        print(color.cyan('确认卷名为{}吗？yes/no'.format(volume)))
        if sys.stdin.readline().strip('\n') != "yes":
            volume = None

    print(color.cyan('1. 强制清空写缓存数据'))
    for brick in (src_brick, dst_brick):
        if _is_wcc_dirty(brick.get('host'), brick.get('path'), gfid):
            ret = _force_clean_wcache(volume, brick.get('host'), brick.get('path'), gfid)
            if ret != 0:
                print(color.red('*' * 20 + ' [失败]' + '*' * 20))
                return ret
    print(color.cyan('*' * 20 + ' [成功]' + '*' * 20))

    print(color.cyan('2. 强制清空分层数据'))
    for brick in (src_brick, dst_brick):
        if _is_tier_dirty_3(brick.get('host'), brick.get('path'), gfid):
            ret = _force_clean_tier_3(volume, brick.get('host'), brick.get('path'), gfid)
            if ret != 0:
                print(color.red('*' * 20 + ' [失败]' + '*' * 20))
                return ret
    print(color.cyan('*' * 20 + ' [成功]' + '*' * 20))

    print(color.cyan('3. 开始修复brick'))
    _fix_disk_bad_sectors_3(gfid, src_brick, dst_brick)

    return bricks
