#!/usr/bin/env python
# -- coding: utf-8 --
# 与模块无关的通用工具
import os
import sys
import re
import time
import json
import uuid
import fcntl
import socket
import hashlib
import logging
import subprocess
import ConfigParser
from functools import wraps
# from libcommon import config

# Root of the vsfire working tree: three directory levels above this file,
# converted to an absolute path.
VSFIRE_WORK_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
VSFIRE_WORK_DIR = os.path.abspath(VSFIRE_WORK_DIR)
# Eight log files are defined for log output.
max_concurrency = 8  # at most 8 concurrent tasks
logger = logging.getLogger(__name__)
VSFIRE_MAGIC = 'FEC61AC2'  # marker carried by temporary files that vsfire has processed
vsfire_recovery_dir = 'vsfire_recovery_{}'.format(VSFIRE_MAGIC)
# Timestamp (whole seconds) taken once, when this module is imported.
str_time = str(int(time.time()))
DISK_CONF = '/sf/cfg/vs/disk/'
# Backup directory name embeds the import-time timestamp above.
BACK_PATH = '/sf/data/local/disk_conf_back_' + str_time + '/'
DISK_PARTITION = '/sf/cfg/vs/partition/'
# Keys used for the "replicate" mapping built by get_vs_cluster_info() on 3.x.
LEFT_TREE_INDEX = 0
RIGHT_TREE_INDEX = 1
# Fault injection: fault_point_result() only consults the JSON file below
# when this switch is on and the file exists.
vsfire_fault_inject_enable = True
vsfire_fault_config = os.path.dirname(__file__) + '/../../vsfire_fault_inject.json'
vsfire_faults = None  # lazily loaded content of vsfire_fault_config
vsfire_lock_file = '/cfs/vs/vsfire_{}.lock'.format(VSFIRE_MAGIC)
vsfire_concurrency_lock_file = '/cfs/vs/vsfire_{}_concurrency.lock'.format(VSFIRE_MAGIC)
s_vsfire_config_file = os.path.join(VSFIRE_WORK_DIR, 'config', 'vsfire.conf')

# Prefer the "*_orig" xattr binaries when they exist; otherwise fall back to
# the plain getfattr/setfattr under /sf/vs/bin.
s_getfattr_bin = '/sf/vs/bin/getfattr_orig'
if not os.path.exists(s_getfattr_bin):
    s_getfattr_bin = '/sf/vs/bin/getfattr'
s_setfattr_bin = '/sf/vs/bin/setfattr_orig'
if not os.path.exists(s_setfattr_bin):
    s_setfattr_bin = '/sf/vs/bin/setfattr'

class CmdError(Exception):
    """Error raised by the helpers in this module (failed command, bad input, ...)."""


class VsfireConfig(object):
    """Best-effort reader for an INI-style vsfire configuration file.

    Every getter falls back to a neutral default (0, None or False) when the
    section/key is missing or the value cannot be converted, so callers never
    have to handle configuration errors themselves.
    """

    def __init__(self, config_file):
        """Parse *config_file*; a missing file simply yields an empty config."""
        self._file = config_file
        self._cf = ConfigParser.ConfigParser()
        self._cf.read(self._file)

    def get_int(self, field, key):
        """Return the integer stored at [field] key, or 0 on any lookup/parse error."""
        try:
            return self._cf.getint(field, key)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            return 0

    def get_string(self, field, key):
        """Return the raw string stored at [field] key, or None when unavailable."""
        try:
            return self._cf.get(field, key)
        except Exception:
            return None

    def get_bool(self, field, key):
        """Return True only when [field] key holds 'true' (case-insensitive).

        Any other value, or a missing section/key, gives False.
        """
        try:
            return self._cf.get(field, key).strip().lower() == 'true'
        except Exception:
            return False


class VsfireFlock(object):
    """Context manager that holds an exclusive ``flock`` on a lock file.

    Args:
        lock_file: path of the lock file; it is created if it does not exist.
        blocked: when True wait for the lock; when False (default) fail
            immediately if another holder has it (``flock`` raises).
        removed_file: when True (default) the lock file is deleted on exit.
    """

    def __init__(self, lock_file, blocked=False, removed_file=True):
        self._lock_file = lock_file
        self._fd_handler = None
        if blocked:
            self._lock_flag = fcntl.LOCK_EX  # blocking exclusive lock
        else:
            self._lock_flag = fcntl.LOCK_EX | fcntl.LOCK_NB  # non-blocking exclusive lock
        self._unlock_flag = fcntl.LOCK_UN
        self._removed_file = removed_file
        self._locked = False
        if not os.path.exists(self._lock_file):
            try:
                os.mknod(self._lock_file)
            except OSError:
                # Another process may have created the file between the
                # existence check and mknod; only a real failure is fatal.
                if not os.path.exists(self._lock_file):
                    raise

    def __enter__(self):
        """Open the lock file and take the lock; returns self."""
        self._fd_handler = open(self._lock_file, 'r')
        try:
            fcntl.flock(self._fd_handler.fileno(), self._lock_flag)
        except BaseException:
            # __exit__ is not called when __enter__ raises, so release the
            # file handle here instead of leaking it.
            self._fd_handler.close()
            self._fd_handler = None
            raise
        self._locked = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the lock, close the handle and optionally delete the lock file."""
        try:
            if self._locked:
                fcntl.flock(self._fd_handler.fileno(), self._unlock_flag)
        finally:
            # Reset state so the object can be reused and never reports a
            # lock it no longer holds.
            self._locked = False
            if self._fd_handler:
                self._fd_handler.close()
                self._fd_handler = None

        # Delete the lock file.
        if self._removed_file and os.path.exists(self._lock_file):
            os.remove(self._lock_file)


class VsfireConcurrencyFlock(object):
    """Context manager limiting how many vsfire instances run at once.

    The lock file holds a JSON list of ``{'host': ..., 'pid': ...}`` records,
    one per running instance. Entering appends a record for this process
    (raising CmdError once ``max_concurrency`` records exist); exiting removes
    it. All reads/writes of the file happen while holding a blocking
    VsfireFlock on the same path.
    """

    def __init__(self, lock_file, max_concurrency):
        self._lock_file = lock_file
        self._max_concurrency = max_concurrency
        self._hostname = socket.gethostname()
        self._pid = os.getpid()
        self._sequence = 0  # index of this process' record in the list

    def get_sequence(self):
        """Return the slot index assigned to this process by __enter__."""
        return self._sequence

    def __enter__(self):
        # Blocking flock, and the lock file is kept on release (removed_file=False).
        with VsfireFlock(self._lock_file, True, False) as lock:
            locks_data = []
            with open(self._lock_file, 'r') as f:
                if os.path.exists(self._lock_file) and os.path.getsize(self._lock_file) != 0:
                    locks_data = json.load(f)
                    if not isinstance(locks_data, list):
                        # The stored data is not a list: treat it as empty.
                        locks_data = []
                
                # The same process on the same host may take the lock again;
                # it reuses its existing slot and the file is left untouched.
                for index, lock_data in enumerate(locks_data):
                    if lock_data.get('host') == self._hostname and lock_data.get('pid') == self._pid:
                        self._sequence = index
                        return self
                self._sequence = len(locks_data)
                if self._sequence >= self._max_concurrency:
                    err_msg = 'too many vsfire are running, current: {}, max_concurrency: {}'.\
                        format(self._sequence, self._max_concurrency)
                    raise CmdError(err_msg)
                lock_data = {'host': self._hostname, 'pid': self._pid}
                locks_data.append(lock_data)
            # Persist the list including the record for this process.
            with open(self._lock_file, 'w') as f:
                json.dump(locks_data, f, ensure_ascii=False, indent=4)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if not os.path.exists(self._lock_file):
            return

        remove_lock_file = False
        with VsfireFlock(self._lock_file, True, False) as lock:
            try:
                with open(self._lock_file, 'r') as f:
                    locks_data = json.load(f)
                    # Drop the record(s) belonging to this process.
                    locks_data = [item for item in locks_data
                                  if item.get('host') != self._hostname or item.get('pid') != self._pid]

                if locks_data:
                    with open(self._lock_file, 'w') as f:
                        json.dump(locks_data, f, ensure_ascii=False, indent=4)
                else:
                    remove_lock_file = True
                    if os.path.exists(self._lock_file):
                        os.remove(self._lock_file)
            except Exception as e:
                    # On any error just delete the lock file. The consequence
                    # is that concurrency control stops working, which is
                    # considered low impact.
                    logger.error('got except:{}'.format(str(e)))
                    remove_lock_file = True

        # Every vsfire instance has exited, or an error occurred: delete the lock file.
        if remove_lock_file and os.path.exists(self._lock_file):
            os.remove(self._lock_file)

def check_str_is_gfid(gfid_str):
    """Return True when *gfid_str* is in dashed GFID form (8-4-4-4-12 word characters)."""
    matched = re.match(r"^\w{8}-\w{4}-\w{4}-\w{4}-\w{12}$", gfid_str)
    return matched is not None

def calculate_str_md5(input_string):
    """Return the hexadecimal MD5 digest of *input_string* encoded as UTF-8."""
    encoded = input_string.encode('utf-8')
    return hashlib.md5(encoded).hexdigest()


def get_vsfire_lock_file(value=''):
    """Return the lock file path to use for *value*.

    A non-empty ``str`` gets its own lock file: the base lock path suffixed
    with the MD5 of the value. Anything else maps to the base lock file.
    """
    if not isinstance(value, str) or value == '':
        return vsfire_lock_file
    return '{}_{}'.format(vsfire_lock_file, calculate_str_md5(value))


def fault_point_result():
    """Tell the caller whether a fault should be injected at its call site.

    Faults are described in the JSON file ``vsfire_fault_config`` as a list of
    objects with a ``filename`` plus either a ``line`` or a ``func``. The
    caller's frame (one level up) is compared against each entry.

    Returns:
        True when an entry matches the caller's file and line/function,
        False otherwise or when fault injection is disabled/unconfigured.
    """
    global vsfire_faults

    if not vsfire_fault_inject_enable or not os.path.exists(vsfire_fault_config):
        return False

    # Load the fault list on first use (and again while it is still empty).
    if not vsfire_faults:
        with open(vsfire_fault_config, 'r') as f:
            vsfire_faults = json.load(f)

    # Frame of the function that called us: this is the injection point.
    # NOTE: this must stay in this function; moving it into a helper would
    # change which frame index 1 refers to.
    frame = sys._getframe(1)
    if not vsfire_faults or not frame:
        return False

    caller_file = os.path.basename(frame.f_code.co_filename)
    for fault in vsfire_faults:
        if fault.get('filename') != caller_file:
            continue

        # A fault point can be identified by line number or by function name.
        if fault.get('line') and int(fault.get('line')) == frame.f_lineno:
            logger.warning('fault inject, filename: {}, line: {}'.format(fault.get('filename'), fault.get('line')))
            return True
        if fault.get('func') == frame.f_code.co_name:
            logger.warning('fault inject, filename: {}, func: {}'.format(fault.get('filename'), fault.get('func')))
            return True
    return False

# Colored terminal output can be switched off through the configuration file:
# it is enabled only when `output_colored` in section `common` is 'true'.
s_output_colored = VsfireConfig(s_vsfire_config_file).get_bool('common', 'output_colored')
class Colored(object):
    """Wrap text in ANSI color escape sequences when colored output is enabled."""

    # Escape format: ESC[<display mode>;<foreground>;<background>m
    # A single field sets the foreground only; the background stays default.
    RED = '\033[31m'  # red
    GREEN = '\033[32m'  # green
    YELLOW = '\033[33m'  # yellow
    BLUE = '\033[34m'  # blue
    FUCHSIA = '\033[35m'  # fuchsia
    CYAN = '\033[36m'  # cyan
    WHITE = '\033[37m'  # white

    #: no color
    RESET = '\033[0m'  # terminal default color

    def color_str(self, color, s):
        """Return *s* wrapped in the escape named by *color*, or plain when coloring is off."""
        if not s_output_colored:
            return '{}'.format(s)
        prefix = getattr(self, color)
        return '{}{}{}'.format(prefix, s, self.RESET)

    def red(self, s):
        """Return *s* in red."""
        return self.color_str('RED', s)

    def green(self, s):
        """Return *s* in green."""
        return self.color_str('GREEN', s)

    def yellow(self, s):
        """Return *s* in yellow."""
        return self.color_str('YELLOW', s)

    def blue(self, s):
        """Return *s* in blue."""
        return self.color_str('BLUE', s)

    def fuchsia(self, s):
        """Return *s* in fuchsia."""
        return self.color_str('FUCHSIA', s)

    def cyan(self, s):
        """Return *s* in cyan."""
        return self.color_str('CYAN', s)

    def white(self, s):
        """Return *s* in white."""
        return self.color_str('WHITE', s)


def print_with_clear(text):
    """Overwrite the current terminal line with *text* (no trailing newline)."""
    out = sys.stdout
    # '\r' moves the cursor to the start of the line and ESC[K erases the line.
    out.write('\r\033[K' + text)
    out.flush()


def cli(cmdline, split=False):
    """Run *cmdline* through the shell and return its standard output.

    Args:
        cmdline: shell command line to execute.
        split: when True return the output split on newlines (a list),
            otherwise return it as one string.

    Raises:
        CmdError: if *cmdline* is empty or the command exits non-zero; the
            message carries the captured stdout/stderr.
    """
    if not cmdline:
        raise CmdError("cli input cmd is empty")

    child = subprocess.Popen(cmdline, shell=True, bufsize=-1,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = child.communicate()

    if child.returncode != 0:
        details = []
        if out:
            details.append('stdout: {0:s}'.format(out))
        if err:
            details.append('stderr: {0:s}'.format(err))
        raise CmdError("Failed to exec {0:s}. {1:s}".format(cmdline, ''.join(details)))

    return out.split('\n') if split else out

# Switch read from the configuration file (`mgr_ssh_access` in section
# `common`); get_ssh_host() only rewrites host names when it is 'true'.
s_mgr_ssh_access = VsfireConfig(s_vsfire_config_file).get_bool('common', 'mgr_ssh_access')
def get_ssh_host(host):
    """Return the host name to use for ssh connections to *host*.

    When ``s_mgr_ssh_access`` is on and *host* starts with 'host-', /etc/hosts
    is scanned for a two-field line that contains the part of *host* after
    the first '-' and names a different 'host-*' alias (for example
    'host-mgr_50af73292317'); that alias is returned. In every other case
    *host* is returned unchanged.
    """
    if not (s_mgr_ssh_access and host.startswith('host-')):
        return host

    host_conf = '/etc/hosts'
    if not os.path.exists(host_conf):
        return host

    host_key = host.split('-')[1]
    with open(host_conf, 'r') as f:
        for line in f:
            if host_key not in line:
                continue
            fields = line.split()
            if len(fields) != 2:
                continue
            alias = fields[1]
            if alias != host and alias.startswith('host-'):
                return alias
    return host

def remote_cli(host, cmdline, split=False):
    """Run *cmdline* on *host* over ssh (as root) and return its output via cli().

    The command is wrapped in single quotes, so *cmdline* itself must not
    contain single quotes.

    Raises:
        CmdError: if *host* or *cmdline* is empty, or the command fails.
    """
    if not (host and cmdline):
        raise CmdError("remote cli input host or cmd is empty")

    ssh_cmdline = "/usr/bin/ssh root@{0:s} '{1:s}'".format(get_ssh_host(host), cmdline)
    return cli(ssh_cmdline, split)


def cluster_cli(current_host, hosts, cmdline):
    """Run *cmdline* on every host in *hosts* and collect the output.

    The command runs locally for *current_host* and over ssh for the others.

    NOTE(review): cli()/remote_cli() are called with split=False and thus
    return one string; extending a list with a string adds its characters one
    by one, so the returned list holds single characters rather than lines.
    Confirm with callers whether per-line output was intended.
    """
    collected = []
    for node in hosts:
        if node == current_host:
            output = cli(cmdline)
        else:
            output = remote_cli(node, cmdline)
        collected.extend(output)
    return collected


def remote_vt_cli(host, cmdline, split):
    """Run *cmdline* over ssh on port 22346 of *host* and return its output.

    An empty *host* defaults to the local host name.

    Raises:
        CmdError: if no host can be determined, *cmdline* is empty, or the
            command fails.
    """
    target = host if host else socket.gethostname()
    if not (target and cmdline):
        raise CmdError("remote cli input host or cmd is empty")

    ssh_cmdline = "/usr/bin/ssh -p 22346 root@{0:s} '{1:s}'".format(get_ssh_host(target), cmdline)
    return cli(ssh_cmdline, split)


# Numeric codes for VS releases, ordered so that versions can be compared
# with <, <= and so on. get_vs_version() maps /sf/vs/version to one of these.
VS_VERSION_INVALID = 0
VS_VERSION_2_0 = 6
VS_VERSION_2_1 = 7
VS_VERSION_2_2 = 8
VS_VERSION_2_3 = 9
VS_VERSION_2_6 = 10  # different volume-name format, no ZK
VS_VERSION_2_8 = 20  # volume-name format with proper multi-volume support, has ZK
VS_VERSION_3_0 = 100  # new architecture, has ZK
VS_VERSION_3_0_1 = 101
VS_VERSION_3_0_2 = 102
VS_VERSION_3_0_3 = 103 # tiering upgrade (from V1 to V2)
VS_VERSION_3_0_4 = 104
VS_VERSION_3_0_5 = 105
VS_VERSION_3_0_6 = 106
VS_VERSION_3_1 = 110
VS_VERSION_3_2 = 120
VS_VERSION_3_3 = 130  # has mongo
VS_VERSION_3_4 = 140
VS_VERSION_3_5 = 150  # has containers
VS_VERSION_3_6 = 160
VS_VERSION_3_7 = 170
VS_VERSION_3_8 = 180
VS_VERSION_MAX = 190  # the next version number


def _parse_vs_version(version_string):
    """Map a dotted version string such as '3.0.6' to a VS_VERSION_* code."""
    # 2.x releases are recognised by prefix; order matches the original checks.
    legacy_prefixes = (
        ('2.6', VS_VERSION_2_6),
        ('2.0', VS_VERSION_2_0),
        ('2.1', VS_VERSION_2_1),
        ('2.2', VS_VERSION_2_2),
        ('2.3', VS_VERSION_2_3),
        ('2.8', VS_VERSION_2_8),
    )
    for prefix, code in legacy_prefixes:
        if version_string.startswith(prefix):
            return code

    if not version_string.startswith('3.'):
        return VS_VERSION_INVALID

    # 3.x releases: decide on the middle field, then (for 3.0) the last one.
    fields = version_string.split('.')
    if not fields[1].isdigit():
        return VS_VERSION_INVALID
    minor = int(fields[1])

    if minor == 0:
        # A missing or non-numeric patch field used to raise
        # IndexError/ValueError; report it as an invalid version instead.
        if len(fields) < 3 or not fields[2].isdigit():
            return VS_VERSION_INVALID
        patch_codes = {
            0: VS_VERSION_3_0,
            1: VS_VERSION_3_0_1,
            2: VS_VERSION_3_0_2,
            3: VS_VERSION_3_0_3,
            4: VS_VERSION_3_0_4,
            5: VS_VERSION_3_0_5,
        }
        # 3.0.6 is currently the last 3.0.x release, so anything higher maps to it.
        return patch_codes.get(int(fields[2]), VS_VERSION_3_0_6)

    minor_codes = {
        1: VS_VERSION_3_1,
        2: VS_VERSION_3_2,
        3: VS_VERSION_3_3,
        4: VS_VERSION_3_4,
        5: VS_VERSION_3_5,
        6: VS_VERSION_3_6,
        7: VS_VERSION_3_7,
        8: VS_VERSION_3_8,
    }
    return minor_codes.get(minor, VS_VERSION_INVALID)


def get_vs_version():
    """Return the VS_VERSION_* code of the installed VS release.

    Different releases need different handling, so callers branch on this.
    The version is read from the first line of /sf/vs/version, using the part
    before the first '_'.

    Returns:
        A VS_VERSION_* constant; VS_VERSION_INVALID when the file is missing
        or empty, the version is not recognised, or a fault is injected.
    """
    version_file = '/sf/vs/version'
    # fault_point_result() must be called from this function directly: it
    # inspects its caller's frame to decide whether to inject a fault.
    if fault_point_result() or not os.path.exists(version_file) or os.path.getsize(version_file) == 0:
        return VS_VERSION_INVALID

    with open(version_file, 'r') as version_file_fd:
        first_line = version_file_fd.readline()  # only the first line carries the version

    version_string = first_line.strip().split('_')[0].strip()
    return _parse_vs_version(version_string)

def vs_is_eds():
    """Return True when the current environment is an EDS cluster.

    That is the case when both /sf/etc/version and /sf/etc/version.eds exist
    and have identical content.
    """
    sf_version_conf = '/sf/etc/version'
    eds_version_conf = '/sf/etc/version.eds'
    for conf in (sf_version_conf, eds_version_conf):
        if not os.path.exists(conf):
            return False

    with open(sf_version_conf, 'r') as sf_fd, open(eds_version_conf, 'r') as eds_fd:
        return sf_fd.readlines() == eds_fd.readlines()

def is_vs_version_valid(version):
    """Generic VS version check.

    Returns True when *version* lies within [VS_VERSION_2_0, VS_VERSION_MAX];
    otherwise logs an error and returns False.
    """
    if VS_VERSION_2_0 <= version <= VS_VERSION_MAX:
        return True
    logger.error('failed to supported, version: {}'.format(version))
    return False

def vs_is_two_host(hosts):
    """Return True when the VS cluster has at most two hosts (single host included).

    *hosts* is the list of cluster hosts; anything but a list trips the assert.
    """
    assert isinstance(hosts, list)
    return len(hosts) <= 2

def vs_has_efs(version, hosts):
    """Return True when the cluster has EFS: VS 3.x architecture with more than two hosts."""
    if version < VS_VERSION_3_0:
        return False
    return not vs_is_two_host(hosts)

def is_vs2x():
    """Return True when the installed VS version is valid and no newer than 2.8."""
    version = get_vs_version()
    return version != VS_VERSION_INVALID and version <= VS_VERSION_2_8


def is_vs3x():
    """Return True when the installed VS version is newer than 2.8."""
    return get_vs_version() > VS_VERSION_2_8


def get_vs_cluster_info():
    """Collect cluster topology by parsing the output of `gluster vol info`.

    Returns:
        A 6-tuple ``(volume_name, hosts, replicate_num, has_arbiter, bricks,
        replicate)``:

        * volume_name -- taken from the line matching ``vs_vol_rep<digit>``,
          or None when no such line is seen.
        * hosts -- de-duplicated list of host names found on brick lines.
        * replicate_num -- the digit following ``vs_vol_rep`` (default 2).
        * has_arbiter -- True when any brick line mentions an arbiter.
        * bricks -- dict mapping host -> list of brick dicts
          (``id``, ``host``, ``path``, ``arbiter``).
        * replicate -- dict of brick lists; keyed by replica-group index on
          2.x, and by LEFT_TREE_INDEX / RIGHT_TREE_INDEX on 3.x.

        When a fault is injected the initial (empty) values are returned.
    """
    volume_name = None
    has_arbiter = False
    replicate_num = 2
    hosts = []
    replicate = {}
    bricks = {}
    vs2x = is_vs2x()

    if fault_point_result():
        return volume_name, hosts, replicate_num, has_arbiter, bricks, replicate

    cmdline = "/sf/vs/glusterfs/sbin/gluster vol info"
    lines = cli(cmdline, True)
    i = -1  # running count of non-arbiter bricks (2.x replica grouping)
    for line in lines:
        # The cluster has no volume: stop right away.
        if line == 'No volumes present':
            break

        # Extract the volume name and the replica count.
        m_replicate_num = re.search(r"vs_vol_rep\d", line)
        if m_replicate_num:
            # 'vs_vol_rep' is 10 characters long; what follows is the digit.
            replicate_num = int(m_replicate_num.group()[10:])
            volume_name = line.split(':')[1].strip()

        if re.search(r"host-\w{12}:", line):
            # Build the host list.
            host = line.split(':')[1].strip()
            hosts.append(host)

            arbiter = False
            # NOTE(review): the parentheses here are regex groups, not
            # literals, so this matches 'arbiter' anywhere in the line and the
            # second search is redundant -- confirm whether a literal
            # '(arbiter)' was intended.
            if re.search(r"(arbiter)", line) or re.search(r"(meta-arbiter)", line):
                arbiter = True
                # Record that the volume has an arbiter replica.
                if not has_arbiter:
                    has_arbiter = True

            # Build the brick description.
            brick_id = int(line.split(':')[0].replace('Brick', '').strip())
            path = line.split(':')[2].strip().split()[0]
            brick_path = {
                'id': brick_id,
                'host': host,
                'path': path,
                'arbiter': arbiter
            }

            # Register the brick under its host.
            if not bricks.get(host):
                bricks[host] = []
            bricks[host].append(brick_path)

            if vs2x:
                # 2.x: build replica-group information. Arbiter bricks do not
                # advance the counter, so they join the preceding group.
                if not re.search(r"(arbiter)", line):
                    i += 1
                # NOTE(review): relies on Python 2 integer division.
                rep = i / replicate_num
                if not replicate.get(rep):
                    replicate[rep] = []
                replicate[rep].append(brick_path)
            else:
                # 3.x: build left/right subtree information
                # (0 -- left subtree, 1 -- right subtree).
                if re.search(r"\(meta", line):
                    rep = LEFT_TREE_INDEX
                else:
                    rep = RIGHT_TREE_INDEX
                if not replicate.get(rep):
                    replicate[rep] = []
                replicate[rep].append(brick_path)
    # Example of the resulting "bricks" mapping:
    # {'host-0050569526eb':
    # [{'path': '/sf/data/vs/local/RMIXXG-3yl6-R0kc-UrfP-FNyj-aQnf-pYhahY/72609b34-a19d-11ef-90da-005056959419',
    #   'host': 'host-0050569526eb',
    #   'id': 3,
    #   'arbiter': False}]}
    return volume_name, list(set(hosts)), replicate_num, has_arbiter, bricks, replicate


def get_online_bricks(volume_name):
    """Return the online flag of every brick of *volume_name*.

    Parses `gluster vol status <volume> detail`: each line containing a brick
    identifier (6-4-4-4-4-4-6 word characters) provides the brick path (third
    ':'-separated field), and the line two rows below provides the status
    (third whitespace-separated field, lower-cased).

    Returns:
        dict mapping brick path -> status string, for example::

            {'/sf/data/vs/local/rq1gmf-yaXj-xeZf-7pGS-vkJV-btAx-tCpApe-meta/31e4a922-d07c-46dc-af0c-b909af88fb6e': 'y',
             '/sf/data/vs/local/bOc6ym-Kr4Z-cOun-bGB3-8ljb-Kqba-RKjYL7/595990bf-611b-40ac-bf12-f0c02533cf6e': 'n'}

    Raises:
        CmdError: if the gluster command fails.
    """
    cmdline = "/sf/vs/glusterfs/sbin/gluster vol status {} detail".format(volume_name)
    lines = cli(cmdline, True)

    brick_re = re.compile(r"\w{6}-\w{4}-\w{4}-\w{4}-\w{4}-\w{4}-\w{6}")
    bricks = {}
    # enumerate() gives the true position of each line; the former
    # lines.index(line) lookup was quadratic and returned the first
    # occurrence when identical lines appear more than once.
    for index, line in enumerate(lines):
        status_index = index + 2
        if status_index >= len(lines) or not brick_re.search(line):
            continue
        path = line.split(':')[2].strip()
        bricks[path] = lines[status_index].split()[2].lower()

    return bricks


def check_if_vmid_running(vmid):
    """Return True when the virtual machine identified by *vmid* is running.

    The state is read from the VM status files under /cfs:
    ``/cfs/vmstatus.json`` (used by releases after HCI680) and
    ``/cfs/nodes/cluster/vmstatus.info`` (used by earlier releases).
    On EDS environments every VM is reported as not running.

    Args:
        vmid: VM id, as a string or anything convertible with ``str()``.

    Raises:
        CmdError: when neither status file exists, so the state cannot be
            determined.
    """
    # EDS environment: always report "not powered on".
    if vs_is_eds():
        return False

    if not isinstance(vmid, str):
        vmid = str(vmid)

    vmstat_json = '/cfs/vmstatus.json'
    vmstat_info = '/cfs/nodes/cluster/vmstatus.info'

    if not os.path.exists(vmstat_json) and not os.path.exists(vmstat_info):
        raise CmdError("cannot to get vm status")

    if os.path.exists(vmstat_json):
        with open(vmstat_json, 'r') as f:
            vmstat = json.load(f)
        entry = vmstat.get(vmid)
        if entry and entry.get('power') == 1:
            return True

    if os.path.exists(vmstat_info):
        with open(vmstat_info, 'r') as f:
            for line in f:
                # Strip the line terminator and padding first: otherwise a
                # two-field line reads as 'stopped\n' and is never equal to
                # 'stopped', which reported stopped VMs as running.
                fields = [field.strip() for field in line.split(',')]
                if len(fields) < 2 or not fields[0].isdigit():
                    continue
                if int(fields[0]) == int(vmid) and fields[1] not in ('stopped', 'offline'):
                    return True
    return False

def check_if_vm_running(vm_dir_path):
    """Return True when *vm_dir_path* is a VM directory whose VM is running.

    A VM directory is an existing path containing '/images/' that holds a
    '<digits>.conf' file; the digits are the VM id passed on to
    check_if_vmid_running(). Anything else yields False.
    """
    if not vm_dir_path:
        return False
    if '/images/' not in vm_dir_path:
        return False
    if not os.path.exists(vm_dir_path):
        return False

    # The first '<digits>.conf' entry names the VM id.
    vmid = next(
        (entry.split('.')[0] for entry in os.listdir(vm_dir_path)
         if re.search(r'^\d+\.conf$', entry)),
        None)
    if not vmid:
        return False

    return check_if_vmid_running(vmid)


def files_path_to_vms_name(files_path):
    """Map a list of file paths to the virtual machines they belong to.

    /cfs/.vmlist is read as JSON; for every path the first not-yet-recorded
    VM whose directory ``images/<node>/<dir>/<name>.vm`` occurs in the path
    is recorded.

    Args:
        files_path: iterable of file path strings (empty entries are skipped).

    Returns:
        dict mapping vmid -> VM name (``vm_name`` when present, else
        ``name``); empty when there is no input, no usable /cfs/.vmlist, or
        no 'ids' section in it.
    """
    vms_info = {}
    vmlist_file = '/cfs/.vmlist'
    if not files_path or not os.path.exists(vmlist_file) or os.path.getsize(vmlist_file) == 0:
        return vms_info

    with open(vmlist_file, 'r') as vms_file_fd:
        # No encoding= argument: UTF-8 is already the default on Python 2 and
        # the keyword was removed from json.load() in Python 3.9.
        vms = json.load(vms_file_fd)

    if 'ids' not in vms:
        return vms_info

    for file_path in files_path:
        if not file_path:
            continue
        for vmid, vminfo in vms['ids'].items():
            if vms_info.get(vmid):
                continue
            vm_dir = os.path.join('images', vminfo['node'], vminfo['dir'], vminfo['name'] + '.vm')
            if vm_dir in file_path:
                vms_info[vmid] = vminfo['vm_name'] if vminfo.get('vm_name') else vminfo['name']
                break
    return vms_info


def check_terminal_input(readline):
    """Prompt on the terminal until the operator answers yes or no.

    Args:
        readline: prompt text shown before each read.

    Returns:
        None once 'y' (any case) has been entered.

    Raises:
        CmdError: if the prompt is empty, the operator answers 'n', or
            standard input is closed before an answer is given.
    """
    if not readline:
        raise CmdError("readline is None")

    color = Colored()
    while True:
        print(color.fuchsia('{}'.format(readline)))
        raw = sys.stdin.readline()
        if raw == '':
            # EOF (stdin closed or not a terminal): without this check the
            # loop would spin forever printing the error message.
            raise CmdError("Manual check cancellation: stdin closed")
        step = raw.strip('\n').lower()
        if step == 'y':
            break
        elif step == 'n':
            raise CmdError("Manual check cancellation")
        else:
            print(color.red('输入字符错误，请重新输入'))


def vs_getfattr_2x(host, brick_path, file_path):
    """Dump all extended attributes (hex encoded) of a brick file on *host* (2.x)."""
    target = os.path.join(brick_path, file_path)
    cmdline = '{} -d -m. -e hex "{}"'.format(s_getfattr_bin, target)
    return remote_cli(host, cmdline, False)


def vs_setfattr_2x(host, brick_path, file_path, key, value=None):
    """Set or remove extended attribute *key* of a brick file on *host* (2.x).

    A falsy *value* removes the attribute; otherwise it is set to *value*.
    """
    target = os.path.join(brick_path, file_path)
    if value:
        cmdline = '{} -n "{}" -v "{}" "{}"'.format(s_setfattr_bin, key, value, target)
    else:
        cmdline = '{} -x "{}" "{}"'.format(s_setfattr_bin, key, target)
    logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
    return remote_cli(host, cmdline, False)


def vs_getfattr_3x(host, brick_path, gfid):
    """Query the extended attributes of *gfid* through efs_dbg on *host* (3.x).

    The brick process id is taken from the second column of the `ps aux`
    line that mentions *brick_path*. Returns the efs_dbg output, or None when
    no pid could be extracted.
    """
    ps_cmdline = '/bin/ps aux | grep {} | /bin/grep -v supervise | /bin/grep -v grep'.format(brick_path)
    columns = remote_cli(host, ps_cmdline, False).split()
    if len(columns) <= 1 or not columns[1].isdigit():
        return None

    brick_pid = columns[1]
    efs_cmd = 'inode xattr {}'.format(gfid)
    dbg_cmdline = '/sf/vs/sbin/efs_dbg -p {} -c "{}"'.format(brick_pid, efs_cmd)
    return remote_cli(host, dbg_cmdline, False)


def vs_setfattr_3x(host, brick_path, gfid, key, value=None):
    """Set or remove extended attribute *key* of *gfid* through efs_dbg on *host* (3.x).

    A falsy *value* removes the attribute; otherwise it is set to *value*.
    Returns the efs_dbg output, or None when the brick process id could not
    be extracted from `ps aux`.
    """
    ps_cmdline = '/bin/ps aux | grep {} | /bin/grep -v supervise | /bin/grep -v grep'.format(brick_path)
    columns = remote_cli(host, ps_cmdline, False).split()
    if len(columns) <= 1 or not columns[1].isdigit():
        return None

    brick_pid = columns[1]
    if value:
        efs_cmd = 'inode xattr set {} {}={}'.format(gfid, key, value)
    else:
        efs_cmd = 'inode xattr remove {} {}'.format(gfid, key)
    dbg_cmdline = '/sf/vs/sbin/efs_dbg -p {} -c "{}"'.format(brick_pid, efs_cmd)
    logger.info('try to host: {}, cmdline: {}'.format(host, dbg_cmdline))
    return remote_cli(host, dbg_cmdline, False)

def vs_remove_file_3x(host, brick_path, gfid):
    """Remove the data of *gfid* from a 3.x brick on *host*.

    First unlinks the inode through efs_dbg (skipped when no brick process id
    can be found), then deletes the per-file entries under
    .glusterfs/vs_difference, vs_fiemap and vs_lease.
    """
    # Delete the EFS data. '|| /bin/echo' keeps the pipeline from failing
    # when grep finds no matching process.
    ps_cmdline = '/bin/ps aux | grep {} | /bin/grep -v supervise | /bin/grep -v grep || /bin/echo'.format(brick_path)
    columns = remote_cli(host, ps_cmdline, False).split()
    if len(columns) > 1 and columns[1].isdigit():
        efs_cmd = 'itable unlink {}'.format(gfid)
        unlink_cmdline = '/sf/vs/sbin/efs_dbg -p {} -c "{}"'.format(columns[1], efs_cmd)
        logger.info('try to host: {}, cmdline: {}'.format(host, unlink_cmdline))
        remote_cli(host, unlink_cmdline, False)

    # Delete the remaining per-file entries, named after the dash-less gfid.
    short_gfid = gfid.replace('-', '')
    rm_template = '/bin/rm -f {}/.glusterfs/vs_difference/{};' \
                  '/bin/rm -f {}/.glusterfs/vs_difference/{}.drlog;' \
                  '/bin/rm -f {}/.glusterfs/vs_fiemap/{};' \
                  '/bin/rm -f {}/.glusterfs/vs_lease/{}'
    rm_cmdline = rm_template.format(*([brick_path, short_gfid] * 4))
    logger.info('try to host: {}, cmdline: {}'.format(host, rm_cmdline))
    remote_cli(host, rm_cmdline, False)

def vs_remove_file_2x(host, brick_path, file_name, landfill_name):
    """Move a file of a 2.x brick on *host* into the landfill and mark it deletable.

    Args:
        host: host owning the brick.
        brick_path: brick root directory.
        file_name: path relative to the brick; a '.glusterfs/xx/yy/<gfid>'
            path is first resolved to the real path with `find -samefile`.
        landfill_name: name to give the file under .glusterfs/landfill.

    Raises:
        CmdError: if the real path of a gfid path cannot be resolved or a
            remote command fails.
    """
    # A gfid path has to be converted to the real path first.
    if re.search(r"^.glusterfs/\w{2}/\w{2}/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}$", file_name):
        find_cmdline = '/usr/bin/find {} -samefile {}/{} -not -path */.glusterfs/*'.format(brick_path, brick_path, file_name)
        real_path = remote_cli(host, find_cmdline, False)
        if not (real_path and real_path.startswith(brick_path)):
            raise CmdError('{} failed to get real path'.format(file_name))
        logger.info('got gfid path: {}, real path: {}'.format(file_name, real_path))
        file_name = real_path.replace(brick_path, '').strip()

    # Move the file into the recycle area.
    local_filepath = '{}/{}'.format(brick_path, file_name)
    local_landfill = '{}/.glusterfs/landfill/{}'.format(brick_path, landfill_name)
    mv_cmdline = '/bin/mv "{}" "{}"'.format(local_filepath, local_landfill)
    logger.info('try to host: {}, cmdline: {}'.format(host, mv_cmdline))
    remote_cli(host, mv_cmdline, False)

    # Set the garbage-collection marker on the moved file.
    mark_cmdline = '{} -n user.glusterfs.rubbish_allow_delete -v {} "{}"'.format(s_setfattr_bin, vsfire_recovery_dir, local_landfill)
    logger.info('try to cmdline: {}'.format(mark_cmdline))
    remote_cli(host, mark_cmdline, False)

def vs_get_vm_path_byvmid(vmid):
    """Return the directory of VM *vmid* as seen through the VS mount point.

    The path printed by vs_quick_get_vmpath_by_vmid.sh is used; a vt mount
    path ('/sf/data/...') is rewritten to the vs mount ('/sf/data/vs/gfs/...').
    """
    cmdline = '/sf/vs/bin/vs_quick_get_vmpath_by_vmid.sh {}'.format(vmid)
    vm_path = cli(cmdline, False).strip('\n').strip()

    under_vt_mount = vm_path.startswith('/sf/data/')
    under_vs_mount = vm_path.startswith('/sf/data/vs/gfs/')
    if under_vt_mount and not under_vs_mount:
        # Convert the vt mount point into the vs mount point.
        vm_path = vm_path.replace('/sf/data/', '/sf/data/vs/gfs/')
    return vm_path


def get_composite_volumes():
    """Return the names of all volumes listed in /sf/cfg/vs/gluster_conf.json.

    Normally the data plane sees a single volume; in the composite-volume
    scenario there are two.

    Returns:
        list of ``volname`` strings; empty when the file is missing, empty,
        or has no 'volumes' entry.
    """
    volume_conf = '/sf/cfg/vs/gluster_conf.json'
    if not os.path.exists(volume_conf) or os.path.getsize(volume_conf) == 0:
        return []

    with open(volume_conf, 'r') as volume_conf_fd:
        # No encoding= argument: UTF-8 is already the default on Python 2 and
        # the keyword was removed from json.load() in Python 3.9.
        volumes_info = json.load(volume_conf_fd)

    return [volume['volname'] for volume in volumes_info.get('volumes') or []]


def get_split_brain_files(volume_name, version):
    """Walk the volume mount point and return the paths of split-brain files.

    Every regular file is listed with `ls -l`; the error output is piped
    through awk and the lines that start with '/' are collected.

    Returns:
        list of paths, e.g. ``['/test.qcow2', '/tt.qcow2']``.
    """
    mount_path = get_vs_mount_path(volume_name, version)
    cmdline = '/usr/bin/find {} -type f '.format(mount_path)
    cmdline += "-exec sh -c 'ls -l \"$1\" >/dev/null' shell {} \; 2>/dev/stdout | awk -F\"rep2|\'\" '{print $2}'"
    logger.info('try to cmdline: {}'.format(cmdline))

    return [line.strip() for line in cli(cmdline, True)
            if line and line.startswith('/')]


def get_split_brain_vmids(volume_name, version):
    """Return the VM ids owning the split-brain files of the volume.

    For each split-brain file outside the mount root, the containing
    directory is searched for a '<digits>.conf' file; the digits are the VM id.

    Returns:
        list of int VM ids, e.g. ``[553592958130, 1523476242006]`` (one per
        split-brain file that lives in a VM directory).
    """
    vmids = []
    for relative_path in get_split_brain_files(volume_name, version):
        nfs_path = '{}/{}'.format(get_vs_mount_path(volume_name, version), relative_path)
        vm_dir_path = os.path.dirname(nfs_path)
        if vm_dir_path == get_vs_mount_path(volume_name, version):
            continue
        if not os.path.exists(vm_dir_path):
            continue
        for entry in os.listdir(vm_dir_path):
            stem = entry.split('.')[0]
            if re.match(r'\d+\.conf$', entry) and stem.isdigit():
                vmids.append(int(stem))
                break
    return vmids


def is_wcc_dirty_2x(host, brick_path, file_path):
    """Return True when the file's 'user.glusterfs.wcache' xattr is non-zero (2.x)."""
    xattr_lines = vs_getfattr_2x(host, brick_path, file_path).split('\n')
    pairs = (line.split('=') for line in xattr_lines)
    return any(
        len(pair) > 1
        and pair[0] == 'user.glusterfs.wcache'
        and pair[1] != '0x0000000000000000'
        for pair in pairs)


def force_clean_wcache_2x(volume_name, host, brick_path, file_path):
    """Force the write cache of *volume_name* to flush and wait until the file is clean.

    Sets performance.wcc-delay-time to 0 and performance.wcc-wbforce to on,
    then polls every 3 seconds until is_wcc_dirty_2x() reports clean.
    There is no timeout.
    """
    set_cmdlines = (
        '/sf/vs/glusterfs/sbin/gluster v set {} performance.wcc-delay-time 0'.format(volume_name),
        '/sf/vs/glusterfs/sbin/gluster v set {} performance.wcc-wbforce on'.format(volume_name),
    )
    for cmdline in set_cmdlines:
        logger.info('try to cmdline: {}'.format(cmdline))
        cli(cmdline)

    time.sleep(3)
    while is_wcc_dirty_2x(host, brick_path, file_path):
        time.sleep(3)


def reset_clean_wcache_2x(volume_name):
    """Reset the write-cache options changed by force_clean_wcache_2x() to their defaults."""
    reset_cmdlines = (
        '/sf/vs/glusterfs/sbin/gluster v reset {} performance.wcc-delay-time'.format(volume_name),
        '/sf/vs/glusterfs/sbin/gluster v reset {} performance.wcc-wbforce'.format(volume_name),
    )
    for cmdline in reset_cmdlines:
        logger.info('try to cmdline: {}'.format(cmdline))
        cli(cmdline)


def is_tier_dirty_2x(host, brick_path, file_path):
    """Return True when the file's 'user.glusterfs.tier_status' xattr is non-zero (2.x)."""
    xattr_lines = vs_getfattr_2x(host, brick_path, file_path).split('\n')
    pairs = (line.split('=') for line in xattr_lines)
    return any(
        len(pair) > 1
        and pair[0] == 'user.glusterfs.tier_status'
        and pair[1] != '0x0000000000000000'
        for pair in pairs)


def vs_get_gfid_2x(host, brick_path, file_path):
    """Return the dashed GFID of a brick file on *host*, or None if it has none (2.x).

    The 'trusted.gfid' xattr ('0x' followed by 32 hex digits) is reformatted
    into the 8-4-4-4-12 layout.
    """
    target = os.path.join(brick_path, file_path)
    cmdline = '{} -d -m. -e hex "{}"'.format(s_getfattr_bin, target)
    for line in remote_cli(host, cmdline, True):
        pair = line.split('=')
        if len(pair) <= 1 or pair[0] != 'trusted.gfid':
            continue
        gfid_hex = pair[1]
        # Offsets skip the leading '0x'.
        bounds = ((2, 10), (10, 14), (14, 18), (18, 22), (22, 34))
        return '{}-{}-{}-{}-{}'.format(*[gfid_hex[start:end] for start, end in bounds])
    return None

def get_tier_file_info(shard_gfid, host, brick_path):
    """Return the tiering block counters of a file.

    Runs `vs_tier_cli.py -c dump -a inode` on *host* over ssh and filters the
    result locally with jq for the inode whose brick id is *brick_path* and
    whose uuid is *shard_gfid*. The selected object looks like::

        {
          "uuid": "e15b3b1b-c59f-43e0-9977-59d6f2989dc6",
          "i_no": 13,
          "i_priority": 2,
          "i_use": 1,
          "i_brick_no": 0,
          "i_size": 2281701376,
          "atime": "2025-04-28 02:37:12",
          "ctime": "2025-03-11 10:11:10",
          "mtime": "2025-03-14 17:39:17",
          "block_cnt": 0,
          "dirty_block_cnt": 0,
          "clean_block_cnt": 0,
          "unlink": 0,
          "hit": "0/16384 = 0%",
          "kvm_hit": "0/16384 = 0%",
          "kvm_hit_req": "0/4 = 0%",
          "utilization ratio": "0.00%(0/0)",
          "new data": 0,
          "blocks": 17408
        }

    Returns:
        ``(dirty_block_cnt, clean_block_cnt)``; ``(0, 0)`` when the jq tool
        is not installed.

    Raises:
        CmdError: if the command pipeline fails.
    """
    # Depends on the jq tool, which is not available on version 2.6.
    if not os.path.exists('/sf/vs/bin/jq'):
        # Same 2-tuple shape as the normal return below; this path used to
        # return three values, which broke callers unpacking two.
        return 0, 0

    cmdline = '/usr/bin/ssh root@{} /sf/vs/bin/vs_tier_cli.py -c dump -a inode | /sf/vs/bin/jq -r '.format(get_ssh_host(host))
    cmdline += " ' "
    cmdline += '.ssd[].brick[] | select(.bi_brickid == "{}") | .inodes[] | select(.uuid == "{}")'.format(brick_path, shard_gfid)
    cmdline += " ' "
    result = cli(cmdline, False)
    tier_file = json.loads(result.decode('utf-8').strip())
    dirty_block_cnt = tier_file.get('dirty_block_cnt')
    clean_block_cnt = tier_file.get('clean_block_cnt')
    return dirty_block_cnt, clean_block_cnt

def force_clean_tier_2x(host, brick_path, file_path):
    """Kick a 2.x file out of the tier and wait until its tier status is clean.

    Resolves the file's gfid, issues a tier ``kickout`` for it on ``host``
    (immediately, then again about once a minute) and polls
    ``is_tier_dirty_2x`` every 3 seconds. There is no timeout.

    Raises CmdError when the gfid cannot be read, or when the kickout fails
    with anything other than an "is demoting or unlink" message.
    """
    gfid = vs_get_gfid_2x(host, brick_path, file_path)
    if not gfid:
        raise CmdError('failed to get gfid for file_path: {}'.format(file_path))

    poll_interval = 3        # seconds between two cleanliness checks
    kickout_period = 60      # re-issue the kickout roughly once a minute
    polls_since_kickout = 0  # polls done since the last kickout
    elapsed = 0              # total seconds waited so far
    while True:
        kickout_due = (polls_since_kickout == 0 or
                       kickout_period < polls_since_kickout * poll_interval)
        if kickout_due:
            polls_since_kickout = 0
            cmdline = '/sf/vs/bin/vs_tier_cli.py -c kickout -a brick_id={},gfid={}'.format(brick_path, gfid)
            logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
            try:
                remote_cli(host, cmdline)
            except CmdError as e:
                if 'is demoting or unlink' not in str(e):
                    raise
                # The SSD is currently being demoted or deleted; keep waiting.
                logger.warn('got except: {}'.format(str(e)))

        time.sleep(poll_interval)
        polls_since_kickout += 1
        elapsed += poll_interval
        print_with_clear('{} 当前正在淘汰分层数据, 已经执行: {} 秒'.format(gfid, elapsed))
        if not is_tier_dirty_2x(host, brick_path, file_path):
            print('\n')
            return

# 3.x: derive the number of replicas from a shard route
def get_replicate_num_3x(route):
    """Map ``route['rep_type']`` to a replica count.

    rep_type 2 -> 2, rep_type 1 or 3 -> 3, rep_type 4 -> 5.
    Any other (or missing) rep_type yields 0.
    """
    rep_type = route.get('rep_type')
    if rep_type == 2:
        return 2
    if rep_type in (1, 3):
        return 3
    return 5 if rep_type == 4 else 0


def is_wcc_dirty_3x(shard_gfid, host, brick_path):
    """Return True if the shard's ``user.glusterfs.wcache`` xattr is non-zero.

    Finds the brick process on ``host`` by grepping ``ps aux`` for
    ``brick_path`` (excluding supervise), then asks ``efs_dbg`` for the
    inode xattrs of ``shard_gfid``. Returns False when no numeric pid is
    found in the ps output.
    """
    ps_cmd = '/bin/ps aux | grep {} | /bin/grep -v supervise | /bin/grep -v grep'.format(brick_path)
    ps_fields = remote_cli(host, ps_cmd, False).split()
    # The second whitespace-separated token of the ps output is used as the pid.
    if len(ps_fields) < 2 or not ps_fields[1].isdigit():
        return False

    efs_cmd = 'inode xattr {}'.format(shard_gfid)
    dbg_cmd = '/sf/vs/sbin/efs_dbg -p {} -c "{}"'.format(ps_fields[1], efs_cmd)
    for xattr_line in remote_cli(host, dbg_cmd, True):
        fields = xattr_line.split('=')
        if fields[0] == 'user.glusterfs.wcache' and fields[1] != '0x0000000000000000':
            return True
    return False


def force_clean_wcache_3x(volume_name, shard_gfid, host, brick_path):
    """Force wcache write-back on a 3.x volume and wait until the shard is clean.

    Sets ``performance.wcc-delay-time`` to 0 and ``performance.wcc-wbforce``
    to ``on`` for ``volume_name``, then polls ``is_wcc_dirty_3x`` every
    3 seconds until the shard is no longer reported dirty.
    The wait has no timeout.
    """
    set_templates = (
        '/sf/vs/glusterfs/sbin/gluster v set {} performance.wcc-delay-time 0',
        '/sf/vs/glusterfs/sbin/gluster v set {} performance.wcc-wbforce on',
    )
    for template in set_templates:
        cmdline = template.format(volume_name)
        logger.info('try to cmdline: {}'.format(cmdline))
        cli(cmdline)

    # Always sleep before the first check, then keep polling while dirty.
    still_dirty = True
    while still_dirty:
        time.sleep(3)
        still_dirty = is_wcc_dirty_3x(shard_gfid, host, brick_path)


def is_tier_dirty_3x(shard_gfid, host, brick_path):
    """Return True if the shard's ``user.glusterfs.tier_status`` xattr is non-zero.

    Finds the brick process on ``host`` by grepping ``ps aux`` for
    ``brick_path`` (excluding supervise), then asks ``efs_dbg`` for the
    inode xattrs of ``shard_gfid``. Returns False when no numeric pid is
    found in the ps output.
    """
    ps_cmd = '/bin/ps aux | grep {} | /bin/grep -v supervise | /bin/grep -v grep'.format(brick_path)
    ps_fields = remote_cli(host, ps_cmd, False).split()
    # The second whitespace-separated token of the ps output is used as the pid.
    if len(ps_fields) < 2 or not ps_fields[1].isdigit():
        return False

    efs_cmd = 'inode xattr {}'.format(shard_gfid)
    dbg_cmd = '/sf/vs/sbin/efs_dbg -p {} -c "{}"'.format(ps_fields[1], efs_cmd)
    for xattr_line in remote_cli(host, dbg_cmd, True):
        fields = xattr_line.split('=')
        if fields[0] == 'user.glusterfs.tier_status' and fields[1] != '0x0000000000000000':
            return True
    return False


def force_clean_tier_3x(shard_gfid, host, brick_path):
    """Kick a 3.x shard out of the tier and wait until its tier status is clean.

    Issues a tier ``kickout`` for ``shard_gfid`` on ``host`` (immediately,
    then again about once a minute) and polls ``is_tier_dirty_3x`` every
    3 seconds. There is no timeout.

    Raises CmdError when the kickout fails with anything other than an
    "is demoting or unlink" message.
    """
    poll_interval = 3        # seconds between two cleanliness checks
    kickout_period = 60      # re-issue the kickout roughly once a minute
    polls_since_kickout = 0  # polls done since the last kickout
    elapsed = 0              # total seconds waited so far
    while True:
        kickout_due = (polls_since_kickout == 0 or
                       kickout_period < polls_since_kickout * poll_interval)
        if kickout_due:
            polls_since_kickout = 0
            cmdline = '/sf/vs/bin/vs_tier_cli.py -c kickout -a brick_id={},gfid={}'.format(brick_path, shard_gfid)
            logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
            try:
                remote_cli(host, cmdline)
            except CmdError as e:
                if 'is demoting or unlink' not in str(e):
                    raise
                # The SSD is currently being demoted or deleted; keep waiting.
                logger.warn('got except: {}'.format(str(e)))

        time.sleep(poll_interval)
        polls_since_kickout += 1
        elapsed += poll_interval
        print_with_clear('{} 当前正在淘汰分层数据, 已经执行: {} 秒'.format(shard_gfid, elapsed))
        if not is_tier_dirty_3x(shard_gfid, host, brick_path):
            print('\n')
            return

def reset_clean_wcache_3x(volume_name):
    """Reset the two wcache options changed by ``force_clean_wcache_3x``.

    Runs ``gluster v reset`` for ``performance.wcc-delay-time`` and then for
    ``performance.wcc-wbforce`` on ``volume_name``.
    """
    reset_templates = (
        '/sf/vs/glusterfs/sbin/gluster v reset {} performance.wcc-delay-time',
        '/sf/vs/glusterfs/sbin/gluster v reset {} performance.wcc-wbforce',
    )
    for template in reset_templates:
        cmdline = template.format(volume_name)
        logger.info('try to cmdline: {}'.format(cmdline))
        cli(cmdline)


def stop_brick_process(host, brick_path):
    """Stop the brick process for ``brick_path`` on ``host``.

    Steps:
      1. Send signal 19 (SIGSTOP) to the matching supervise process unless
         its ps state field already contains 'T'.
      2. Send ``kill -9`` to the brick process; return if none is found.
      3. Look the brick process up again; return if it is gone, otherwise
         raise CmdError.
    """
    logger.info('try to stop host: {}, brick: {}'.format(host, brick_path))

    # Step 1: SIGSTOP the supervise process.
    supervise_query = '/bin/ps aux | grep {} | /bin/grep supervise | /bin/grep -v grep || /bin/echo'.format(brick_path)
    fields = remote_cli(host, supervise_query, False).split()
    # fields[1] is the pid, fields[7] the STAT column of `ps aux`.
    if len(fields) > 7 and fields[1].isdigit() and 'T' not in fields[7]:
        cmdline = '/bin/kill -19 {}'.format(fields[1])
        logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
        remote_cli(host, cmdline, False)

    # Step 2: kill -9 the brick process itself.
    brick_query = '/bin/ps aux | grep {} | /bin/grep -v supervise | /bin/grep -v grep || /bin/echo'.format(brick_path)
    fields = remote_cli(host, brick_query, False).split()
    if not fields:
        # No such process: nothing left to do, treat as success.
        return
    if len(fields) > 1 and fields[1].isdigit():
        cmdline = '/bin/kill -9 {}'.format(fields[1])
        logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
        remote_cli(host, cmdline, False)

    # Step 3: confirm that the brick process is really gone.
    fields = remote_cli(host, brick_query, False).split()
    if not fields:
        # No such process: treat as success.
        return
    if fault_point_result() or (len(fields) > 1 and fields[1].isdigit()):
        raise CmdError('failed to kill brick process: {}, pid: {}'.format(brick_path, fields[1]))


def continue_brick_process(host, brick_path):
    """Resume the supervise process of ``brick_path`` on ``host``.

    Sends signal 18 (SIGCONT) to the supervise process only when its ps
    state field contains 'T'; otherwise nothing is sent.
    """
    logger.info('try to continue host: {}, brick: {}'.format(host, brick_path))
    query = '/bin/ps aux | grep {} | /bin/grep supervise | /bin/grep -v grep || /bin/echo'.format(brick_path)
    ps_fields = remote_cli(host, query, False).split()
    # ps_fields[1] is the pid, ps_fields[7] the STAT column of `ps aux`.
    is_stopped = len(ps_fields) > 7 and ps_fields[1].isdigit() and 'T' in ps_fields[7]
    if not is_stopped:
        return

    cmdline = '/bin/kill -18 {}'.format(ps_fields[1])
    logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
    remote_cli(host, cmdline, False)

def check_brick_process_alived(host, brick_path):
    """Return True if ``ps aux`` on ``host`` lists a non-supervise process
    matching ``brick_path``, False when nothing matches."""
    query = '/bin/ps aux | grep {} | /bin/grep -v supervise | /bin/grep -v grep || /bin/echo'.format(brick_path)
    ps_fields = remote_cli(host, query, False).split()
    # Empty output means no such process exists.
    return bool(ps_fields)


# Mount a brick's EFS onto a local directory of the brick's host.
# magic:    extra suffix for the mount directory so that several processes can run at the same time
# readonly: a read-only mount is used for copying data out, a writable mount for writing data
def mount_efs_path(brick, magic=None, readonly=True):
    """Mount ``/dev/<vg>/lv_efs`` with efs_standalone and wait until it is ready.

    The VG name is the 6th '/'-separated component of ``brick['path']``.
    The mount directory is ``/mnt/<vg>_<VSFIRE_MAGIC>``, with ``_<magic>``
    appended when ``magic`` is a non-empty str. Returns immediately if that
    directory is already listed as an efs_standalone mount. Readiness is
    detected by grepping the EFS log for the init message; the wait has no
    timeout.
    """
    host = brick['host']
    brick_vg = brick['path'].split('/')[5]  # VG name
    if magic and isinstance(magic, str):
        efs_mount_path = '/mnt/{}_{}_{}'.format(brick_vg, VSFIRE_MAGIC, magic)
    else:
        efs_mount_path = '/mnt/{}_{}'.format(brick_vg, VSFIRE_MAGIC)

    cmdline = '/bin/mount'
    if fault_point_result():
        raise CmdError('common.fault_point_result')
    for mount_line in remote_cli(host, cmdline, True):
        if not mount_line or VSFIRE_MAGIC not in mount_line:
            continue
        columns = mount_line.split()
        if columns[0] == 'efs_standalone' and columns[2].strip() == efs_mount_path:
            # Already mounted, no need to mount again.
            return

    # Create the EFS mount directory.
    cmdline = '/bin/mkdir -p {}'.format(efs_mount_path)
    logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
    remote_cli(host, cmdline, False)

    vg_short = brick_vg.split('-')[0]
    uuid_short = str(uuid.uuid4()).split('-')[0]  # gives every mount its own log file
    efs_log_path = '/sf/log/today/vs/scripts/{}_efs_{}_{}.log'.format(vsfire_recovery_dir, vg_short, uuid_short)

    # Mount the EFS.
    if readonly:
        mount_template = 'EFS_LOG_TARGET={} /sf/vs/sbin/efs_standalone  -i ulvm -p  /dev/{}/lv_efs -o readonly {}'
    else:
        mount_template = 'EFS_LOG_TARGET={} /sf/vs/sbin/efs_standalone  -i ulvm -p  /dev/{}/lv_efs {}'
    cmdline = mount_template.format(efs_log_path, brick_vg, efs_mount_path)
    logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
    remote_cli(host, cmdline, False)

    # Wait 2 seconds so that the EFS mount process has created its log file.
    time.sleep(2)

    # Poll the log until the EFS mount point reports that it is ready.
    poll_interval = 1
    waited = 0
    while True:
        cmdline = '/bin/grep "efs_init_wrapper] efs init" {} || /bin/echo'.format(efs_log_path)
        grep_output = remote_cli(host, cmdline, False)
        if grep_output and 'efs_init_wrapper' in grep_output:
            # The init log line is present: the mount has completed.
            break
        # Still mounting, keep waiting.
        time.sleep(poll_interval)
        waited += poll_interval
        print_with_clear('host: {}, VG: {} 当前正在挂载中, 已经等待: {} 秒'.format(host, brick_vg, waited))

    if waited > 0:
        print('\n')


def _umount_and_remove_efs_dir(host, mount_path):
    """Unmount ``mount_path`` on ``host``, then remove the directory if it is empty."""
    cmdline = '/bin/umount {}'.format(mount_path)
    logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
    remote_cli(host, cmdline, False)
    # Remove the mount directory (rmdir only runs when the directory is empty).
    cmdline = 'if [ -d "{}" ] && [ -z "$(ls -A {})" ]; then /bin/rmdir {}; fi'.\
        format(mount_path, mount_path, mount_path)
    logger.info('try to host: {}, cmdline: {}'.format(host, cmdline))
    remote_cli(host, cmdline, False)


# Unmount vsfire EFS mount points on the brick's host.
# If magic is a non-empty str, only the mount point built from the brick's VG and that magic
# is unmounted and its directory removed.
# If magic is empty, every vsfire EFS mount point on the host is unmounted and its directory removed.
def umount_efs_path(brick, magic=None):
    """Unmount one or all vsfire EFS mount points on ``brick['host']``.

    Only ``/bin/mount`` lines that contain VSFIRE_MAGIC and whose first field
    is ``efs_standalone`` are considered.

    With a non-empty str ``magic`` only ``/mnt/<vg>_<VSFIRE_MAGIC>_<magic>``
    is unmounted; mount points of other runs are left untouched (previously
    every vsfire EFS mount listed before the target was unmounted as well).
    Without ``magic`` all vsfire EFS mount points are unmounted.
    """
    host = brick['host']
    efs_mount_path = ''
    if magic and isinstance(magic, str):
        brick_vg = brick['path'].split('/')[5]  # VG name
        efs_mount_path = '/mnt/{}_{}_{}'.format(brick_vg, VSFIRE_MAGIC, magic)

    cmdline = '/bin/mount'
    if fault_point_result():
        raise CmdError('common.fault_point_result')
    result = remote_cli(host, cmdline, True)
    for line in result:
        if not line or VSFIRE_MAGIC not in line:
            continue
        columns = line.split()
        if columns[0] != 'efs_standalone':
            # Not a vsfire EFS mount point: must not be touched.
            continue

        mounted_path = columns[2].strip()
        if not efs_mount_path:
            # No specific target: unmount every vsfire EFS mount point.
            _umount_and_remove_efs_dir(host, mounted_path)
        elif mounted_path == efs_mount_path:
            # Only the requested mount point is unmounted; nothing else to do.
            _umount_and_remove_efs_dir(host, efs_mount_path)
            return


def nfs_mount_check_file_splitbrain(file_path):
    """Return True if ``os.stat(file_path)`` raises OSError, else False.

    The file is expected to be visible through readdir, so any stat failure
    is treated as split-brain (the error may be EIO, ENOENT or EINVAL).
    """
    splitbrain = False
    try:
        os.stat(file_path)
    except OSError as stat_err:
        logger.warn('stat file: {}, got except: {}'.format(file_path, str(stat_err)))
        splitbrain = True
    return splitbrain

# A switch in the config file controls whether a bad block is verified before it is repaired.
s_check_badblock = VsfireConfig(s_vsfire_config_file).get_bool('common', 'check_badblock')


def check_dev_has_badblock(host, dev, offset, block_size):
    """Check with a direct ``dd`` read whether a block of ``dev`` is bad.

    Returns True without reading when the ``check_badblock`` switch is off.
    Otherwise the block at ``offset / block_size`` is read up to 3 times on
    ``host``; the first read that raises CmdError makes the result True.
    Returns False when all reads succeed.
    """
    if not s_check_badblock:
        return True

    cmdline = '/bin/dd if={} of=/dev/null bs={} count=1 skip={} iflag=direct'.format(dev, block_size, offset/block_size)
    try:
        # Read 3 times; a single failure is enough to report a bad block.
        for _ in range(3):
            remote_cli(host, cmdline, False)
    except CmdError as read_err:
        # A dd read on a bad block may fail with EIO, ENODATA or EREMOTEIO.
        logger.warn('got except: {}'.format(str(read_err)))
        return True
    return False

# A switch in the config file controls whether the md5 of a device block is calculated.
s_calc_block_md5 = VsfireConfig(s_vsfire_config_file).get_bool('common', 'calc_block_md5')


def calc_dev_block_md5(host, dev, offset, block_size):
    """Log the md5 of one block of ``dev`` read with ``dd`` on ``host``.

    Does nothing when the ``calc_block_md5`` switch is off. The md5 is only
    logged, never returned. A CmdError from the remote command is logged as
    a warning and swallowed.
    """
    if not s_calc_block_md5:
        return

    cmdline = '/bin/dd if={} bs={} count=1 skip={} iflag=direct conv=notrunc 2>/dev/null | /usr/bin/md5sum'.format(dev, block_size, offset/block_size)
    try:
        md5_output = remote_cli(host, cmdline, False)
    except CmdError as e:
        logger.warn('got except: {}'.format(str(e)))
        return

    # md5sum output starts with the 32-character digest.
    if re.search(r"^\w{32}", md5_output):
        logger.info('cmdline: {}, md5: {}'.format(cmdline, md5_output.split()[0]))


def to_human_readable(size_bytes):
    """
    Format a byte count with the largest fitting unit (B, KB, MB, GB, TB).

    The value is divided by 1024 until it drops below 1024 or TB is reached,
    then rendered with two decimals, e.g. ``1536`` -> ``'1.50 KB'``.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB']
    last_unit = len(units) - 1
    unit_pos = 0
    scaled = size_bytes
    # Stop at TB even if the value is still >= 1024.
    while unit_pos < last_unit and scaled >= 1024:
        scaled = scaled / 1024.0
        unit_pos += 1
    return '{:.2f} {}'.format(scaled, units[unit_pos])


def get_vs_mount_path(volume_name, version=VS_VERSION_2_8):
    """Return the gfs mount path under ``/sf/data/vs/gfs/`` for a volume.

    For VS_VERSION_2_6 the directory name is always ``rep2`` regardless of
    ``volume_name``; for every other version it is ``volume_name``.
    """
    if version == VS_VERSION_2_6:
        return '/sf/data/vs/gfs/{}'.format('rep2')
    return '/sf/data/vs/gfs/{}'.format(volume_name)


def vs_listdir_3x(dir_path, replicate, online_bricks):
    """List the file names of ``dir_path`` using the first online brick.

    Walks the bricks in ``replicate[LEFT_TREE_INDEX]`` and runs ``ls -l`` on
    the first one whose path is marked 'y' in ``online_bricks``. Only lines
    that do not start with 'd' and have exactly 9 whitespace-separated
    fields are kept; their last field is the name. Returns [] when no brick
    is online.
    """
    for brick in replicate[LEFT_TREE_INDEX]:
        if online_bricks.get(brick['path']) != 'y':
            continue

        local_path = '{}/{}'.format(brick['path'], dir_path)
        cmdline = '/bin/ls -l {}'.format(local_path)
        listing = remote_cli(brick['host'], cmdline, True)
        # Return files only, never directories.
        return [
            entry.split()[-1].strip()
            for entry in listing
            if entry and not entry.startswith('d') and len(entry.split()) == 9
        ]
    return []


# Within a replica group, make one replica the good (OK) copy and mark the other replicas as BAD
def enable_replica_changelogs_2x(file_path, good_child_id, client_ids, replicate_bricks, 
                                 online_bricks, set_others_bad=False):
    """Make one 2.x replica the good copy by rewriting xattrs on the online bricks.

    :param file_path: file path handed to vs_getfattr_2x / vs_setfattr_2x
        together with each brick's path
    :param good_child_id: index of the good replica in ``replicate_bricks``
        and ``client_ids``
    :param client_ids: one client id per brick; the good replica's id is used
        as a regex to find xattr lines that refer to it
    :param replicate_bricks: brick dicts; 'host', 'path' and 'arbiter' are read
    :param online_bricks: mapping of brick path to 'y' for online bricks;
        bricks that are not 'y' are skipped
    :param set_others_bad: when True, arbiter bricks are marked bad as well
    :raises CmdError: if ``client_ids`` and ``replicate_bricks`` differ in
        length or ``good_child_id`` is not below the number of bricks
    """
    if len(client_ids) != len(replicate_bricks) or good_child_id >= len(replicate_bricks):
        err_msg = 'file_path: {}, or client_ids: {} or bricks: {} invalid'.format(
            file_path, client_ids, replicate_bricks)
        raise CmdError(err_msg)

    good_brick = replicate_bricks[good_child_id]
    good_client_id = client_ids[good_child_id]
    # Mark the bad replicas as BAD
    for index, brick in enumerate(replicate_bricks):
        if online_bricks.get(brick['path']) != 'y' or brick == good_brick:
            continue

        # On 2.x the arbiter replica is not marked BAD (unless set_others_bad is set),
        # because a BAD arbiter cannot trigger healing
        if set_others_bad or not brick['arbiter']:
            logger.info('index: {} try to set bad'.format(index))
            vs_setfattr_2x(brick['host'], brick['path'], file_path, 'trusted.file_status', 'bad')
    
    # Clear BAD on the good replica; on every replica clear the accusations against the good replica
    for index, brick in enumerate(replicate_bricks):
        if online_bricks.get(brick['path']) != 'y':
            continue

        result = vs_getfattr_2x(brick['host'], brick['path'], file_path).split('\n')
        for line in result:
            key_and_val = line.split('=')
            # Good replica: clear BAD
            # (vs_setfattr_2x without a value presumably removes the xattr -- confirm against its definition)
            if brick == good_brick and re.search(r'trusted.file_status', line):
                logger.info('index: {} try to remove bad'.format(index))
                vs_setfattr_2x(brick['host'], brick['path'], file_path, key_and_val[0])

            # Arbiter of a bad replica: drop new_entry if present, otherwise no heal source can be selected
            if brick != good_brick and brick['arbiter'] and re.search(r'user.glusterfs.new_entry', line):
                logger.info('index: {} try to remove new_entry'.format(index))
                vs_setfattr_2x(brick['host'], brick['path'], file_path, key_and_val[0])

            # Every replica: zero the pending accusation against the good replica
            if re.search(good_client_id, line) and key_and_val[1] != '0x000000000000000000000000':
                logger.info('index: {} try to remove pending'.format(index))
                vs_setfattr_2x(brick['host'], brick['path'], file_path, key_and_val[0], '0x000000000000000000000000')


def enable_replica_changelogs_3x(gfid, good_child_id, client_ids, replicate_bricks, online_bricks):
    """Make one 3.x replica the good copy by rewriting xattrs on the online bricks.

    Arbiter bricks are accessed with the 2.x helpers through the
    ``.glusterfs/<gfid[0:2]>/<gfid[2:4]>/<gfid>`` path; data bricks are
    accessed with the 3.x helpers by gfid.

    :param gfid: gfid of the file/shard to operate on
    :param good_child_id: index of the good replica in ``replicate_bricks``
        and ``client_ids``
    :param client_ids: one client id per brick; the good replica's id is used
        as a regex to find xattr lines that refer to it
    :param replicate_bricks: brick dicts; 'host', 'path' and 'arbiter' are read
    :param online_bricks: mapping of brick path to 'y' for online bricks;
        bricks that are not 'y' are skipped
    :raises CmdError: if ``client_ids`` and ``replicate_bricks`` differ in
        length or ``good_child_id`` is not below the number of bricks
    """
    if len(client_ids) != len(replicate_bricks) or good_child_id >= len(replicate_bricks):
        err_msg = 'gfid: {}, or client_ids: {} or bricks: {} invalid'.format(gfid, client_ids, replicate_bricks)
        raise CmdError(err_msg)
    
    file_path = '.glusterfs/{}/{}/{}'.format(gfid[0:2], gfid[2:4], gfid)
    good_brick = replicate_bricks[good_child_id]
    good_client_id = client_ids[good_child_id]

    # Mark the bad replicas as BAD
    for index, brick in enumerate(replicate_bricks):
        if online_bricks.get(brick['path']) != 'y' or brick == good_brick:
            continue

        # Only data replicas are marked BAD
        if not brick['arbiter']:
            logger.info('index: {} try to set bad'.format(index))
            vs_setfattr_3x(brick['host'], brick['path'], gfid, 'trusted.file_status', 'bad')
    
    # Clear BAD on the good replica; on every replica clear the accusations against the good replica
    for index, brick in enumerate(replicate_bricks):
        if online_bricks.get(brick['path']) != 'y':
            continue

        if brick['arbiter']:
            result = vs_getfattr_2x(brick['host'], brick['path'], file_path).split('\n')
        else:
            result = vs_getfattr_3x(brick['host'], brick['path'], gfid).split('\n')
        for line in result:
            key_and_val = line.split('=')
            # Good replica: clear BAD
            # (the setfattr helpers without a value presumably remove the xattr -- confirm against their definitions)
            if brick == good_brick and re.search(r'trusted.file_status', line):
                logger.info('index: {} try to remove bad'.format(index))
                if brick['arbiter']:
                    vs_setfattr_2x(brick['host'], brick['path'], file_path, key_and_val[0])
                else:
                    vs_setfattr_3x(brick['host'], brick['path'], gfid, key_and_val[0])

            # Zero the pending accusation against the good replica
            # (applied to every online replica, since there is no good/bad check here)
            if re.search(good_client_id, line) and key_and_val[1] != '0x000000000000000000000000':
                logger.info('index: {} try to remove pending'.format(index))
                if brick['arbiter']:
                    vs_setfattr_2x(brick['host'], brick['path'], file_path, key_and_val[0], '0x000000000000000000000000')
                else:
                    vs_setfattr_3x(brick['host'], brick['path'], gfid, key_and_val[0], '0x000000000000000000000000')



def get_shard_gfid_and_route(host, brick_path, file_path):
    """Read the gfid and the parsed route of a file from its xattrs.

    Runs getfattr (hex encoding) on ``brick_path/file_path`` on ``host``.
    ``trusted.gfid`` is converted to dashed UUID form and
    ``trusted.route.info`` is decoded locally with ``vs_route_tool`` into a
    dict. Returns ``{'gfid': ..., 'route': ...}`` when both were found and
    are non-empty, otherwise ``{}``.
    """
    gfid = ''
    route = {}
    target = os.path.join(brick_path, file_path)
    getfattr_cmd = '{} -d -m. -e hex "{}"'.format(s_getfattr_bin, target)
    for xattr_line in remote_cli(host, getfattr_cmd, True):
        fields = xattr_line.split('=')
        xattr_name = fields[0]
        if xattr_name == 'trusted.gfid':
            gfid_hex = fields[1]
            gfid = '{}-{}-{}-{}-{}'.format(gfid_hex[2:10], gfid_hex[10:14], gfid_hex[14:18], gfid_hex[18:22], gfid_hex[22:34])
        elif xattr_name == 'trusted.route.info':
            route_hex = fields[1]
            parse_cmd = '/sf/vs/sbin/vs_route_tool --cmd=parse --rtstr={}'.format(route_hex)
            parsed = cli(parse_cmd, False)
            route = json.loads(parsed.decode('utf-8').strip())

    if not (gfid and route):
        return {}
    return {'gfid': gfid, 'route': route}


# Get the shard list of a file from one metadata brick
def get_file_shards_by_meta_brick(host, brick_path, file_path):
    """Collect gfid/route info for the first shard and all further shards.

    The first shard is ``file_path`` itself (``shard_idx`` 0). Further shards
    are the entries of ``<brick_path>/.vs/shard/<gfid[0:2]>/<gfid>`` whose
    name is ``<gfid>.<idx>``; they are returned ordered by ``idx``.

    Returns a list of dicts with 'gfid', 'route', 'shard_idx' and 'path'.
    Returns [] if the gfid/route of any shard cannot be read. A missing
    shard directory yields only the first shard; any other CmdError from the
    listing is re-raised.
    """
    # Use getfattr to obtain the gfid and route of the first shard.
    first_shard = get_shard_gfid_and_route(host, brick_path, file_path)
    if not first_shard:
        logger.error('failed to get first_shard, file_path: {}'.format(file_path))
        return []
    first_shard['shard_idx'] = 0  # the first shard always has shard_idx 0
    first_shard['path'] = file_path
    shards = [first_shard]

    # Find the remaining shards in the shard directory.
    base_gfid = first_shard['gfid']
    local_shard_dir = os.path.join(brick_path, '.vs/shard', base_gfid[0:2], base_gfid)
    try:
        listing = remote_cli(host, '/bin/ls {}'.format(local_shard_dir), True)
    except CmdError as e:
        if 'No such file or directory' not in str(e):
            raise
        logger.warn('got except: {}'.format(str(e)))
        # With a stripe count of 1 the shard directory may not exist.
        return shards

    shard_files = [name.strip() for name in listing if name.split('.')[0] == base_gfid]
    # Only the first shard exists.
    if not shard_files:
        return shards

    # Order the shard names by their numeric index suffix.
    shard_files.sort(key=lambda name: int(name.split('.')[1]))
    for shard_file in shard_files:
        shard_path = '.vs/shard/{}/{}/{}'.format(base_gfid[0:2], base_gfid, shard_file)
        shard = get_shard_gfid_and_route(host, brick_path, shard_path)
        if not shard:
            logger.error('failed to get gfid, shard_path: {}'.format(shard_path))
            return []
        shard['shard_idx'] = int(shard_file.split('.')[1])  # keep the shard index
        shard['path'] = shard_path
        shards.append(shard)
    return shards

# Get all shards of a file together with each shard's route
def get_file_shards_3x(file_path, replicate, online_bricks):
    """Return the shard list of ``file_path`` from the first usable brick.

    Tries the bricks of ``replicate[LEFT_TREE_INDEX]`` that are marked 'y'
    in ``online_bricks``, in order, and returns the first non-empty result
    of ``get_file_shards_by_meta_brick``. Returns [] if none succeeds.
    """
    for brick in replicate[LEFT_TREE_INDEX]:
        if online_bricks.get(brick['path']) != 'y':
            continue
        shards = get_file_shards_by_meta_brick(brick['host'], brick['path'], file_path)
        if shards:
            # Found the shard list, done.
            return shards
        # Nothing found on this brick, try the next metadata brick.
    return []


def check_brick_2x(exclude_brick_id, replicate_bricks):
    """Scan the bricks of a 2.x replica group for BAD marks and accusations.

    Pass 1: on every brick except the one whose 'id' equals
    ``exclude_brick_id``, no file outside ``.glusterfs`` may carry a
    ``trusted.file_status`` xattr.
    Pass 2: on every brick, no file may carry a non-zero
    ``vs_vol_rep2-client`` xattr other than the one for client
    ``exclude_brick_id - 1``.

    Each remote scan stops at the first offending file. Returns False as
    soon as one is reported, True when both passes find nothing.
    """
    def first_reported_file(lines):
        # A reported file is an output line that starts with the local brick root.
        return next(
            (entry for entry in lines if entry and entry.startswith('/sf/data/vs/local')),
            None)

    # Pass 1: apart from the excluded replica, no file may be BAD.
    for brick in replicate_bricks:
        if brick['id'] != exclude_brick_id:
            cmdline = 'for file in $(/usr/bin/find {} -path {}/.glusterfs -prune -o -print);' \
                'do {} -d -m. -e hex "$file" | /bin/grep -q trusted.file_status;' \
                'if [ $? -eq 0 ]; then echo "$file";break;fi; done'.format(brick['path'], brick['path'], s_getfattr_bin)
            logger.info('try to host: {}, cmdline: {}'.format(brick['host'], cmdline))
            bad_file = first_reported_file(remote_cli(brick['host'], cmdline, True))
            if bad_file is not None:
                # A file was printed, so a BAD mark exists.
                logger.error('host: {}, file: {} has BAD'.format(brick['host'], bad_file))
                return False

    # Pass 2: apart from the excluded replica, no replica may be accused.
    for brick in replicate_bricks:
        cmdline = 'for file in $(/usr/bin/find {} -path {}/.glusterfs -prune -o -print);' \
            'do {} -d -m. -e hex "$file" | /bin/grep vs_vol_rep2-client | ' \
            '/bin/grep -v 0x000000000000000000000000 | /bin/grep -v vs_vol_rep2-client-{} | /bin/grep -q vs_vol_rep2-client; ' \
            'if [ $? -eq 0 ]; then echo "$file";break;fi; done'.format(brick['path'], brick['path'], s_getfattr_bin, exclude_brick_id - 1)
        logger.info('try to host: {}, cmdline: {}'.format(brick['host'], cmdline))
        accused_file = first_reported_file(remote_cli(brick['host'], cmdline, True))
        if accused_file is not None:
            # A file was printed, so an accusation exists.
            logger.error('host: {}, file: {} has changelog'.format(brick['host'], accused_file))
            return False

    return True


def only_support_3x(func):
    """Decorator that exits the program unless the environment is vs 3.x.

    NOTE(review): the wrapper takes no arguments and returns ``func`` itself
    instead of calling it, so the caller has to invoke the returned function
    again -- confirm that this is what the call sites expect.
    """
    def _require_3x():
        if is_vs3x():
            return func
        sys.exit("此功能只支持vs3.x版本！")

    return wraps(func)(_require_3x)


def only_support_2x(func):
    """Decorator that exits the program unless the environment is vs 2.x.

    NOTE(review): the wrapper takes no arguments and returns ``func`` itself
    instead of calling it, so the caller has to invoke the returned function
    again -- confirm that this is what the call sites expect.
    """
    def _require_2x():
        if is_vs2x():
            return func
        sys.exit("此功能只支持vs2.x版本！")

    return wraps(func)(_require_2x)


def only_support_2host(func):
    """Decorator that exits the program unless there are exactly 2 data hosts.

    The hosts are taken from the output of ``gluster v i``: every line that
    contains " host-" contributes the part of its second field before ':'.
    The program exits with status 1 if the command fails or if the number
    of distinct hosts is not 2.

    NOTE(review): the wrapper takes no arguments and returns ``func`` itself
    instead of calling it, so the caller has to invoke the returned function
    again -- confirm that this is what the call sites expect.
    """
    def _require_two_data_hosts():
        try:
            # Run `gluster v i` and capture its output.
            output = subprocess.check_output("gluster v i", shell=True)
        except subprocess.CalledProcessError as e:
            print("执行命令失败: {}".format(e))
            sys.exit(1)

        # Extract the distinct host names from the output.
        data_hosts = set(
            line.split()[1].split(':')[0]
            for line in output.splitlines()
            if " host-" in line
        )
        if len(data_hosts) != 2:
            print("数据节点({})不是2主机!".format(' '.join(data_hosts)))
            sys.exit(1)
        return func

    return wraps(func)(_require_two_data_hosts)


def _get_host_name_id():
    """Return the second '-'-separated part of the local hostname.

    The hostname comes from the ``hostname`` command; newline characters are
    stripped from both ends of the returned part. On any error a banner is
    printed and the exception is re-raised.
    """
    try:
        raw_hostname = subprocess.check_output('hostname', shell=True)
        return raw_hostname.split('-')[1].strip('\n')
    except Exception:
        print ('----------!!!error :: get_host_name_id failed!!!----------')
        raise


def _is_localhost_exist_device(scsi_id):
    """Return True if any entry name in DISK_CONF contains ``scsi_id``."""
    return any(scsi_id in entry for entry in os.listdir(DISK_CONF))


def _get_disk_status(scis_id):
    """Return the 'status' field of the local disk config for ``scis_id``.

    The config file is ``DISK_CONF/<host id>_<scis_id>.json``.

    NOTE(review): ``config`` is not imported in the visible file header (the
    ``from libcommon import config`` line is commented out) -- confirm that
    it is in scope when this runs.
    """
    file_name = ''.join([_get_host_name_id(), '_', scis_id, '.json'])
    disk_info = config.json_load(DISK_CONF + file_name)
    return disk_info['status']


def _print_disk_info(scis_id):
    """Print the loaded local disk config for ``scis_id``.

    The config file is ``DISK_CONF/<host id>_<scis_id>.json``.

    NOTE(review): ``config`` is not imported in the visible file header (the
    ``from libcommon import config`` line is commented out) -- confirm that
    it is in scope when this runs.
    """
    file_name = ''.join([_get_host_name_id(), '_', scis_id, '.json'])
    print(config.json_load(DISK_CONF + file_name))


def is_valid_uuid(uuid_string):
    """Return True if ``uuid_string`` is exactly a dashed 8-4-4-4-12 hex UUID.

    Upper- and lower-case hex digits are accepted. The pattern is anchored
    with ``\\Z`` rather than ``$`` because ``$`` also matches just before a
    trailing newline, which would let ``"<uuid>\\n"`` pass validation.
    """
    # Regular expression for a UUID
    uuid_regex = re.compile(
            r'^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\Z'
    )
    # Match the whole input against the UUID pattern
    return bool(uuid_regex.match(uuid_string))


class RangeSet(object):
    """
    A set of integer-like values stored as sorted, merged half-open ranges.

    Similar to ``set`` and supports intersection / union. Main differences:
        * Elements must be representable as integers and support +/- with int.
        * Only the start/stop of each range is stored; the members in between
          are never expanded.
        * Ranges are kept sorted, so elements can be fetched by index.
        * difference, symmetric_difference, etc. are not implemented because
          they are not used yet.
    """

    def __init__(self, range_tuples):
        """
        Build the set from a list of ranges.

        :param range_tuples: list of (start, stop) pairs. Each range is closed
            on the left and open on the right, e.g. [(3, 5), (10, 12)] stands
            for [3, 5) U [10, 12). Overlapping or touching ranges are merged.
        """
        self._ranges = range_tuples
        self._sort_merge_ranges()

    def __len__(self):
        """Return the total number of elements covered by all ranges."""
        length = 0
        for start, stop in self._ranges:
            length += stop - start

        # The sum may be of the element type instead of int;
        # len() must return an int, so convert explicitly.
        return int(length)

    def __getitem__(self, index):
        """
        Fetch an element by position in ascending order.

        :param index: position; negative values count from the end
        :return: the element at that position
        :raises IndexError: if the position is outside the set
        """
        length = 0
        if index < 0:
            # Negative index: walk the ranges from right to left using the
            # equivalent non-negative offset from the end.
            index = -1 - index
            for start, stop in reversed(self._ranges):
                # Number of elements covered so far, counting from the right.
                length += int(stop - start)

                if index < length:
                    return start + length - index - 1

            # "-1 - index" restores the index the caller passed in.
            raise IndexError("%s is out of range %s" % (-1 - index, self))
        else:
            for start, stop in self._ranges:
                # Number of elements covered so far, counting from the left.
                length += int(stop - start)

                if index < length:
                    return stop + index - length

            raise IndexError("%s is out of range %s" % (index, self))

    def __contains__(self, item):
        """
        Tell whether ``item`` lies inside any of the ranges.

        :param item: element to look for
        :return: bool
        """
        for start, stop in self._ranges:
            if start <= item < stop:
                return True

        return False

    def __iter__(self):
        """Yield every element in ascending order."""
        for start, stop in self._ranges:
            # range/xrange is avoided on purpose so that elements are not
            # converted to int; a while loop with addition is used instead.
            value = start
            while value < stop:
                yield value
                # Do not use '+=' here: the element may be a mutable object
                # and '+=' would then modify ``start`` in place.
                value = value + 1

    def __and__(self, another):
        """
        a & b: intersection of two range sets.

        :param another: the other range set
        :return: the intersection as a new RangeSet
        """
        return self._intersect_once(another)

    def __or__(self, another):
        """
        a | b: union of two range sets.

        :param another: the other range set
        :return: the union as a new RangeSet
        """
        return self._union_once(another)

    def __str__(self):
        """Return a display string such as ``3-4,10`` (inclusive bounds)."""
        range_str_list = []
        for start, stop in self._ranges:
            if stop == start + 1:
                # Single-element range: show just the element.
                range_str_list.append(str(start))
            else:
                range_str_list.append('-'.join([str(start), str(stop - 1)]))

        return ','.join(range_str_list)

    def __repr__(self):
        """Return a string that can be used to reconstruct the object."""
        return "%s(%s)" % (self.__class__.__name__, repr(self._ranges))

    def __eq__(self, other):
        """
        Two range sets are equal when their merged range lists are equal.

        Returns NotImplemented for objects that are not RangeSet instances so
        that the comparison evaluates to False instead of raising.
        """
        if not isinstance(other, RangeSet):
            return NotImplemented
        return self._ranges == other._ranges

    def __ne__(self, other):
        """
        Inverse of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``; without this method
        two equal range sets would still compare as "not equal".
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    @property
    def ranges(self):
        """Return a deep copy of the internal list of (start, stop) ranges."""
        # Imported locally: the module-level imports visible at the top of the
        # file do not include ``copy``.
        import copy
        return copy.deepcopy(self._ranges)

    def add(self, elem):
        """
        Add a single element to the set.

        :param elem: element to add
        :return: None
        """
        self._ranges.append((elem, elem + 1))
        self._sort_merge_ranges()

    def remove(self, elem):
        """
        Remove a single element from the set.

        :param elem: element to remove
        :return: None
        :raises ValueError: if the element is not in the set
        """
        for index, (start, stop) in enumerate(self._ranges):
            if start <= elem < stop:
                self._ranges.pop(index)

                # Re-insert the part to the right of the removed element.
                if elem + 1 < stop:
                    self._ranges.insert(index, (elem + 1, stop))

                # Re-insert the part to the left of the removed element;
                # inserting at the same index puts it before the right part.
                if elem > start:
                    self._ranges.insert(index, (start, elem))

                return

        raise ValueError("%s is not in range %s" % (elem, self))

    def union(self, *others):
        """
        Union with any number of other range sets.

        :param others: other range sets
        :return: the union (``self`` itself when no others are given)
        """
        result = self
        for other in others:
            result = result._union_once(other)

        return result

    def intersection(self, *others):
        """
        Intersection with any number of other range sets.

        :param others: other range sets
        :return: the intersection (``self`` itself when no others are given)
        """
        result = self
        for other in others:
            result = result._intersect_once(other)

        return result

    def _intersect_once(self, another):
        """
        Intersection with one other range set.

        :param another: the other range set
        :return: the intersection as a new RangeSet
        """
        range_list = []

        for self_start, self_stop in self._ranges:
            for another_start, another_stop in another._ranges:
                start = max(self_start, another_start)
                stop = min(self_stop, another_stop)

                # Keep only non-empty overlaps.
                if stop > start:
                    range_list.append((start, stop))

        return RangeSet(range_list)

    def _union_once(self, another):
        """
        Union with one other range set.

        :param another: the other range set
        :return: the union as a new RangeSet
        """
        # Copy first so that neither operand's list is modified.
        range_list = self._ranges[:]
        range_list.extend(another._ranges)

        return RangeSet(range_list)

    def _sort_merge_ranges(self):
        """
        Sort the ranges and merge those that overlap or touch.

        :return: None
        """
        range_list = []

        for start, stop in sorted(self._ranges):
            if not range_list:
                # First range: add as is.
                range_list.append((start, stop))
                continue

            last_start, last_stop = range_list[-1]
            if start <= last_stop:
                # This range starts inside (or right at the end of) the
                # previous one, so merge the two.
                range_list[-1] = (last_start, max(last_stop, stop))
            else:
                # Disjoint from the previous range: add as is.
                range_list.append((start, stop))

        self._ranges = range_list
