#!/sf/vs/bin/python
# -*- coding:utf-8 -*-

"""
## tier坏道修复工具
"""
import os
import re
import sys
import time
import json
import uuid
import stat
import gzip
import socket
import struct
import traceback
import pylib.utils.utiltools as common

TIER_V1 = "0x10000"
TIER_V2 = "0x20000"
TIER_V3 = "0x30000"
TIER_SB_OFF = 4096
TIER_SB_RSIZE = 131072 # The superblock itself never exceeds 4K, but v1 and v2 both load it by reading 128K, so the whole 128K region is treated as superblock
TIER_META_UNIT = 4096
CACHE_CONF = '/sf/cfg/vs/cache/wcache.json'
TIER_CONF = '/sf/cfg/vs/cache/tier.json'
TFS_OPLOG = '/sf/log/vs/tierd/tfs-oplog.log'
TIER_INODE_EXP_MAGIC = "0x4558"
TIER_INODE_EXP_BLOCK = 100 * 1024 * 1024
TIER_INODE_EXP_RESERVE = 1024 * 1024
REPAIR_TOOL_WORK_DIR = os.path.dirname(__file__)
TIER_REPAIR_TOOL = os.path.join(REPAIR_TOOL_WORK_DIR, 'vs_tier_repair')
TFS_REPAIR_TOOL = '/sf/vs/sbin/tfs_repair_tool'
s_tier_online = True # Whether the tier is online (SSD metadata is already cached in memory)
s_tier_version = '' # Tier version number
s_tier_superblock = {} # Superblock of the tier

s_tier_force_offline = False # Debug switch: force the tier to be treated as offline, to make offline scenarios easy to test
s_tier_flush_meta = True # Debug switch: simulate version 690, which does not support flush-meta repair

def tier_send_request(data):
    """Send a packed command to the tier command socket and parse the reply.

    The request is written to the UNIX stream socket
    ``/run/vs/tier_cmd_sock`` and at most 4096 bytes of reply are read back.
    The reply is expected to look like
    ``{'ret': -1, ' msg': 'magic error:0xbab4'}`` and is split into exactly
    two ``key: value`` items.

    Args:
        data: The already packed request bytes.

    Returns:
        dict: The parsed reply, with ``ret`` converted to ``int``.  When the
        socket operation fails or the reply cannot be parsed, the result is
        ``{'ret': -1, 'msg': <reason>}`` so callers can treat both the same
        way.
    """
    server_address = '/run/vs/tier_cmd_sock'
    sockfd = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        sockfd.connect(server_address)
        sockfd.sendall(data)
        response = sockfd.recv(4096)
    except Exception as e:
        return {'ret' : -1, 'msg' : str(e)}
    finally:
        # Always release the descriptor, including on connect/send failures.
        sockfd.close()

    # No-op on Python 2 (bytes is str); keeps the text parsing below usable
    # if the tool is ever run on Python 3.
    if not isinstance(response, str):
        response = response.decode('utf-8')

    # Parse the reply into a dict: {'ret': -1, ' msg': 'magic error:0xbab4'}
    dictresp = {}
    for res in response.strip('{}').split(',', 1):
        line = res.split(':', 1)
        if len(line) != 2:
            return {'ret': -1, 'msg': 'invalid response: {}'.format(response)}
        # Only the first ':' separates key and value, so values that contain
        # ':' themselves (e.g. "magic error:0xbab4") stay intact.
        dictresp[line[0].strip('\' ')] = line[1].strip(' \"\"')

    try:
        dictresp['ret'] = int(dictresp['ret'])
    except (KeyError, ValueError):
        return {'ret': -1, 'msg': 'invalid response: {}'.format(response)}
    return dictresp

def tier_do_command(cmd, arg):
    """Run a ``dump`` or ``file`` command against the tier command socket.

    The command is serialized as ``tier.<cmd>:<arg>;`` and framed with a
    little-endian header (magic 0xB4BA, payload length).  The whole exchange
    happens while holding ``/var/lock/vs_tier_cli.lock``.

    Args:
        cmd: Either ``"dump"`` or ``"file"``.
        arg: Command argument string; a falsy value sends an empty argument.

    Returns:
        dict: The reply from ``tier_send_request``.  On success (``ret == 0``)
        ``msg`` is the path of the response file after it was renamed.
    """
    assert cmd in ("dump", "file")

    with common.VsfireFlock('/var/lock/vs_tier_cli.lock', True, True):
        payload = 'tier.' + cmd + ':' + (arg if arg else '') + ';'
        packet = struct.pack('<HH%ds' % len(payload), 0xB4BA, len(payload), payload)

        reply = tier_send_request(packet)
        if reply['ret'] != 0:
            return reply

        # The reply message names a file that holds the actual response data.
        # Rename it while the lock is still held so a concurrent command
        # cannot touch it after the lock is released.
        response_path = reply['msg']
        renamed_path = '{}.{}_repair'.format(response_path, common.VSFIRE_MAGIC)
        os.rename(response_path, renamed_path)
        reply['msg'] = renamed_path
        return reply

def tier_dump_meta_online(ssd_uuid, type):
    """Dump one kind of metadata through the tier command socket.

    Args:
        ssd_uuid: UUID of the SSD whose entry should be returned.
        type: Metadata kind.  ``'superblock'`` sends a plain dump; any other
            value is passed as ``-a <type>``.

    Returns:
        dict or None: The entry of the dump's ``ssd`` list whose ``ssd_uuid``
        matches, or ``None`` when the dump command fails or no entry matches.
    """
    arg = ''
    if type != 'superblock':
        arg = '-a {}'.format(type)

    ret = tier_do_command('dump', arg)
    if ret['ret'] != 0:
        common.logger.error('arg: {} dump failed: {}'.format(arg, ret['msg']))
        return None

    rsp_file = ret['msg']
    try:
        # Close the handle deterministically instead of relying on garbage
        # collection of an anonymous file object.
        with open(rsp_file) as f:
            dump_objs = json.load(f)
    finally:
        # The response file is a one-shot artifact; drop it even when the
        # content cannot be parsed.
        os.remove(rsp_file)

    for dump_obj in dump_objs['ssd']:
        if dump_obj['ssd_uuid'] == ssd_uuid:
            return dump_obj
    return None

def json_dump(path, obj, indent=None):
    """Write a JSON object to ``path`` through a temporary ``.bak`` file.

    The content is first written to ``path + '.bak'`` and then renamed over
    ``path``, so readers never see a half-written file.

    Args:
        path: Destination file path.
        obj: JSON-serializable object.
        indent: Indentation width.  A falsy value (``None`` or ``0``) writes
            the compact default format; otherwise the output is pretty
            printed with ``ensure_ascii=False``.
    """
    bakfile = path + '.bak'
    with open(bakfile, 'w') as f:
        if not indent:
            # No extra positional argument: the third positional slot of
            # json.dump is ``skipkeys`` on Python 2 and is rejected outright
            # on Python 3.
            json.dump(obj, f)
        else:
            json.dump(obj, f, indent=indent, ensure_ascii=False, separators=(',', ': '))
        # Push the data to disk before the rename publishes the file.
        f.flush()
        os.fsync(f.fileno())
    os.rename(bakfile, path)

def get_expect_tier_version():
    """Return the tier format version expected for the installed VS version.

    EDS products expect V2 from VS 3.0.3 on and V1 before that.  Other
    products expect V1 up to and including 3.0.3, V2 below 3.6, and from 3.6
    on the value is decided by ``tier_version`` in ``TIER_CONF`` (``'2.0'``
    means V2, anything else V3).

    Returns:
        str: One of ``TIER_V1``, ``TIER_V2`` or ``TIER_V3``.
    """
    vs_version = common.get_vs_version()

    if common.vs_is_eds():
        return TIER_V2 if vs_version >= common.VS_VERSION_3_0_3 else TIER_V1

    if vs_version <= common.VS_VERSION_3_0_3:
        return TIER_V1
    if vs_version < common.VS_VERSION_3_6:
        return TIER_V2

    with open(TIER_CONF, 'r') as conf_file:
        conf = json.load(conf_file)
        return TIER_V2 if conf['tier_version'] == '2.0' else TIER_V3

def get_tier_superblock_online(ssd_uuid):
    """Return the superblock entry for ``ssd_uuid`` from an online dump, or None."""
    meta_kind = 'superblock'
    return tier_dump_meta_online(ssd_uuid, meta_kind)

def get_tier_superblock_offline(ssd_uuid):
    """Dump the superblock by running the repair tool on the tier cache device.

    Args:
        ssd_uuid: UUID of the SSD; the device path is
            ``/dev/<uuid>/<uuid>-tcache``.

    Returns:
        The JSON document printed by ``dump_super``, decoded.
    """
    device = '/dev/{0}/{0}-tcache'.format(ssd_uuid)
    output = common.cli('{} -p {} -c dump_super'.format(TIER_REPAIR_TOOL, device), False)
    return json.loads(output.decode('utf-8').strip())

def get_tier_superblock_backup(ssd_uuid):
    """Load the backed-up superblock of an SSD and make it usable for repair.

    The backup is read from ``/sf/cfg/vs/cache/<ssd_uuid>`` with the repair
    tool's ``dump_super`` command.  It is returned as-is when it is V3 or
    already matches the expected tier version.  A V1 backup on a system that
    expects V2 is converted with ``convert_super``.  Every other combination
    is rejected.

    Args:
        ssd_uuid: UUID of the SSD being repaired.

    Returns:
        dict: The usable superblock, or ``{}`` when the backup belongs to a
        different SSD, looks too old, or cannot be converted.
    """
    tier_dev = '/sf/cfg/vs/cache/{}'.format(ssd_uuid)
    cmdline = '{} -p {} -c dump_super'.format(TIER_REPAIR_TOOL, tier_dev)
    result = common.cli(cmdline, False)
    superblock = json.loads(result.decode('utf-8').strip())
    if superblock['ssd_uuid'] != ssd_uuid:
        common.logger.error('backup ssd: {} not match current ssd: {}'.format(superblock['ssd_uuid'], ssd_uuid))
        return {}

    # A V3 backup superblock can be used directly.
    if superblock['version'] == TIER_V3:
        return superblock

    # From VS 3.8 on (HCI6110) the superblock carries an inode extension
    # area; a V2 backup without that magic is too old to use.
    if superblock['version'] == TIER_V2 and \
            common.get_vs_version() >= common.VS_VERSION_3_8 and \
            superblock.get('exp_inode_magic') != TIER_INODE_EXP_MAGIC:
        common.logger.error('vs3.8 has not exp_inode_magic, backup superblock maybe too old')
        return {}

    # The tier format may have been converted since the backup was taken, so
    # compare against the version this system is expected to run.
    tier_version_expect = get_expect_tier_version()
    if tier_version_expect == superblock['version']:
        return superblock

    # Only the V1 -> V2 conversion is supported.
    if superblock['version'] != TIER_V1 or tier_version_expect != TIER_V2:
        common.logger.error('tier_version_expect: {}, version: {}'.format(tier_version_expect, superblock['version']))
        return {}

    # Convert the V1 backup into a V2 superblock.
    repair_conf = '{}/{}_convert_sb.json'.format(REPAIR_TOOL_WORK_DIR, common.VSFIRE_MAGIC)
    json_dump(repair_conf, superblock)
    tier_dev = '/dev/{}/{}-tcache'.format(ssd_uuid, ssd_uuid)
    cmdline = '{} -p {} -c convert_super -f {} -v 2'.format(TIER_REPAIR_TOOL, tier_dev, repair_conf)
    common.logger.info('try to cmdline: {}'.format(cmdline))
    result = common.cli(cmdline, False)
    # Same clean-up convention as the repair functions: drop the temporary
    # input file once the tool has consumed it.
    os.remove(repair_conf)
    superblock = json.loads(result.decode('utf-8').strip())
    return superblock

def get_tier_brickinfo_online(ssd_uuid):
    """Return the brick-info entry for ``ssd_uuid`` from an online dump, or None."""
    meta_kind = 'brickinfo'
    return tier_dump_meta_online(ssd_uuid, meta_kind)

def get_tier_brickinfo_offline(ssd_uuid, brick_no=-1):
    """Dump brick metadata by running the repair tool on the tier cache device.

    Args:
        ssd_uuid: UUID of the SSD.
        brick_no: Brick number to dump; ``-1`` dumps with the ``use=1``
            filter instead of a single number.

    Returns:
        The JSON document printed by ``dump_brick``, decoded.
    """
    device = '/dev/{0}/{0}-tcache'.format(ssd_uuid)
    selector = '-g "use=1"' if brick_no == -1 else '-n {}'.format(brick_no)
    cmdline = '{} -p {} -c dump_brick {}'.format(TIER_REPAIR_TOOL, device, selector)
    output = common.cli(cmdline, False)
    return json.loads(output.decode('utf-8').strip())

def get_tier_inode_online(ssd_uuid):
    """Return the inode entry for ``ssd_uuid`` from an online dump, or None."""
    meta_kind = 'inode'
    return tier_dump_meta_online(ssd_uuid, meta_kind)

def get_tier_inode_offline(ssd_uuid, inode_no=-1):
    """Dump inode metadata by running the repair tool on the tier cache device.

    Args:
        ssd_uuid: UUID of the SSD.
        inode_no: Inode number to dump; ``-1`` dumps with the ``use=1``
            filter instead of a single number.

    Returns:
        The JSON document printed by ``dump_inode``, decoded.
    """
    device = '/dev/{0}/{0}-tcache'.format(ssd_uuid)
    selector = '-g "use=1"' if inode_no == -1 else '-n {}'.format(inode_no)
    cmdline = '{} -p {} -c dump_inode {}'.format(TIER_REPAIR_TOOL, device, selector)
    output = common.cli(cmdline, False)
    return json.loads(output.decode('utf-8').strip())

def get_tier_extent_offline(ssd_uuid, extent_no=-1, inode_no=-1):
    """Dump extent metadata by running the repair tool on the tier cache device.

    At least one of ``extent_no`` / ``inode_no`` must be non-negative.  When
    both are given, ``extent_no`` takes precedence.

    Args:
        ssd_uuid: UUID of the SSD.
        extent_no: Extent number to dump.  On V3 tiers the ``use=1`` filter
            is added as well.
        inode_no: Owner inode number; dumps the extents with that owner.

    Returns:
        The JSON document printed by ``dump_extent``, decoded.
    """
    assert extent_no >= 0 or inode_no >= 0

    device = '/dev/{0}/{0}-tcache'.format(ssd_uuid)
    base = '{} -p {} -c dump_extent'.format(TIER_REPAIR_TOOL, device)
    if extent_no >= 0:
        cmdline = '{} -n {}'.format(base, extent_no)
        if s_tier_version == TIER_V3:
            cmdline += ' -g "use=1"'
    elif inode_no >= 0:
        cmdline = '{} -g "owner={}"'.format(base, inode_no)
    else:
        cmdline = ''
    assert cmdline != ''

    output = common.cli(cmdline, False)
    return json.loads(output.decode('utf-8').strip())

def get_tier_shard_offline(ssd_uuid, shard_no=-1):
    """Dump shard metadata by running the repair tool on the tier cache device.

    Args:
        ssd_uuid: UUID of the SSD.
        shard_no: Number passed to ``dump_shard -n``; must be non-negative.

    Returns:
        The JSON document printed by ``dump_shard`` (``use=1`` filter),
        decoded.
    """
    assert shard_no >= 0

    device = '/dev/{0}/{0}-tcache'.format(ssd_uuid)
    if shard_no >= 0:
        cmdline = '{} -p {} -c dump_shard -n {} -g "use=1"'.format(TIER_REPAIR_TOOL, device, shard_no)
    else:
        cmdline = ''
    assert cmdline != ''

    output = common.cli(cmdline, False)
    return json.loads(output.decode('utf-8').strip())

def get_tier_hot_offline(ssd_uuid, hot_no=-1):
    """Dump "hot" metadata by running the repair tool on the tier cache device.

    Args:
        ssd_uuid: UUID of the SSD.
        hot_no: Number passed to ``dump_hot -n``; must be non-negative.

    Returns:
        The JSON document printed by ``dump_hot``, decoded.
    """
    assert hot_no >= 0

    device = '/dev/{0}/{0}-tcache'.format(ssd_uuid)
    if hot_no >= 0:
        cmdline = '{} -p {} -c dump_hot -n {}'.format(TIER_REPAIR_TOOL, device, hot_no)
    else:
        cmdline = ''
    assert cmdline != ''

    output = common.cli(cmdline, False)
    return json.loads(output.decode('utf-8').strip())

def get_tier_brickinfo_backup(ssd_uuid, brick_no):
    """Rebuild the brick record for a damaged brick slot from configuration.

    The brick ids configured for this SSD in ``CACHE_CONF`` are compared with
    the brick ids still readable from the device.  A record is rebuilt only
    when exactly one configured brick is missing from the device and the
    requested ``brick_no`` is the slot reported with an error.

    Args:
        ssd_uuid: UUID of the SSD being repaired.
        brick_no: Number of the brick slot to rebuild.

    Returns:
        dict: The rebuilt brick record (with a freshly generated
        ``bi_uuid``), or ``{}`` whenever the information needed for a safe
        rebuild is missing or ambiguous.
    """
    volume_name, _hosts, _replicate_num, _has_arbiter, bricks, _replicate = common.get_vs_cluster_info()
    if not volume_name:
        common.logger.error('failed to supported, cannot find volume name')
        return {}

    cur_cache_bricks = []
    all_cache_bricks = []
    glusterfs_bricks = []
    localhost = socket.gethostname()

    # Collect the bricks listed in the cache-disk configuration.
    if not os.path.exists(CACHE_CONF) or os.path.getsize(CACHE_CONF) == 0:
        common.logger.error('cache conf: {} invalid'.format(CACHE_CONF))
        return {}

    with open(CACHE_CONF, 'r') as f:
        cache_data = json.load(f)
    if not cache_data.get('maps'):
        common.logger.error('failed to get maps in cache_data: {}'.format(cache_data))
        # Same "nothing usable" value as every other failure path.
        return {}

    for cache in cache_data['maps']:
        for brick in cache['bricks']:
            if ssd_uuid in cache['uuid']:
                cur_cache_bricks.append(brick['brickId'])
            all_cache_bricks.append(brick['brickId'])

    # Collect the bricks of this host from the volume configuration.  A host
    # without an entry contributes no bricks instead of raising on None.
    for brick in bricks.get(localhost) or []:
        if not brick['arbiter'] and '-meta/' not in brick['path']:
            glusterfs_bricks.append(brick['path'])

    # If the volume and cache configurations disagree, the cache
    # configuration cannot be trusted.
    # FIXME: composite-volume scenarios cannot be repaired.
    if sorted(glusterfs_bricks) != sorted(all_cache_bricks):
        common.logger.error('glusterfs_bricks: {}, all_cache_bricks: {} not equal'.format(glusterfs_bricks, all_cache_bricks))
        return {}

    error_bricks = []
    normal_bricks = []
    bricks_info = get_tier_brickinfo_offline(ssd_uuid)
    for brick_info in bricks_info:
        if 'error' in brick_info:
            if brick_info['bi_no'] == brick_no:
                error_bricks.append(brick_info['bi_no'])
        else:
            normal_bricks.append(brick_info['bi_brickid'])

    not_found_brick = [b for b in cur_cache_bricks if b not in normal_bricks]

    # Only a single damaged brick can be rebuilt unambiguously.
    if len(not_found_brick) != 1 or len(error_bricks) != 1:
        common.logger.error('not_found_brick: {}, error_bricks: {} not equal'.format(not_found_brick, error_bricks))
        return {}

    brick_info = {'bi_use': 1,
                  'bi_no': brick_no,
                  'bi_uuid': str(uuid.uuid4()),
                  'bi_brickid': not_found_brick[0],
                  'bi_unlink': 0}
    return brick_info

# Prompt the operator to build the repair data by hand.
def check_and_build_repair_object(meta_type, no):
    """Write a repair template, let the operator edit it, and read it back.

    Only ``'inode'`` and ``'shard'`` produce a template; the fields the
    operator has to fill in carry placeholder strings.  The template is
    written to a JSON file in the tool's working directory, the operator is
    asked to confirm via ``common.check_terminal_input``, and the edited file
    is then loaded and deleted.

    Args:
        meta_type: Metadata kind (``'superblock'``, ``'brick'``, ``'inode'``,
            ``'shard'`` or ``'extent'``).
        no: Number of the metadata unit being repaired.

    Returns:
        dict or None: The repair object as edited by the operator, or
        ``None`` when ``meta_type`` is unknown or has no template.
    """
    if meta_type not in ['superblock', 'brick', 'inode', 'shard', 'extent']:
        return None

    repair_object = {}
    if meta_type == 'inode':
        repair_object["i_no"] = no
        repair_object["i_use"] = 1
        repair_object["i_atime"] = 0
        repair_object["i_ctime"] = 0
        repair_object["i_mtime"] = 1000000000 * int(time.time()) # current time, in nanoseconds
        repair_object["i_dtime"] = 0
        repair_object["i_priority"] = 2
        repair_object["i_unlink"] = 0

        repair_object["i_size"] = '[填写]分片大小'
        repair_object["i_uuid"] = '[填写]分片GFID'
        repair_object["i_brick_no"] = '[填写]所属brick号'
    elif meta_type == 'shard':
        shard = {}
        shards = []

        sno_start = no * 512 + 1
        sno_end = sno_start + 511
        shard['sno'] = '[填写]shard编号[{}, {})'.format(sno_start, sno_end)
        shard['ino'] = '[填写]所属inode号'
        shard['index'] = '[填写]shard分片偏移'
        # The template entry is listed twice (same dict object) -- presumably
        # to show the operator that the list may hold several shards.
        shards.append(shard)
        shards.append(shard)
        repair_object['shards'] = shards
        repair_object['meta_offset'] = s_tier_superblock['shard_table_offset'] + no * TIER_META_UNIT

    if not repair_object:
        return None

    repair_conf = '{}/{}_repair_object.json'.format(REPAIR_TOOL_WORK_DIR, common.VSFIRE_MAGIC)
    json_dump(repair_conf, repair_object, 4)
    readline = '修复类型: {}, 修复文件: {}, 请修改该修复文件后, 输入\'y\'继续，\'n\'退出'.format(meta_type, repair_conf)
    common.check_terminal_input(readline)

    # Read the JSON file back after the operator has edited it.  The file is
    # removed only after a successful load so a malformed edit is not lost.
    with open(repair_conf) as f:
        repair_object = json.load(f)
    os.remove(repair_conf)
    return repair_object

# Tell whether the tier supports repairing by flushing metadata.
def tier_has_flush_meta(is_online, vs_version, tier_version):
    """Return True when flush-meta repair can be used.

    That is the case only for an online V3 tier on VS 3.7 or later, and only
    while the ``s_tier_flush_meta`` debug switch is on.
    """
    return bool(s_tier_flush_meta
                and is_online
                and vs_version >= common.VS_VERSION_3_7
                and tier_version == TIER_V3)

def tier_superblock_repair(ssd_uuid):
    """Repair a bad block inside the superblock region of a tier device.

    The superblock content is taken from the running tier when possible and
    from the backup configuration otherwise.  Online V3 tiers that support
    flush-meta are repaired through ``tfs_metadata_repair_online`` (defined
    elsewhere in this file); all other cases rewrite the superblock with the
    repair tool after the operator confirms, then read it back to verify.

    Args:
        ssd_uuid: UUID of the SSD being repaired.

    Returns:
        int: ``0`` on success, ``-1`` on failure.
    """
    vs_version = common.get_vs_version()

    # VS 3.6 never fetches the superblock online.
    superblock = None
    if not s_tier_force_offline and s_tier_flush_meta and vs_version != common.VS_VERSION_3_6:
        superblock = get_tier_superblock_online(ssd_uuid)

    is_online = bool(superblock)
    if not is_online:
        # Could not get it online; fall back to the backup configuration.
        superblock = get_tier_superblock_backup(ssd_uuid)

    if not superblock:
        common.logger.error('failed to get superblock')
        return -1

    known_versions = [TIER_V1, TIER_V2, TIER_V3]
    if superblock['version'] not in known_versions:
        common.logger.error('get superblock version: {} invalid'.format(superblock['version']))
        return -1
    # Position in the list gives the numeric version expected by the tool.
    version = known_versions.index(superblock['version']) + 1

    # Online V3 tiers are repaired with the flush_meta command.
    if tier_has_flush_meta(is_online, vs_version, superblock['version']):
        tfs_metadata_repair_online(ssd_uuid)
        time.sleep(5) # give flush_meta five seconds to finish the repair
        return 0

    # Rewrite the superblock with the repair tool.
    tier_dev = '/dev/{}/{}-tcache'.format(ssd_uuid, ssd_uuid)
    repair_conf = '{}/{}_repair_sb.json'.format(REPAIR_TOOL_WORK_DIR, common.VSFIRE_MAGIC)
    json_dump(repair_conf, superblock)
    cmdline = '{} -p {} -c repair_super -f {} -v {} -y'.format(TIER_REPAIR_TOOL, tier_dev, repair_conf, version)
    superblock_text = json.dumps(superblock, indent=4)
    common.logger.info('try to cmdline: {}, repair_conf: {}'.format(cmdline, superblock_text))

    # Ask the operator before touching the device.
    readline = '是否执行分层: {} [{}], superblock区间坏道修复, 修复文件: {}, 修复数据: \n{}, 输入\'y\'继续，\'n\'退出'.format(
        ssd_uuid, '在线' if is_online else '离线', repair_conf, superblock_text)
    common.check_terminal_input(readline)

    common.cli(cmdline, False)
    os.remove(repair_conf)

    # Read the superblock back to confirm the bad block is gone.
    try:
        superblock = get_tier_superblock_offline(ssd_uuid)
    except common.CmdError as e:
        common.logger.warn('[check badblocks], except: {}'.format(str(e)))
        return -1
    common.logger.info('[check badblocks], superblock: {}'.format(json.dumps(superblock, indent=4)))
    return 0

def _tier_meta_unit_readable(tier_dev, badblock_offset):
    """Return True when the metadata unit holding ``badblock_offset`` can be read with dd."""
    # Floor division keeps the dd block index an integer on every Python
    # version.
    cmdline = '/bin/dd if={} of=/dev/null bs={} count=1 skip={} iflag=direct conv=notrunc 2>/dev/null'.format(
        tier_dev, TIER_META_UNIT, badblock_offset // TIER_META_UNIT)
    try:
        common.cli(cmdline, False)
    except common.CmdError as e:
        common.logger.warn('[check badblocks], except: {}'.format(str(e)))
        return False
    common.logger.info('[check badblocks], badblock_offset: {}'.format(badblock_offset))
    return True


def tier_reserved_repair(ssd_uuid, badblock_offset):
    """Repair a bad block in a reserved area by overwriting it with zeros.

    Args:
        ssd_uuid: UUID of the SSD being repaired.
        badblock_offset: Byte offset of the bad block on the tier device; the
            ``TIER_META_UNIT``-sized unit containing it is rewritten.

    Returns:
        int: ``0`` when the unit is readable (either already, when
        ``common.s_check_badblock`` is set, or after the rewrite), ``-1``
        when it still cannot be read.
    """
    tier_dev = '/dev/{}/{}-tcache'.format(ssd_uuid, ssd_uuid)

    # Optional pre-check: nothing to do when the unit is already readable.
    if common.s_check_badblock and _tier_meta_unit_readable(tier_dev, badblock_offset):
        return 0

    # Write zeros over the unit to repair the bad block.
    cmdline = '/bin/dd if=/dev/zero of={} bs={} count=1 seek={} oflag=direct conv=notrunc 2>/dev/null'.format(
        tier_dev, TIER_META_UNIT, badblock_offset // TIER_META_UNIT)
    common.logger.info('try to cmdline: {}'.format(cmdline))
    common.cli(cmdline, False)

    # Read the unit again to confirm the repair worked.
    if _tier_meta_unit_readable(tier_dev, badblock_offset):
        return 0
    return -1

def tier_brick_repair(ssd_uuid, brick_no):
    """Repair a bad block inside the brick-info region of a tier device.

    With the tier online, the brick record is taken from an online dump (an
    unused record is written when the dump does not list the brick).  With
    the tier offline, the record is rebuilt from configuration.  The record
    is written with the repair tool after the operator confirms, and the
    brick is read back to verify.

    Args:
        ssd_uuid: UUID of the SSD being repaired.
        brick_no: Number of the brick slot to repair.

    Returns:
        int: ``0`` on success (or when the pre-check finds the slot
        readable), ``-1`` on failure.
    """
    # Optional pre-check: nothing to do when the slot is readable.
    if common.s_check_badblock:
        try:
            readable_info = get_tier_brickinfo_offline(ssd_uuid, brick_no)
            common.logger.info('[check badblocks], brick_info: {}'.format(json.dumps(readable_info, indent=4)))
            return 0
        except common.CmdError as e:
            common.logger.warn('[check badblocks], except: {}'.format(str(e)))

    if not s_tier_online:
        # Offline repair: rebuild the record from configuration.
        brick_repair = get_tier_brickinfo_backup(ssd_uuid, brick_no)
        if not brick_repair:
            common.logger.error('failed to get brick_repair from backup')
            return -1
    else:
        # Online repair: copy the record from the running tier.
        online_bricks = get_tier_brickinfo_online(ssd_uuid)
        if not online_bricks:
            common.logger.error('failed to get brick_repair from online')
            return -1

        # Used when the online dump does not list the brick at all.
        brick_repair = {'bi_use': 0,
                        'bi_no': brick_no}
        for online_brick in online_bricks.get('brick'):
            if online_brick['bi_no'] != brick_no:
                continue
            if s_tier_version == TIER_V3:
                # Only version 690 reaches this path; the V3 brickinfo dump
                # has a bug and does not return the UUID, so make a new one.
                brick_uuid = str(uuid.uuid4())
            else:
                brick_uuid = online_brick['bi_uuid']
            brick_repair = {'bi_use': 1,
                            'bi_no': brick_no,
                            'bi_uuid': brick_uuid,
                            'bi_brickid': online_brick['bi_brickid'],
                            'bi_unlink': int(online_brick['bi_unlink'])}
            break

    # Run the repair.
    assert brick_repair
    repair_conf = '{}/{}_repair_brick.json'.format(REPAIR_TOOL_WORK_DIR, common.VSFIRE_MAGIC)
    json_dump(repair_conf, brick_repair)
    tier_dev = '/dev/{}/{}-tcache'.format(ssd_uuid, ssd_uuid)
    cmdline = '{} -p {} -c repair_brick -f {} -y'.format(TIER_REPAIR_TOOL, tier_dev, repair_conf)
    repair_text = json.dumps(brick_repair, indent=4)
    common.logger.info('try to cmdline: {}, repair_conf: {}'.format(cmdline, repair_text))

    # Ask the operator before touching the device.
    readline = '是否执行分层: {} [{}], brick区间坏道修复, 修复文件: {}, 修复数据: \n{}, 输入\'y\'继续，\'n\'退出'.format(
        ssd_uuid, '在线' if s_tier_online else '离线', repair_conf, repair_text)
    common.check_terminal_input(readline)

    common.cli(cmdline, False)
    os.remove(repair_conf)

    # Read the brick back to confirm the bad block is gone.
    try:
        verified_info = get_tier_brickinfo_offline(ssd_uuid, brick_no)
    except common.CmdError as e:
        common.logger.warn('[check badblocks], except: {}'.format(str(e)))
        return -1
    common.logger.info('[check badblocks], brick_info: {}'.format(json.dumps(verified_info, indent=4)))
    return 0

# Find full GFIDs on the local mount point from the first 4 characters of a GFID.
def get_gfid_from_localpath(brick_id, gfid_prefix):
    """List GFID-named regular files under the brick's ``.glusterfs`` bucket.

    The bucket directory is ``<brick_id>/.glusterfs/<p[0:2]>/<p[2:4]>`` where
    ``p`` is ``gfid_prefix``.  Entries whose sticky bit is set ("T" files)
    are skipped.

    Args:
        brick_id: Brick root path.
        gfid_prefix: At least the first four characters of the GFID.

    Returns:
        list: Matching file names; empty when the bucket does not exist.
    """
    bucket_dir = '{}/.glusterfs/{}/{}'.format(brick_id, gfid_prefix[0:2], gfid_prefix[2:4])
    if not os.path.exists(bucket_dir):
        return []

    found = []
    for entry in os.listdir(bucket_dir):
        # The name must look like a GFID.
        if not common.check_str_is_gfid(entry):
            continue
        # Only regular files count.
        entry_path = os.path.join(bucket_dir, entry)
        if not os.path.isfile(entry_path):
            continue
        # Skip "T" files, i.e. entries with the sticky bit set.
        if os.stat(entry_path).st_mode & stat.S_ISVTX:
            continue
        found.append(entry)
    return found

# Find the full GFID in EFS from the first 4 characters of a GFID.
def get_gfid_from_efs(brick_id, gfid_prefix):
    """Look up a GFID through ``efs_dbg`` on the brick process.

    The brick's PID is taken from the second field of the first ``ps aux``
    line that mentions ``brick_id``.  The ``itable list`` output of
    ``efs_dbg`` is then filtered by ``gfid_prefix``.

    Args:
        brick_id: Brick identifier to look for in the process list.
        gfid_prefix: Leading characters of the GFID.

    Returns:
        list: At most one GFID (the first match); empty when the brick
        process or a matching GFID is not found.
    """
    gfids = []
    ps_cmd = '/bin/ps aux | grep {} | /bin/grep -v supervise | /bin/grep -v grep || /bin/echo'.format(brick_id)
    ps_fields = common.cli(ps_cmd, False).split()
    if not (ps_fields and len(ps_fields) > 1 and ps_fields[1].isdigit()):
        return gfids

    brick_pid = ps_fields[1]
    efs_cmd = 'itable list'
    list_cmd = '/sf/vs/sbin/efs_dbg -p {} -c "{}" 2>/dev/null | grep {} || /bin/echo'.format(brick_pid, efs_cmd, gfid_prefix)
    for token in common.cli(list_cmd, False).split():
        if common.check_str_is_gfid(token):
            gfids.append(token)
            break
    return gfids

def get_tier_inode_from_file(tfs_oplog, is_gzip, cdev, inode_no):
    """Rebuild an inode record from one TFS operation-log file.

    The file is scanned from the newest line to the oldest.  Only lines
    tagged with ``[cdev:<cdev>]`` and ``[ino:<inode_no>]`` are considered.
    The first such line that records an unlink or a complete create decides
    the result.

    Args:
        tfs_oplog: Path of the log file.
        is_gzip: True when the file is gzip-compressed.
        cdev: Device tag to match.
        inode_no: Inode number to look for.

    Returns:
        dict: ``{"i_no": ..., "i_use": 0}`` when the inode was unlinked, a
        full inode record when its creation was found, or ``{}`` when the
        file holds no usable record or the inode has the special UUID
        ``00000000-0000-0000-0000-000000000002``.
    """
    if is_gzip:
        with gzip.open(tfs_oplog, 'rt') as f:
            lines = f.readlines()
    else:
        with open(tfs_oplog, 'r') as f:
            lines = f.readlines()

    cdev_re = re.compile(r'\[cdev:{}\]'.format(cdev))
    ino_re = re.compile(r'\[ino:{}\]'.format(inode_no))
    unlink_re = re.compile(r'\:tfs_inode_unlink\]')
    create_re = re.compile(r'\:tfs_inode_create\]')
    uuid_re = re.compile(r'\[uuid:([0-9a-f\-]+)\]')
    bno_re = re.compile(r'\[bno:(\d+)\]')

    for line in reversed(lines):
        # Skip lines of other SSDs or other inodes.
        if not cdev_re.search(line) or not ino_re.search(line):
            continue

        # The inode was deleted.
        if unlink_re.search(line):
            common.logger.warn('find tfs_inode_unlink inode_no: {}'.format(inode_no))
            return {"i_no": inode_no, "i_use": 0}

        # Look for the record of the inode's creation.
        if not create_re.search(line):
            continue
        uuid_match = uuid_re.search(line)
        bno_match = bno_re.search(line)
        if not (uuid_match and bno_match):
            continue

        gfid = uuid_match.group(1)
        # This special UUID cannot be repaired.
        if gfid == '00000000-0000-0000-0000-000000000002':
            common.logger.error('failed to repair uuid: {}'.format(gfid))
            return {}

        inode_repair = {
            "i_uuid": gfid,
            "i_no": inode_no,
            "i_brick_no": int(bno_match.group(1)),
            "i_use": 1,
            "i_priority": 2,
            "i_unlink": 0,
            "i_atime": 0,
            "i_ctime": 0,
            "i_dtime": 0,
            "i_mtime": 1000000000 * int(time.time()), # current time, in nanoseconds
            "i_size": 0,
        }

        # Ask the route layer for the file size.
        import pylib.rpcservice as rpcservice
        file_size = rpcservice.route_get_file_info(gfid).get('file_size')
        if int(file_size) >= 0:
            inode_repair["i_size"] = int(file_size)
        common.logger.info('find tfs_inode_create inode_no: {}'.format(inode_no))
        return inode_repair
    return {}

def get_tier_inode_from_oplog(ssd_uuid, inode_no):
    """Search the current and rotated TFS operation logs for an inode record.

    ``TFS_OPLOG`` is examined first, then ``TFS_OPLOG.1.gz`` up to
    ``TFS_OPLOG.19.gz``.  The search stops at the first file that yields a
    record or at the first file that does not exist.

    Args:
        ssd_uuid: UUID of the SSD; its first ``-``-separated field is the
            device tag used in the log.
        inode_no: Inode number to look for.

    Returns:
        dict: The record from ``get_tier_inode_from_file``, or ``{}``.
    """
    cdev = ssd_uuid.split('-')[0]
    inode_repair = {}

    for index in range(20):
        is_gzip = index != 0
        tfs_oplog = '{}.{}.gz'.format(TFS_OPLOG, index) if is_gzip else TFS_OPLOG
        if not os.path.exists(tfs_oplog):
            common.logger.error('failed to find tfs_oplog: {}'.format(tfs_oplog))
            break

        inode_repair = get_tier_inode_from_file(tfs_oplog, is_gzip, cdev, inode_no)
        if inode_repair:
            # Found the record; no need to read older logs.
            break
    return inode_repair

def get_tier_shard_from_file(tfs_oplog, is_gzip, cdev, whole_oplog, shards, shards_no, shards_found_no):
    """Collect shard records for a set of shard numbers from one log file.

    The file is scanned from the newest line to the oldest.  For every wanted
    shard number only the newest unlink/create line counts: an unlink marks
    the number as resolved without a record, a create adds a record.

    Args:
        tfs_oplog: Path of the log file.
        is_gzip: True when the file is gzip-compressed.
        cdev: Device tag to match.
        whole_oplog: Current "log reaches back to mkfs" flag.
        shards: List of shard records found so far (extended in place).
        shards_no: Shard numbers that are being looked for.
        shards_found_no: Shard numbers already resolved (extended in place).

    Returns:
        tuple: ``(whole_oplog, shards, shards_no, shards_found_no)`` where
        ``whole_oplog`` is True once an SSD-initialization line was seen.
    """
    if is_gzip:
        with gzip.open(tfs_oplog, 'rt') as f:
            lines = f.readlines()
    else:
        with open(tfs_oplog, 'r') as f:
            lines = f.readlines()

    mkfs_dev_re = re.compile(r'\[cdev:/dev/{}'.format(cdev))
    mkfs_op_re = re.compile(r'\:tfs_mkfs_super_block\]')
    cdev_re = re.compile(r'\[cdev:{}\]'.format(cdev))
    unlink_re = re.compile(r'\:tfs_shard_unlink\]')
    create_re = re.compile(r'\:tfs_shard_create\]')
    sno_re = re.compile(r'\[sno:(\d+)\]')

    for line in reversed(lines):
        # An SSD-initialization line means the log history is complete.
        if mkfs_dev_re.search(line) and mkfs_op_re.search(line):
            whole_oplog = True
            continue

        # Skip lines of other SSDs.
        if not cdev_re.search(line):
            continue

        # Both record kinds need a shard number that is wanted and not yet
        # resolved by a newer line.
        sno_match = sno_re.search(line)
        if sno_match is None:
            continue
        sno = int(sno_match.group(1))
        if sno not in shards_no or sno in shards_found_no:
            continue

        # The shard was deleted.
        if unlink_re.search(line):
            common.logger.warn('find tfs_shard_unlink sno: {}'.format(sno))
            shards_found_no.append(sno)
            continue

        # The shard was created.
        if create_re.search(line):
            shards.append({
                'sno': sno,
                'ino': int(re.search(r'\[ino:(\d+)\]', line).group(1)),
                'index': int(re.search(r'\[index:(\d+)\]', line).group(1)),
            })
            shards_found_no.append(sno)
            common.logger.info('find tfs_shard_create sno: {}'.format(sno))
    return whole_oplog, shards, shards_no, shards_found_no

def get_tier_shard_from_oplog(ssd_uuid, shard_no):
    """Rebuild the shard records of one shard metadata unit from the logs.

    Shard numbers ``shard_no * 512 + 1`` up to (not including)
    ``shard_no * 512 + 512`` are looked up in ``TFS_OPLOG`` and its rotated
    ``.N.gz`` files (N from 1 to 19, stopping at the first missing file).
    When no SSD-initialization line was seen, the operator is asked whether
    to continue because records may be missing.

    Args:
        ssd_uuid: UUID of the SSD; its first ``-``-separated field is the
            device tag used in the log.
        shard_no: Number of the shard metadata unit.

    Returns:
        dict: ``{'shards': [...], 'meta_offset': ...}`` when any create or
        unlink record was found, otherwise ``{}``.
    """
    cdev = ssd_uuid.split('-')[0]
    sno_start = shard_no * 512 + 1
    sno_end = sno_start + 511

    whole_oplog = False
    shards_no = list(range(sno_start, sno_end)) # every sno that is looked for
    shards_found_no = [] # snos resolved from the log
    shards = [] # valid shard records found

    # Walk the current log and then every rotated log.
    for index in range(20):
        is_gzip = index != 0
        tfs_oplog = '{}.{}.gz'.format(TFS_OPLOG, index) if is_gzip else TFS_OPLOG
        if not os.path.exists(tfs_oplog):
            common.logger.error('failed to find tfs_oplog: {}'.format(tfs_oplog))
            break

        whole_oplog, shards, shards_no, shards_found_no = get_tier_shard_from_file(
            tfs_oplog, is_gzip, cdev, whole_oplog, shards, shards_no, shards_found_no)

    # Turn what was found into the repair JSON object.
    shard_repair = {}
    if shards or shards_found_no:
        shard_repair['shards'] = shards
        shard_repair['meta_offset'] = s_tier_superblock['shard_table_offset'] + shard_no * TIER_META_UNIT

    # Without an SSD-initialization record the log is incomplete; continuing
    # may lose data, so let the operator decide.
    if not whole_oplog:
        readline = '找到的shard不完整, 是否继续执行修复数据，继续执行可能会丢失数据: \n{}, 输入\'y\'继续，\'n\'退出'.format(json.dumps(shard_repair, indent=4))
        common.check_terminal_input(readline)
    return shard_repair

def get_tier_inode_from_extent(ssd_uuid, inode_no):
    """Rebuild an inode record from the extents owned by that inode.

    The ``te_sampling_low`` value of the inode's extents is used as the
    leading characters of the GFID.  The full GFID is then searched on the
    bricks configured for this SSD, first on the local brick path and, when
    EFS is in use, through EFS.  The record is rebuilt only when exactly one
    brick yields exactly one GFID.

    Args:
        ssd_uuid: UUID of the SSD being repaired.
        inode_no: Number of the inode to rebuild.

    Returns:
        dict: ``{"i_no": ..., "i_use": 0}`` when the inode owns no extents, a
        full inode record when the GFID and brick were identified, or ``{}``
        when the information is missing or ambiguous.
    """
    extents_info = get_tier_extent_offline(ssd_uuid, -1, inode_no)
    if not extents_info:
        # No extent is owned by this inode, so the inode holds no data.
        common.logger.warn('failed to find extents by inode_no: {}'.format(inode_no))
        return {"i_no": inode_no, "i_use": 0}

    valid_extents = [extent for extent in extents_info if 'error' not in extent]
    if not valid_extents:
        # Every extent is a bad block; the extent area must be repaired first.
        common.logger.error('failed to find valid extents, pls repair extent first')
        return {}

    # te_sampling_low holds the leading characters of the GFID and must be
    # the same for all extents of one inode.  Checked explicitly (not with
    # assert) so the check also holds when Python runs with -O.
    gfid_prefix = valid_extents[0]['te_sampling_low']
    for extent in valid_extents:
        if extent['te_sampling_low'] != gfid_prefix:
            common.logger.error('te_sampling_low mismatch: {} != {}, inode_no: {}'.format(
                extent['te_sampling_low'], gfid_prefix, inode_no))
            return {}

    # Look for GFIDs with that prefix in posix or EFS.
    if not os.path.exists(CACHE_CONF) or os.path.getsize(CACHE_CONF) == 0:
        common.logger.error('cache conf: {} invalid'.format(CACHE_CONF))
        return {}

    volume_name, hosts, _replicate_num, _has_arbiter, _bricks, _replicate = common.get_vs_cluster_info()
    if not volume_name:
        common.logger.error('failed to supported, cannot find volume name')
        return {}

    has_efs = bool(common.vs_has_efs(common.get_vs_version(), hosts))
    with open(CACHE_CONF, 'r') as f:
        cache_data = json.load(f)
    if not cache_data.get('maps'):
        common.logger.error('failed to get maps in cache_data: {}'.format(cache_data))
        # Same "nothing usable" value as every other failure path.
        return {}

    brick_gfids = {}
    for cache in cache_data['maps']:
        if ssd_uuid not in cache['uuid']:
            continue
        for brick in cache['bricks']:
            gfids = get_gfid_from_localpath(brick['brickId'], gfid_prefix)
            if not gfids and has_efs:
                gfids = get_gfid_from_efs(brick['brickId'], gfid_prefix)

            if gfids:
                brick_gfids[brick['brickId']] = gfids

    # Exactly one brick must hold a candidate, otherwise the match is
    # ambiguous (or absent) and the bad block cannot be repaired.
    if len(brick_gfids) != 1:
        common.logger.error('found to many gfids: {}, cannot to repair'.format(brick_gfids))
        return {}
    common.logger.info('success found to gfid: {}, try to repair'.format(brick_gfids))

    # Works for both list-returning and view-returning dict.items().
    brick_id, gfids = next(iter(brick_gfids.items()))
    # More than one GFID on that brick is ambiguous as well.
    if len(gfids) != 1:
        common.logger.error('found to many gfids: {}, cannot to repair'.format(gfids))
        return {}
    gfid = gfids[0]

    brick_no = -1
    for brick_info in get_tier_brickinfo_offline(ssd_uuid):
        # .get(): an entry without 'bi_brickid' simply does not match.
        if brick_info.get('bi_brickid') == brick_id:
            brick_no = brick_info['bi_no']
            break
    if not gfid or brick_no == -1:
        common.logger.error('failed to find gfid: {}, brick_no: {}'.format(gfids, brick_no))
        return {}

    # Build the record needed to repair the inode.
    inode_repair = {}
    inode_repair["i_uuid"] = gfid
    inode_repair["i_no"] = inode_no
    inode_repair["i_brick_no"] = brick_no
    inode_repair["i_use"] = 1
    inode_repair["i_atime"] = 0
    inode_repair["i_ctime"] = 0
    inode_repair["i_mtime"] = 1000000000 * int(time.time()) # current time, in nanoseconds
    inode_repair["i_dtime"] = 0
    inode_repair["i_size"] = 0
    inode_repair["i_priority"] = 2
    inode_repair["i_unlink"] = 0
    return inode_repair


def tier_inode_repair(ssd_uuid, inode_no):
    """Repair the bad block holding inode `inode_no` of tier `ssd_uuid`.

    The replacement record is taken from the online tier when s_tier_online is
    set; otherwise from the oplog (V3) or from get_tier_inode_from_extent()
    (V1/V2). When no record could be built automatically,
    check_and_build_repair_object() is used as a last resort. The record is
    dumped to a temporary JSON file, the operator is asked to confirm, and
    TIER_REPAIR_TOOL is run with `-c repair_inode`.

    Returns 0 when the inode can be read offline (up front, if
    common.s_check_badblock is set, or after the repair), otherwise -1.
    """
    def _inode_readable():
        # A successful offline read means there is no bad block (any more).
        try:
            info = get_tier_inode_offline(ssd_uuid, inode_no)
            common.logger.info('[check badblocks], inode_info: {}'.format(json.dumps(info, indent=4)))
            return True
        except common.CmdError as err:
            common.logger.warn('[check badblocks], except: {}'.format(str(err)))
        return False

    def _to_ns(stamp):
        # "YYYY-mm-dd HH:MM:SS" local time -> nanoseconds since the epoch.
        parsed = time.strptime(stamp, "%Y-%m-%d %H:%M:%S")
        return 1000000000 * int(time.mktime(parsed))

    # Nothing to do when the inode is already readable.
    if common.s_check_badblock and _inode_readable():
        return 0

    inode_repair = {}
    if not s_tier_online:
        # Offline repair.
        if s_tier_version == TIER_V3:
            # V3: try to recover the inode from the oplog.
            inode_repair = get_tier_inode_from_oplog(ssd_uuid, inode_no)
            if not inode_repair:
                common.logger.warn('failed to get inode_repair from oplog')
        else:
            # V1/V2: try to rebuild the inode (and its brick) automatically.
            inode_repair = get_tier_inode_from_extent(ssd_uuid, inode_no)
            if not inode_repair:
                common.logger.warn('failed to get inode_repair from backup')
    else:
        # Online repair: look the inode up in the in-memory metadata.
        online_inodes = get_tier_inode_online(ssd_uuid)
        if not online_inodes:
            common.logger.error('failed to get inode_repair from online')
            return -1

        located = None
        for online_brick in online_inodes['brick']:
            for inode in online_brick['inodes']:
                if inode['i_no'] == inode_no:
                    located = (online_brick, inode)
                    break
            if located is not None:
                break

        if located is None:
            # The inode is not known online, so it has been deleted: mark it unused.
            common.logger.warn('failed to find inode_no: {}, try to unlink it'.format(inode_no))
            inode_repair = {"i_no": inode_no, "i_use": 0}
        else:
            owner_brick, found = located
            inode_repair['i_uuid'] = found['uuid']
            inode_repair['i_no'] = inode_no
            inode_repair['i_brick_no'] = owner_brick['bi_no']
            inode_repair['i_use'] = 1
            inode_repair['i_atime'] = _to_ns(found['atime'])
            inode_repair['i_ctime'] = _to_ns(found['ctime'])
            inode_repair['i_mtime'] = _to_ns(found['mtime'])
            inode_repair["i_size"] = 0
            inode_repair["i_priority"] = found['i_priority']
            inode_repair["i_unlink"]   = found['unlink']

            if s_tier_version == TIER_V3:
                # This special UUID must not be repaired.
                if inode_repair["i_uuid"] == '00000000-0000-0000-0000-000000000002':
                    common.logger.error('failed to repair uuid: {}'.format(inode_repair["i_uuid"]))
                    return -1
                # V3 stores the inode size; ask the route layer for the file size.
                import pylib.rpcservice as rpcservice
                file_size = rpcservice.route_get_file_info(inode_repair["i_uuid"]).get('file_size')
                size = int(file_size)
                if size >= 0:
                    inode_repair["i_size"] = size

    # No automatic record: fall back to a manually written configuration.
    if not inode_repair:
        inode_repair = check_and_build_repair_object('inode', inode_no)
        if not inode_repair:
            common.logger.error('failed to build inode_repair')
            return -1

    # Run the repair.
    repair_conf = '{}/{}_repair_inode.json'.format(REPAIR_TOOL_WORK_DIR, common.VSFIRE_MAGIC)
    json_dump(repair_conf, inode_repair)
    tier_dev = '/dev/{}/{}-tcache'.format(ssd_uuid, ssd_uuid)
    cmdline = '{} -p {} -c repair_inode -f {} -y'.format(TIER_REPAIR_TOOL, tier_dev, repair_conf)
    common.logger.info('try to cmdline: {}, repair_conf: {}'.format(cmdline, json.dumps(inode_repair, indent=4)))

    # Ask the operator for confirmation before touching the device.
    readline = '是否执行分层: {} [{}], inode区间坏道修复, 修复文件: {}, 修复数据: \n{}, 输入\'y\'继续，\'n\'退出'.format(
        ssd_uuid, '在线' if s_tier_online else '离线', repair_conf, json.dumps(inode_repair, indent=4))
    common.check_terminal_input(readline)

    common.cli(cmdline, False)
    os.remove(repair_conf)

    # Verify that the bad block is gone.
    return 0 if _inode_readable() else -1

""" v1版本-c file输出格式
size = 4
hit = 0
miss = 0
uniliztion ratio = 0.39%(4096/1048576)
block_cnt = 1
dirty_block_cnt = 1
clean_block_cnt = 0

file_no  block_no data     hot                meta
0        0        4096     0                  1 0 0 0 
"""
def generate_extent_v1(inodes_info, extent_no):
    """Build the V1 repair record for extent `extent_no` from the online tier.

    For every inode of every brick in `inodes_info`, the `file` command is
    issued and its response file is parsed (format: see the sample above this
    function). The first data row whose block number equals `extent_no`
    is turned into the repair record.

    Returns the repair dict, or None when no inode references the extent.
    Raises common.CmdError when the command fails or the response is too short.
    """
    for brick in inodes_info['brick']:
        for inode in brick['inodes']:
            arg = 'brick_id={},gfid={}'.format(brick['bi_brickid'], inode['uuid'])
            ret = tier_do_command('file', arg)
            if ret['ret'] != 0:
                err_msg = 'arg: {} failed: {}'.format(arg, ret['msg'])
                raise common.CmdError(err_msg)

            # NOTE(review): generate_extent_v2/v3 delete the response file after
            # reading it; this function never did. Confirm whether V1 should too.
            rsp_file = ret['msg']
            with open(rsp_file, 'r') as f:
                lines = f.readlines()

            # The statistics block plus the column header occupy the first 9 lines.
            if len(lines) < 9:
                raise common.CmdError('response file output invalid')

            for line in lines[9:]:
                values = line.strip().split()
                # A blank (e.g. trailing) line carries no extent; skipping it
                # avoids an IndexError that would abort the whole repair.
                if not values:
                    continue
                if int(values[1]) != extent_no:
                    continue
                common.logger.info('file: {} found extent: {}, line: {}'.format(inode['uuid'], extent_no, line))

                # First line of the response is "size = <n>".
                size = lines[0].strip().split('=')[1]
                extent_repair = {}
                extent_repair['te_no'] = extent_no
                extent_repair['te_block'] = int(values[0])
                extent_repair['te_size'] = int(size)
                extent_repair['te_inode_no'] = int(inode['i_no'])
                extent_repair['te_sampling_low'] = int(inode['uuid'][0:8], 16)
                extent_repair['te_use'] = 1
                extent_repair['te_clean'] = 0  # marked dirty by default
                extent_repair['te_hot'] = int(values[3])
                extent_repair['te_reserved1'] = extent_no
                extent_repair['te_bitmap'] = [int(v, 16) for v in values[4:]]
                return extent_repair
    return None

"""v2版本-c file输出格式
{
  "ver": 131072,
  "size": 4096,
  "hit": 0,
  "miss": 0,
  "file info": [
    {
      "offset": 0,
      "len": 3072,
      "clean": 1,
      "mapno": 0,
      "begin": 0,
      "cnt": 8,
      "blockno": 3334,
      "ssd_offset": 11209277440
    },
    {
      "end line": 1
    }
  ]
}

"""
def generate_extent_v2(inodes_info, extent_no):
    """Build the V2 repair record for extent `extent_no` from the online tier.

    For every inode of every brick in `inodes_info`, the `file` command is
    issued, its JSON response file is loaded and then deleted, and all
    "file info" entries whose `blockno` equals `extent_no` are collected.
    The first inode that owns such entries yields the repair record.

    Returns the repair dict, or None when no inode references the extent.
    Raises common.CmdError when the command fails.
    """
    for brick in inodes_info['brick']:
        for inode in brick['inodes']:
            arg = 'brick_id={},gfid={}'.format(brick['bi_brickid'], inode['uuid'])
            ret = tier_do_command('file', arg)
            if ret['ret'] != 0:
                err_msg = 'arg: {} failed: {}'.format(arg, ret['msg'])
                raise common.CmdError(err_msg)

            rsp_file = ret['msg']
            # Close the handle deterministically instead of leaking file(...).
            with open(rsp_file) as rsp:
                dump_objs = json.load(rsp)
            os.remove(rsp_file)

            # Keep the mapping entries of this extent; the list ends with an
            # {"end line": 1} sentinel, which is skipped.
            ext_map = []
            for map_meta in dump_objs.get('file info'):
                if map_meta.get('end line') or map_meta.get('blockno') != extent_no:
                    continue
                ext_map.append(map_meta)
            if not ext_map:
                continue

            common.logger.info('file: {} found extent: {}, ext_map: {}'.format(inode['uuid'], extent_no, json.dumps(ext_map, indent=4)))
            extent_repair = {}
            extent_repair['te_inode_no'] = inode['i_no']
            extent_repair['te_sampling_low'] = int(inode['uuid'][0:8], 16)
            extent_repair['te_sampling_mid'] = 0
            extent_repair['te_sampling_high'] = 0
            extent_repair['te_use'] = 1
            extent_repair['te_no'] = extent_no
            extent_repair['te_mapped_meta'] = ext_map
            return extent_repair
    return None

# Look up, in the shard table, the index recorded for the shard an extent refers to.
def tier_get_index_from_shard(ssd_uuid, shard):
    """Return the `index` of shard number `shard` on tier `ssd_uuid`.

    The shard-table unit `shard / 512` is read offline and scanned for the
    entry whose `sno` equals `shard`.

    Raises common.CmdError when no such entry exists.
    """
    shard_table = get_tier_shard_offline(ssd_uuid, shard/512)
    hits = (entry['index'] for entry in shard_table['shards'] if entry['sno'] == shard)
    for index in hits:
        return index

    # A missing shard must abort the caller.
    raise common.CmdError('failed to find shard: {}'.format(shard))

"""v3版本-c file输出格式
{
    "block_err-eno": -1,
    "kvm_miss_req": 0,
    "ver": 196608,
    "dirty_block_cnt": 0,
    "clean_block_cnt": 1,
    "fatal_err": 0,
    "shard_num": 2,
    "kvm_hit": 0,
    "kvm_miss": 0,
    "demote_err": 0,
    "state": 1,
    "ino": 0,
    "file info": [
        {
            "sense": 0,
            "in_use": 1,
            "dm_err": 0,
            "shard": 1,
            "eno": 0,
            "hot": 254,
            "dirty": 0,
            "offset": 0,
            "demote": 0,
            "bitmap": "[ 1 0 0 0 ]",
            "phy-offset": 466616320
        },
        {
            "end line": 1
        }
    ],
    "hit": 0,
    "kvm_hit_req": 0,
    "miss": 0,
    "block_err-time": 0,
    "size": 1081081856
}
"""
def generate_extent_v3(ssd_uuid, inodes_info, extent_no):
    """Build the V3 repair record for extent-table unit `extent_no`.

    The unit covers the extent numbers [extent_no * 512, extent_no * 512 + 512).
    For every inode of every brick in `inodes_info`, the `file` command is
    issued, its JSON response file is loaded and then deleted, and every
    "file info" entry whose `eno` falls into that range is converted into an
    extent entry. Entries of several inodes may share one unit, so all matches
    are collected.

    Returns {'extents': [...], 'meta_offset': ...}, or an empty dict when no
    extent of the unit is in use.
    Raises common.CmdError when the command fails or a shard cannot be found.
    """
    extent_repair = {}
    eno_start = extent_no * 512
    # A set gives O(1) membership tests for the 512 extent numbers of the unit.
    wanted_enos = set(range(eno_start, eno_start + 512))
    index_offset = 4*1024*1024*1024
    block_size = s_tier_superblock['block_size']
    extent_entries = []
    for brick in inodes_info['brick']:
        for inode in brick['inodes']:
            arg = 'brick_id={},gfid={}'.format(brick['bi_brickid'], inode['uuid'])
            ret = tier_do_command('file', arg)
            if ret['ret'] != 0:
                err_msg = 'arg: {} failed: {}'.format(arg, ret['msg'])
                raise common.CmdError(err_msg)

            rsp_file = ret['msg']
            # Close the handle deterministically instead of leaking file(...).
            with open(rsp_file) as rsp:
                dump_objs = json.load(rsp)
            os.remove(rsp_file)

            inode_extent_entries = []
            for extent_dump in dump_objs.get('file info'):
                # Skip the {"end line": 1} sentinel and extents of other units.
                if extent_dump.get('end line') or extent_dump.get('eno') not in wanted_enos:
                    continue

                eno = extent_dump['eno']
                shard = extent_dump['shard']
                offset = extent_dump['offset']
                # Offsets below index_offset (4 GiB) use index 0; otherwise the
                # index is read from the shard table.
                if offset < index_offset:
                    index = 0
                else:
                    index = tier_get_index_from_shard(ssd_uuid, shard)
                lba = (offset - index * index_offset) // block_size

                extent_entry = {}
                extent_entry['lba'] = lba
                extent_entry['eno'] = eno
                extent_entry['shard'] = shard
                extent_entry['dirty'] = extent_dump['dirty']
                extent_entry['in_use'] = extent_dump['in_use']
                extent_entry['sense'] = extent_dump['sense']
                extent_entry['demote'] = extent_dump['demote']
                extent_entry['dm_err'] = extent_dump['dm_err']

                # The bitmap is dumped as text such as "[ 1 0 0 0 ]": drop the
                # brackets, then pack the bits (first value = bit 0) into an int.
                bitmap_str = extent_dump['bitmap'].strip()
                bit_values = [int(bit) for bit in bitmap_str[1:-1].split()]
                extent_entry['bitmap'] = sum(bit << i for i, bit in enumerate(bit_values))
                inode_extent_entries.append(extent_entry)

            if inode_extent_entries:
                common.logger.info('file: {} found extent: {}, entrier_size: {}, inode_extent_entries: {}'.format(
                    inode['uuid'], extent_no, len(inode_extent_entries), json.dumps(inode_extent_entries, indent=4)))
                extent_entries += inode_extent_entries

    # Extents of several inodes may live in the same metadata unit.
    if extent_entries:
        extent_repair['extents'] = extent_entries
        extent_repair['meta_offset'] = s_tier_superblock['extent_table_offset'] + extent_no * TIER_META_UNIT
    return extent_repair

def tier_extent_repair(ssd_uuid, extent_no):
    """Repair the bad block holding extent-table unit `extent_no` of the tier.

    Online, the record is regenerated from the running tier through the
    generate_extent_v1/v2/v3 helper matching the reported version. Offline, or
    when the extent is not found online, the extent is simply marked unused.
    The record is dumped to a temporary JSON file, the operator confirms (twice
    when offline), and TIER_REPAIR_TOOL is run with `-c repair_extent`.

    Returns 0 when the extent can be read offline (up front, if
    common.s_check_badblock is set, or after the repair), otherwise -1.
    """
    def _extent_readable():
        # A successful offline read means there is no bad block (any more).
        try:
            info = get_tier_extent_offline(ssd_uuid, extent_no)
            common.logger.info('[check badblocks], extent_info: {}'.format(json.dumps(info, indent=4)))
            return True
        except common.CmdError as err:
            common.logger.warn('[check badblocks], except: {}'.format(str(err)))
        return False

    def _unused_extent():
        # Record that marks the extent (unit) as not in use.
        if s_tier_version == TIER_V3:
            return {"meta_offset": s_tier_superblock['extent_table_offset'] + extent_no * TIER_META_UNIT, 'extents': []}
        return {"te_no":extent_no, "te_use":0}

    if common.s_check_badblock and _extent_readable():
        return 0

    extent_repair = {}
    if s_tier_online:
        # Online repair: rebuild the extent from the in-memory metadata.
        inodes_info = get_tier_inode_online(ssd_uuid)
        if not inodes_info:
            common.logger.error('failed to find inodes_info online')
            return -1
        online_version = inodes_info['version']
        assert online_version in [TIER_V1, TIER_V2, TIER_V3]
        if online_version == TIER_V1:
            extent_repair = generate_extent_v1(inodes_info, extent_no)
        elif online_version == TIER_V2:
            extent_repair = generate_extent_v2(inodes_info, extent_no)
        elif online_version == TIER_V3:
            extent_repair = generate_extent_v3(ssd_uuid, inodes_info, extent_no)
        if not extent_repair:
            # The extent is unknown in memory: mark it unused.
            common.logger.warn('failed to find extent_no: {}, try to unlink it'.format(extent_no))
            extent_repair = _unused_extent()
    else:
        # Offline repair: the extent can only be marked unused.
        extent_repair = _unused_extent()

    # Run the repair.
    assert extent_repair
    repair_conf = '{}/{}_repair_extent.json'.format(REPAIR_TOOL_WORK_DIR, common.VSFIRE_MAGIC)
    json_dump(repair_conf, extent_repair)
    tier_dev = '/dev/{}/{}-tcache'.format(ssd_uuid, ssd_uuid)
    cmdline = '{} -p {} -c repair_extent -f {} -y'.format(TIER_REPAIR_TOOL, tier_dev, repair_conf)
    common.logger.info('try to cmdline: {}, repair_conf: {}'.format(cmdline, json.dumps(extent_repair, indent=4)))

    # Offline repair is lossy, so it gets an extra, explicit warning prompt.
    if not s_tier_online:
        readline = '当时分层离线，继续修复将损坏数据，是否继续, 输入\'y\'继续, \'n\'退出'
        common.check_terminal_input(readline)

    # Ask the operator for confirmation before touching the device.
    readline = '是否执行分层: {} [{}], extent区间坏道修复, 修复文件: {}, 修复数据: \n{}, 输入\'y\'继续，\'n\'退出'.format(
        ssd_uuid, '在线' if s_tier_online else '离线(注意会丢失数据)', repair_conf, json.dumps(extent_repair, indent=4))
    common.check_terminal_input(readline)

    common.cli(cmdline, False)
    os.remove(repair_conf)

    # Verify that the bad block is gone.
    return 0 if _extent_readable() else -1

def tier_shard_repair(ssd_uuid, shard_no):
    """Repair the bad block holding shard-table unit `shard_no` (V3 only).

    The record is recovered from the oplog, falling back to
    check_and_build_repair_object(). It is dumped to a temporary JSON file,
    the operator confirms, and TIER_REPAIR_TOOL is run with `-c repair_shard`.

    Returns 0 when the shard unit can be read offline (up front, if
    common.s_check_badblock is set, or after the repair), otherwise -1.
    """
    assert s_tier_version == TIER_V3

    def _shard_readable():
        # A successful offline read means there is no bad block (any more).
        try:
            info = get_tier_shard_offline(ssd_uuid, shard_no)
            common.logger.info('[check badblocks], shard_info: {}'.format(json.dumps(info, indent=4)))
            return True
        except common.CmdError as err:
            common.logger.warn('[check badblocks], except: {}'.format(str(err)))
        return False

    if common.s_check_badblock and _shard_readable():
        return 0

    shard_repair = get_tier_shard_from_oplog(ssd_uuid, shard_no)
    if not shard_repair:
        shard_repair = check_and_build_repair_object('shard', shard_no)
    if not shard_repair:
        common.logger.error('failed to build shard_repair')
        return -1

    repair_conf = '{}/{}_repair_shard.json'.format(REPAIR_TOOL_WORK_DIR, common.VSFIRE_MAGIC)
    json_dump(repair_conf, shard_repair)
    tier_dev = '/dev/{}/{}-tcache'.format(ssd_uuid, ssd_uuid)
    cmdline = '{} -p {} -c repair_shard -f {} -y'.format(TIER_REPAIR_TOOL, tier_dev, repair_conf)
    common.logger.info('try to cmdline: {}, repair_conf: {}'.format(cmdline, json.dumps(shard_repair, indent=4)))

    # Ask the operator for confirmation before touching the device.
    readline = '是否执行分层: {} [{}], shard区间坏道修复, 修复文件: {}, 修复数据: \n{}, 输入\'y\'继续，\'n\'退出'.format(
        ssd_uuid, '在线' if s_tier_online else '离线', repair_conf, json.dumps(shard_repair, indent=4))
    common.check_terminal_input(readline)

    common.cli(cmdline, False)
    os.remove(repair_conf)

    # Verify that the bad block is gone.
    return 0 if _shard_readable() else -1

def tier_hot_repair(ssd_uuid, hot_no):
    """Repair the bad block holding hot-table unit `hot_no` (V3 only).

    The repair record only carries the unit's `meta_offset`; per the original
    comment, the hot area is repaired by zeroing it. The record is dumped to a
    temporary JSON file, the operator confirms, and TIER_REPAIR_TOOL is run
    with `-c repair_hot`.

    Returns 0 when the hot unit can be read offline (up front, if
    common.s_check_badblock is set, or after the repair), otherwise -1.
    """
    assert s_tier_version == TIER_V3

    def _hot_readable():
        # A successful offline read means there is no bad block (any more).
        try:
            info = get_tier_hot_offline(ssd_uuid, hot_no)
            common.logger.info('[check badblocks], hot_info: {}'.format(json.dumps(info, indent=4)))
            return True
        except common.CmdError as err:
            common.logger.warn('[check badblocks], except: {}'.format(str(err)))
        return False

    if common.s_check_badblock and _hot_readable():
        return 0

    # Hot-area repair: the unit is simply reset.
    unit_offset = s_tier_superblock['hot_table_offset'] + hot_no * TIER_META_UNIT
    hot_repair = {"meta_offset": unit_offset}
    repair_conf = '{}/{}_repair_hot.json'.format(REPAIR_TOOL_WORK_DIR, common.VSFIRE_MAGIC)
    json_dump(repair_conf, hot_repair)
    tier_dev = '/dev/{}/{}-tcache'.format(ssd_uuid, ssd_uuid)
    cmdline = '{} -p {} -c repair_hot -f {} -y'.format(TIER_REPAIR_TOOL, tier_dev, repair_conf)
    common.logger.info('try to cmdline: {}, repair_conf: {}'.format(cmdline, json.dumps(hot_repair, indent=4)))

    # Ask the operator for confirmation before touching the device.
    readline = '是否执行分层: {} [{}], hot区间坏道修复, 修复文件: {}, 修复数据: \n{}, 输入\'y\'继续，\'n\'退出'.format(
        ssd_uuid, '在线' if s_tier_online else '离线', repair_conf, json.dumps(hot_repair, indent=4))
    common.check_terminal_input(readline)

    common.cli(cmdline, False)
    os.remove(repair_conf)

    # Verify that the bad block is gone.
    return 0 if _hot_readable() else -1

def tier_metadata_repair_by_offset(ssd_uuid, badblock_offset):
    """Repair the metadata unit located at byte `badblock_offset` of the tier.

    The offset must be non-negative and aligned to TIER_META_UNIT. It is mapped
    onto the region containing it (brick / inode / extent tables; for V3 also
    the shard and hot tables; for V2 also the expanded inode area) using the
    layout stored in s_tier_superblock, and the matching repair routine is
    called. When tier_has_flush_meta() reports support, the repair is delegated
    to tfs_metadata_repair_online() instead.

    Returns the result of the selected repair routine; 0 for the leading
    reserved area (nothing to repair); -1 for the superblock unit and for the
    data area, neither of which is repaired here.
    """
    assert badblock_offset >= 0 and badblock_offset % TIER_META_UNIT == 0
    common.logger.info('ssd_uuid: {} try to repair offset: {}'.format(ssd_uuid, badblock_offset))

    # V3 and online: repair through the flush_meta command.
    if tier_has_flush_meta(s_tier_online, common.get_vs_version(), s_tier_version):
        return tfs_metadata_repair_online(ssd_uuid)

    # The first 4K is reserved and needs no repair.
    if 0 <= badblock_offset < TIER_SB_OFF:
        common.logger.error('badblock_offset: {} is reserved area, not need to repair'.format(badblock_offset))
        return 0

    # The superblock unit must have been repaired before getting here.
    if TIER_SB_OFF <= badblock_offset < (TIER_SB_OFF + TIER_META_UNIT):
        common.logger.error('badblock_offset: {} is superblock, must repair firstly'.format(badblock_offset))
        return -1

    if s_tier_version == TIER_V3:
        # V3 superblocks record every table offset explicitly.
        brick_offset = s_tier_superblock['brick_table_offset']
        brick_end = brick_offset + s_tier_superblock['brick_table_size']
        inode_offset = s_tier_superblock['inode_table_offset']
        inode_end = inode_offset + s_tier_superblock['inode_table_size']
        shard_offset = s_tier_superblock['shard_table_offset']
        shard_end = shard_offset + s_tier_superblock['shard_table_size']
        extent_offset = s_tier_superblock['extent_table_offset']
        extent_end = extent_offset + s_tier_superblock['extent_table_size']
        hot_offset = s_tier_superblock['hot_table_offset']
        hot_end = hot_offset + s_tier_superblock['hot_table_size']
        data_offset = s_tier_superblock['data_offset']
        data_end = data_offset + s_tier_superblock['data_table_size']
    else:
        # V1/V2: the tables follow the superblock back to back.
        brick_offset = TIER_SB_OFF + s_tier_superblock['super block size']
        brick_end = brick_offset + s_tier_superblock['brick_table_size']
        inode_offset = brick_offset + s_tier_superblock['brick_table_size']
        inode_end  = inode_offset + (s_tier_superblock['inode_cnt'] * TIER_META_UNIT)
        extent_offset = inode_offset + s_tier_superblock['inode_table_size']
        extent_end = extent_offset + s_tier_superblock['extent_table_size']
        data_offset = s_tier_superblock['data_offset']
        data_end = data_offset + s_tier_superblock['block_size'] * s_tier_superblock['block_cnt']

    # Brick table.
    if brick_offset <= badblock_offset < brick_end:
        brick_no = (badblock_offset - brick_offset) // TIER_META_UNIT
        assert brick_no >= 0 and brick_no < s_tier_superblock['brick_cnt']
        return tier_brick_repair(ssd_uuid, brick_no)

    # Inode table.
    if inode_offset <= badblock_offset < inode_end:
        inode_no = (badblock_offset - inode_offset) // TIER_META_UNIT
        assert inode_no >= 0 and inode_no < s_tier_superblock['inode_cnt']
        return tier_inode_repair(ssd_uuid, inode_no)

    # Extent table.
    if extent_offset <= badblock_offset < extent_end:
        extent_no = (badblock_offset - extent_offset) // TIER_META_UNIT
        if s_tier_version == TIER_V3:
            assert extent_no >= 0 and extent_no < s_tier_superblock['extent_table_cnt']
        else:
            assert extent_no >= 0 and extent_no < s_tier_superblock['block_cnt']
        return tier_extent_repair(ssd_uuid, extent_no)

    # Data area: not supported here, the data repair mode must be used.
    if data_offset <= badblock_offset < data_end:
        common.logger.error('badblock_offset: {} is data, cannot repair'.format(badblock_offset))
        return -1

    if s_tier_version == TIER_V3:
        # Shard table.
        if shard_offset <= badblock_offset < shard_end:
            shard_no = (badblock_offset - shard_offset) // TIER_META_UNIT
            assert shard_no >= 0 and shard_no < s_tier_superblock['shard_table_cnt']
            return tier_shard_repair(ssd_uuid, shard_no)

        # Hot table.
        if hot_offset <= badblock_offset < hot_end:
            hot_no = (badblock_offset - hot_offset) // TIER_META_UNIT
            assert hot_no >= 0
            return tier_hot_repair(ssd_uuid, hot_no)

    # V2 may carry an expanded inode area behind the data area.
    if s_tier_version == TIER_V2 and s_tier_superblock.get('exp_inode_magic') == TIER_INODE_EXP_MAGIC:
        exp_inode_offset = data_end + TIER_INODE_EXP_RESERVE
        exp_inode_end = exp_inode_offset + s_tier_superblock['sb_inode_exp_area'] * TIER_INODE_EXP_BLOCK
        if exp_inode_offset <= badblock_offset < exp_inode_end:
            inode_no = (badblock_offset - exp_inode_offset) // TIER_META_UNIT + s_tier_superblock['inode_cnt']
            # The first unit of the expanded area maps to inode_cnt itself, so
            # the bound is inclusive ('>' rejected that unit with an assertion).
            assert inode_no >= s_tier_superblock['inode_cnt']
            return tier_inode_repair(ssd_uuid, inode_no)

    # Whatever is left in front of the data area is reserved space.
    assert badblock_offset < data_offset
    return tier_reserved_repair(ssd_uuid, badblock_offset)
    
def tfs_metadata_repair_online(ssd_uuid):
    """Repair the tier metadata online by running the `flush_meta` CLI command.

    Always returns 0; a failing command is reported by common.cli itself.
    """
    # An interactive confirmation prompt used to precede this command; it is
    # disabled (it was commented out in the original code).
    flush_cmd = '/sf/vs/bin/vs_tier_cli.py -c flush_meta -a ssd_uuid={}'.format(ssd_uuid)
    common.logger.info('try to cmdline: {}'.format(flush_cmd))
    common.cli(flush_cmd, False)
    return 0

def scan_tier_metadata_and_repair(ssd_uuid):
    """Scan the tier metadata for bad blocks and repair every one found.

    The units after the first superblock unit, up to TIER_SB_OFF +
    TIER_SB_RSIZE, are probed with direct `dd` reads; the metadata tables are
    checked through the repair tool's `dump_<meta> -g 'error=1'` commands.
    After the operator confirms, the offsets are repaired either in one go via
    flush_meta (when supported) or one by one.

    Returns 0 when nothing was found or everything was repaired, -1 when at
    least one offset could not be repaired.
    """
    tier_dev = '/dev/{}/{}-tcache'.format(ssd_uuid, ssd_uuid)
    error_offsets = []

    # Probe the remainder of the superblock area, one unit at a time.
    scan_begin = TIER_SB_OFF + TIER_META_UNIT
    scan_end = TIER_SB_OFF + TIER_SB_RSIZE
    for unit_off in range(scan_begin, scan_end, TIER_META_UNIT):
        cmdline = "/bin/dd if={} of=/dev/null bs={} count=1 iflag=direct skip={} 2>/dev/null".format(tier_dev, TIER_META_UNIT, unit_off // TIER_META_UNIT)
        try:
            common.cli(cmdline, False)
        except common.CmdError as e:
            # A failed read marks the unit as a bad block.
            common.logger.warn('got except: {}'.format(str(e)))
            error_offsets.append(unit_off)

    # Let the repair tool report the broken units of every metadata table.
    if s_tier_version == TIER_V3:
        metas = ['brick', 'shard', 'extent', 'inode', 'hot']
    else:
        metas = ['brick', 'extent', 'inode']
    for meta in metas:
        cmdline = "{} -p {} -c dump_{} -g 'error=1'".format(TIER_REPAIR_TOOL, tier_dev, meta)
        result = common.cli(cmdline, False)
        errs_dump = json.loads(result.decode('utf-8').strip())
        # The dump is either a list of entries or a dict of such lists.
        groups = errs_dump.values() if isinstance(errs_dump, dict) else [errs_dump]
        for group in groups:
            error_offsets += [entry['meta_offset'] for entry in group]

    if not error_offsets:
        common.logger.info('ssd_uuid: {} has no error_offsets'.format(ssd_uuid))
        return 0

    # Ask the operator for confirmation before repairing.
    readline = '扫描发现坏道偏移列表: {}, \n当前分层: {} [{}], 是否执行坏道修复, 输入\'y\'继续，\'n\'退出'.format(error_offsets, ssd_uuid, 
        '在线' if s_tier_online else '离线')
    common.check_terminal_input(readline)

    # V3 and online: repair through the flush_meta command.
    if tier_has_flush_meta(s_tier_online, common.get_vs_version(), s_tier_version):
        return tfs_metadata_repair_online(ssd_uuid)

    # Repair each bad block, remembering whether any of them failed.
    result = 0
    for raw_offset in error_offsets:
        error_offset = raw_offset - (raw_offset % TIER_META_UNIT)
        if tier_metadata_repair_by_offset(ssd_uuid, error_offset):
            common.logger.error('ssd_uuid: {} failed to repair error_offset: {}'.format(ssd_uuid, error_offset))
            result = -1
    return result

def get_ssd_superblock(ssd_uuid):
    """Load the tier superblock and derive the module-level tier state.

    Sets s_tier_superblock, s_tier_online and s_tier_version. When the
    superblock cannot be read offline, the operator is asked to confirm a
    superblock repair, after which it is read again.

    Returns 0 on success, -1 when the superblock cannot be repaired or read,
    or when its version is not one of TIER_V1/V2/V3.
    """
    global s_tier_superblock, s_tier_online, s_tier_version

    try:
        tier_superblock = get_tier_superblock_offline(ssd_uuid)
    except common.CmdError as e:
        common.logger.warn('got except: {}'.format(str(e)))
        # An unreadable superblock means it sits on a bad block: repair it first.
        readline = '当前分层: {} 超级块存在坏道, 是否执行坏道修复, 输入\'y\'继续，\'n\'退出'.format(ssd_uuid)
        common.check_terminal_input(readline)
        if tier_superblock_repair(ssd_uuid):
            common.logger.error('ssd_uuid: {} failed to repair superblock'.format(ssd_uuid))
            return -1
        # Read it once more now that it has been repaired.
        tier_superblock = get_tier_superblock_offline(ssd_uuid)

    s_tier_superblock = tier_superblock
    if not s_tier_superblock:
        common.logger.error('ssd_uuid: {} failed to get superblock'.format(ssd_uuid))
        return -1

    # The tier counts as online when its superblock can be fetched through rpc,
    # unless the debug switch forces it offline.
    s_tier_online = bool(not s_tier_force_offline and get_tier_superblock_online(ssd_uuid))

    s_tier_version = s_tier_superblock['version']
    if s_tier_version not in [TIER_V1, TIER_V2, TIER_V3]:
        common.logger.error('failed to supported, s_tier_version: {} invalid'.format(s_tier_version))
        return -1

    common.logger.info('s_tier_online: {}, s_tier_version: {}, s_tier_superblock: {}'.format(s_tier_online, s_tier_version,
        json.dumps(s_tier_superblock, indent=4)))
    return 0

def tier_metadata_repair(ssd_uuid):
    """Interactive entry point for repairing metadata bad blocks of a tier.

    Validates the product version, the `ssd_uuid` format and the presence of
    the tier device, loads the superblock, then asks the operator for either a
    byte offset (aligned down to TIER_META_UNIT before use) or 'y' to scan all
    metadata areas automatically.

    Returns the result of the chosen repair routine, or -1 when a precondition
    fails or stdin is closed before a choice was entered.
    """
    version = common.get_vs_version()
    if not common.is_vs_version_valid(version):
        common.logger.error('failed to supported, version: {} invalid'.format(version))
        return -1

    if not re.match(r"^\w{6}-\w{4}-\w{4}-\w{4}-\w{4}-\w{4}-\w{6}$", ssd_uuid):
        common.logger.error('failed to supported, ssd_uuid: {} invalid'.format(ssd_uuid))
        return -1
    tier_dev = '/dev/{}/{}-tcache'.format(ssd_uuid, ssd_uuid)
    if not os.path.exists(tier_dev):
        common.logger.error('failed to supported, tier_dev: {} not exist'.format(tier_dev))
        return -1

    # Load the superblock and the tier version.
    if get_ssd_superblock(ssd_uuid):
        return -1

    readline = '开始修复分层: {} 上的元数据坏道，请输入坏道的偏移(单位为字节)，输入\'y\'表示自动扫描所有元数据区间(扫描坏道可能影响业务)'.format(ssd_uuid)
    badblock_offset = 0
    while True:
        print(common.Colored().fuchsia('{}'.format(readline)))
        raw = sys.stdin.readline()
        if not raw:
            # readline() returns '' only at EOF; without this check a closed
            # stdin would make the prompt loop forever.
            common.logger.error('stdin closed, no badblock offset was entered')
            return -1
        step = raw.strip()
        if step.isdigit():
            badblock_offset = int(step)
            break
        if step.lower() == 'y':
            # -1 selects the automatic scan below.
            badblock_offset = -1
            break
        print(common.Colored().red('输入字符错误，请重新输入'))

    if badblock_offset < 0:
        # Scan every metadata area for bad blocks and repair them.
        return scan_tier_metadata_and_repair(ssd_uuid)

    # Repair the given offset, aligned down to the metadata unit.
    badblock_offset -= (badblock_offset % TIER_META_UNIT)
    return tier_metadata_repair_by_offset(ssd_uuid, badblock_offset)

def _tier_metadata_repair(ssd_uuid):
    """Run tier_metadata_repair() under the vsfire lock and print the outcome.

    Returns whatever tier_metadata_repair() returned.
    """
    with common.VsfireFlock(common.get_vsfire_lock_file()):
        ret = tier_metadata_repair(ssd_uuid)
        palette = common.Colored()
        # Non-zero means failure (red), zero means success (cyan).
        banner = palette.red('执行失败') if ret else palette.cyan('执行成功')
        print(banner)
        return ret