#!/usr/bin/env /sf/vs/bin/python
#-*- coding: utf-8 -*-
import os
import re
import sys
import uuid
import time
import json
import logging
import socket
import struct
import argparse
import subprocess32 as subprocess
from libcommon import config


# Unix domain socket path that send_request() connects to.
TIER_SOCK_PATH    = '/run/vs/tier_cmd_sock'
# Maximum number of bytes read back for one command response.
TIER_RECV_SIZE_4K = 4096

## Fixed parameters related to tiering ##

# Version identifiers, compared against the 'version' field of dumped metadata.
TIER_V1         ="0x10000"
TIER_V2         ="0x20000"
TIER_V3         ="0x30000"
TIER_SB_OFF     = 4096
# The super block never actually exceeds 4K, but v1 and v2 both read 128K when
# loading it, so the whole 128K region is treated as the super block.
TIER_SB_RSIZE   = 131072
TIER_META_UNIT  = 4096

TIER_BAK_DIR    = "/sf/cfg/vs/cache"
TIER_CONF       = "/sf/cfg/vs/cache/tier.json"
# Empty here; presumably filled in at start-up outside this section (TODO confirm).
TIER_EXPECT_VER = ""
TIER_INODE_EXP_MAGIC = "0x4558"
TIER_INODE_EXP_BLOCK = 100 * 1024 * 1024
TIER_INODE_EXP_RESERVE = 1024 * 1024

## Paths of tools and working files used by the repair flow ##
VSD_SBIN        = "/sf/vs/sbin/"
VSD_BIN         = "/sf/vs/bin"
# Prefix joined in front of file paths in trigger_inode_update(); empty here.
GFS_MNT         = ""
REPAIR_WORK_DIR = "/sf/data/local/vs_tier_repair"
REPAIR_LOG      = os.path.join(REPAIR_WORK_DIR, "vs_tier.log")
REPAIR_TOOL     = os.path.join(REPAIR_WORK_DIR, "vs_tier")
IO_TRANS_TOOL   = os.path.join(VSD_SBIN, "shard_io_trans")
# Metadata versions this script is able to repair.
REPAIR_VERS     = [TIER_V1, TIER_V2]

# Extra argument appended to repair tool commands. While empty, the repair
# functions only log the tool's output and ask the operator to re-run with "-y".
REPAIR_COMMIT   = ""
# When True, repair_inode() writes the inode info dumped from tierd instead of
# triggering an update through the tier_level xattr.
REPAIR_INODE_USE_MEM = False
# When True, tier_dump_ssd_meta() skips querying tierd and returns None.
REPAIR_OFFLINE  = False

# Logger object; None here, presumably assigned during start-up (TODO confirm).
log             = None

def resp2dict(response):
    """
    Parse a command response string into a dict.

    The response is expected to look like
    ``{'ret': -1, 'msg': "magic error:0xbab4"}``: at most two ``key: value``
    pairs separated by the first comma. Keys are stripped of single quotes
    and spaces, values of double quotes and spaces. ``ret`` is converted to
    an int.

    A response that cannot be parsed (empty, a pair without ``:``, a missing
    or non-numeric ``ret``) does not raise; it yields
    ``{'ret': -1, 'msg': 'invalid response(...)'}`` so callers can treat it
    like any other failed command.
    """
    def _invalid(reason):
        return {'ret': -1,
                'msg': 'invalid response({}): {!r}'.format(reason, response)}

    dictresp = {}
    # Split on the first comma only, so commas inside the message survive.
    for res in response.strip('{}').split(',', 1):
        # Partition on the first colon only, so colons inside the value survive.
        key, sep, value = res.partition(':')
        if not sep:
            return _invalid('missing ":"')
        dictresp[key.strip('\' ')] = value.strip(' \"\"')

    try:
        dictresp['ret'] = int(dictresp['ret'])
    except KeyError:
        return _invalid('missing ret')
    except ValueError:
        return _invalid('ret is not an integer')

    return dictresp

def send_request(data):
    """
    Send a packed command over the tier command socket and return the reply.

    :param data: the already packed request bytes.
    :return: the dict produced by resp2dict() for the reply, or
             ``{'ret': -1, 'msg': <error text>}`` when connecting, sending or
             receiving fails.
    """
    sockfd = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        sockfd.connect(TIER_SOCK_PATH)
        sockfd.sendall(data)
        # Connected stream socket: recv() returns the same data recvfrom()
        # did, without the unused peer address.
        response = sockfd.recv(TIER_RECV_SIZE_4K)
    except Exception as e:
        return {'ret': -1, 'msg': str(e)}
    finally:
        # Always release the descriptor, including on the error paths.
        sockfd.close()

    return resp2dict(response)


def do_command(cmd, arg):
    """
    Wrap a command in the fixed wire format and send it under the CLI lock.

    The request text is ``tier.<cmd>:<arg>;`` and is packed as a
    little-endian header (magic 0xB4BA, text length) followed by the text.

    :param cmd: "dump" or "file".
    :param arg: command argument string; may be empty.
    :return: the response dict. On success (ret == 0) ``msg`` is the path of
             the result file, renamed to ``<original>.repair``.
    """
    assert cmd == "dump" or cmd == "file"
    # vs_tier_cli.py takes this lock for every command; take it here as well
    # so concurrent commands do not interfere. The "with" closes the lock
    # file once the command is done.
    with open('/var/lock/vs_tier_cli.lock', 'a') as lock:
        with config.Wlock(lock):
            cmd_header = 'tier'

            if arg:
                cmdarg = cmd_header + '.' + cmd + ':' + arg + ';'
            else:
                cmdarg = cmd_header + '.' + cmd + ':' + ';'

            data = struct.pack('<HH%ds' % len(cmdarg), 0xB4BA, len(cmdarg), cmdarg)

            ret = send_request(data)

            # The command result is stored in a file and every command reuses
            # the same one, so another command could overwrite it once the
            # lock is released. Rename it while the lock is still held.
            if ret['ret'] == 0:
                bak_f = "{}.repair".format(ret['msg'])
                os.rename(ret['msg'], bak_f)
                ret['msg'] = bak_f

            return ret

def json_dump(path, obj):
    """
    Write obj as JSON to path.

    The content is first written to ``<path>.bak`` and then renamed onto
    path, so path is replaced in a single rename step.
    """
    tmp_path = ''.join((path, '.bak'))
    with open(tmp_path, 'w') as out:
        out.write(json.dumps(obj, indent=None))

    os.rename(tmp_path, path)


def json_load(path):
    """
    Parse the JSON file at path and return the resulting object.

    The file is opened with open() inside a "with" block so the handle is
    closed deterministically, including when parsing fails.
    """
    with open(path) as f:
        return json.load(f)


def exec_cmd(cmd, out_json=False):
    """
    Run a shell command and return its standard output.

    :param cmd: command line, executed through the shell.
    :param out_json: when True, parse the output as JSON and return the
                     parsed object instead of the raw text.
    :return: stripped standard output, or the parsed JSON object.
    :raises subprocess.CalledProcessError: when the command exits non-zero.
    """
    output = subprocess.check_output(cmd, shell=True)
    res = output.strip()
    log.debug("cmd:{}, res:{}".format(cmd, res))

    if out_json:
        return json.loads(res)
    return res


def prompt_before_action(p):
    """
    Ask the operator to confirm an action; exit with status 1 unless the
    answer is exactly "yes".

    :param p: non-empty prompt text shown before "(yes/no?)".
    """
    assert p
    answer = raw_input('{}.(yes/no?)'.format(p))
    if answer == "yes":
        return
    sys.exit(1)

def tier_dump_ssd_meta(dev, mtype):
    """Dump meta info from tierd and return the entry of the given SSD.

    :param dev: device path; the name of its parent directory is used as the
                SSD uuid.
    :param mtype: meta type to dump; "superblock" sends the dump command
                  without argument, any other value is sent as "-a <mtype>".
    :return: the element of the dump's 'ssd' list whose 'ssd_uuid' matches,
             or None in offline mode, when the dump command fails, or when
             no entry matches.
    """
    ssd_uuid = os.path.basename(os.path.dirname(dev))

    if REPAIR_OFFLINE:
        log.info("<===you specify 【OFFLINE】 mode, won't dump meta info===>")
        return None

    argument = ""
    if mtype != "superblock":
        argument = "-a {}".format(mtype)

    ret = do_command('dump', argument)

    log.debug("dump ssd meta. dev:{}, ssd_uuid:{}, type:{}, ret:{}".format(dev, ssd_uuid, mtype, ret))

    if ret['ret'] != 0:
        log.error("command dump faild:{}, arg:{}".format(ret['msg'], argument))
        return None

    # Remove the result file even when parsing raises, so a broken dump does
    # not leave the temporary file behind.
    try:
        dump_obj = json_load(ret['msg'])
    finally:
        os.remove(ret['msg'])

    if dump_obj is None:
        log.error("command dump success, but msg content is invalid. msg:{}".format(ret['msg']))
        return None

    for s in dump_obj['ssd']:
        if s['ssd_uuid'] == ssd_uuid:
            return s

    return None

""" v1版本-c file输出格式
size = 4
hit = 0
miss = 0
uniliztion ratio = 0.39%(4096/1048576)
block_cnt = 1
dirty_block_cnt = 1
clean_block_cnt = 0

file_no  block_no data     hot                meta
0        0        4096     0                  1 0 0 0 
"""
def generate_extent_v1(dump_inode, ext_no):
    """
    Rebuild the description of extent ext_no from v1 "file" command output.

    Every inode of every brick in dump_inode is queried with the "file"
    command. In that output the first line is "size = N" and the per-block
    rows start at line index 9; in each row column 1 is the block number,
    column 3 the hot value and columns 4.. the hex meta values.

    :param dump_inode: dump info holding 'brick' -> 'inodes'.
    :param ext_no: number of the extent to look for.
    :return: dict describing the extent, or None when no file owns it.
    :raises Exception: when a "file" command fails or its output has fewer
                       than 9 lines.
    """
    log.info("<===use vs_tier_cli.py -c file to dump all file info, please wait...===>")

    for brick in dump_inode['brick']:
        for inode in brick['inodes']:
            arg = "brick_id={},gfid={}".format(brick['bi_brickid'], inode['uuid'])
            ret = do_command('file', arg)
            if ret['ret'] != 0:
                raise Exception("command file failed:{}, arg:{}".format(ret['msg'], arg))

            # Read everything first, then drop the result file, as
            # generate_extent_v2 does.
            try:
                with open(ret['msg'], 'r') as f:
                    lines = f.readlines()
            finally:
                os.remove(ret['msg'])

            if len(lines) < 9:
                raise Exception("command file output is invalid")

            for line in lines[9:]:
                values = line.strip().split()
                # Skip blank or truncated rows; they lack the columns read below.
                if len(values) < 4:
                    continue
                if int(values[1]) != ext_no:
                    continue

                log.info("<===extent {} found in file {}===>\n{}".format(ext_no, inode['uuid'], line))

                size = lines[0].strip().split('=')[1]
                log.debug("ext_no:{}, values:{}".format(ext_no, values))
                ext_obj = {}
                ext_obj['te_no'] = ext_no
                ext_obj['te_block'] = int(values[0])
                ext_obj['te_size'] = int(size)
                ext_obj['te_inode_no'] = int(inode['i_no'])
                ext_obj['te_sampling_low'] = int(inode['uuid'][0:8], 16)
                ext_obj['te_use'] = 1
                ext_obj['te_clean'] = 0  # marked dirty by default
                ext_obj['te_hot'] = int(values[3])
                ext_obj['te_reserved1'] = ext_no
                ext_obj['te_bitmap'] = [int(v, 16) for v in values[4:]]

                return ext_obj

    return None

"""v2版本-c file输出格式
{
  "ver": 131072,
  "size": 4096,
  "hit": 0,
  "miss": 0,
  "file info": [
    {
      "offset": 0,
      "len": 3072,
      "clean": 1,
      "mapno": 0,
      "begin": 0,
      "cnt": 8,
      "blockno": 3334,
      "ssd_offset": 11209277440
    },
    {
      "end line": 1
    }
  ]
}

"""
def generate_extent_v2(dump_inode, ext_no):
    """
    Rebuild the description of extent ext_no from v2 "file" command output.

    Every inode of every brick in dump_inode is queried with the "file"
    command, whose output is JSON with a "file info" list. All entries of
    the owning file whose "blockno" equals ext_no are collected.

    :param dump_inode: dump info holding 'brick' -> 'inodes'.
    :param ext_no: number of the extent to look for.
    :return: dict describing the extent (with the mapped entries under
             'te_mapped_meta'), or None when no file owns it.
    :raises Exception: when a "file" command fails.
    """
    # Cheap textual pre-filter so only files mentioning the block are parsed.
    # Whitespace around ':' is tolerated and the number is matched as a whole
    # token, so compact and pretty-printed JSON both match, as does a
    # "blockno" that is the last key of its object.
    blockno_re = re.compile(r'"blockno"\s*:\s*{}\b'.format(ext_no))

    log.info("<===use vs_tier_cli.py -c file to dump all file info, please wait...===>")

    for brick in dump_inode['brick']:
        for inode in brick['inodes']:
            arg = "brick_id={},gfid={}".format(brick['bi_brickid'], inode['uuid'])
            ret = do_command('file', arg)
            if ret['ret'] != 0:
                raise Exception("command file failed:{}, arg:{}".format(ret['msg'], arg))

            try:
                with open(ret['msg'], 'r') as fp:
                    content = fp.read()
            finally:
                os.remove(ret['msg'])

            assert content
            if not blockno_re.search(content):
                continue

            # The exact match is decided on the parsed data, not on the regex.
            file_info = json.loads(content)
            ext_map = []
            for map_meta in file_info['file info']:
                if "end line" in map_meta or map_meta['blockno'] != ext_no:
                    continue

                ext_map.append(map_meta)

            if ext_map:
                log.info("<===extent {} found in file {}, map:===>\n{}".format(ext_no, inode['uuid'], json.dumps(ext_map, indent=4)))
                ext_obj = {}
                ext_obj['te_inode_no'] = inode['i_no']
                ext_obj['te_sampling_low'] = int(inode['uuid'][0:8], 16)
                ext_obj['te_sampling_mid'] = 0
                ext_obj['te_sampling_high'] = 0
                ext_obj['te_use'] = 1
                ext_obj['te_no'] = ext_no
                ext_obj['te_mapped_meta'] = ext_map
                return ext_obj

    return None

def do_repair_extent(dev, ext_no, ext_obj):
    """
    Write ext_obj to the repair config and run the tool's repair_extent command.

    :return: False when REPAIR_COMMIT is empty (the result is only logged
             for manual checking); True when the extent reads back
             successfully after the repair.
    :raises Exception: re-raised when the extent cannot be read after repair.
    """
    conf_path = "{}/repair_extent.json".format(REPAIR_WORK_DIR)
    json_dump(conf_path, ext_obj)

    repair_cmd = "{} -p {} -c repair_extent -f {} {}".format(REPAIR_TOOL, dev, conf_path, REPAIR_COMMIT)
    preview = exec_cmd(repair_cmd, out_json=True)
    if not REPAIR_COMMIT:
        log.info("\n{}".format(json.dumps(preview, indent=4)))
        log.info("<===new exent is above, please CHECK it and then run cmd manually:===>\n{} -y".format(repair_cmd))
        return False

    # Read the extent back to verify it.
    verify_cmd = "{} -p {} -c dump_extent -n {}".format(REPAIR_TOOL, dev, ext_no)
    try:
        repaired = exec_cmd(verify_cmd, out_json=True)
        log.info("<===extent {} repaired success===>\n{}".format(ext_no, json.dumps(repaired, indent=4)))
    except Exception as ex:
        log.error("<===extent {} read failed after repair:{}===>".format(ext_no, ex))
        raise
    return True

def repair_extent(dev, ext_no):
    """
    Repair extent ext_no on dev.

    Nothing is done when the extent is readable. Otherwise the extent is
    rebuilt from tierd's dump when available, or marked unused when the dump
    is unavailable or no file owns the extent. After marking an extent
    unused because no owner was found, the dump is checked once more and the
    extent is repaired again if it has been allocated in the meantime.
    """
    def _rebuild(info):
        # Pick the generator that matches the dumped version.
        builder = generate_extent_v1 if info['version'] == TIER_V1 else generate_extent_v2
        return builder(info, ext_no)

    placeholder = {"te_no": ext_no, "te_use": 0}
    recheck = False

    # Check whether the extent is readable.
    probe_cmd = "{} -p {} -c dump_extent -n {}".format(REPAIR_TOOL, dev, ext_no)
    try:
        extent = exec_cmd(probe_cmd, out_json=True)
        log.info("<===extent {} is ok, no need repair===>\n{}".format(ext_no, json.dumps(extent, indent=4)))
        return
    except Exception as ex:
        log.debug("extent {} read failed:{}".format(ext_no, ex))

    dump_info = tier_dump_ssd_meta(dev, "inode")
    if dump_info is None:
        log.error("<===dump info from tierd failed, repair extent 【OFFLINE】===>")
        ext_obj = placeholder
    else:
        assert dump_info['version'] in REPAIR_VERS
        ext_obj = _rebuild(dump_info)

        # A generation failure raises; None means "not found", in which case
        # the extent is marked unused.
        if ext_obj is None:
            log.info("<===extent {} not found in dump info, treat it as unused===>".format(ext_no))
            recheck = True
            ext_obj = placeholder

    # The command was not really executed: stop here.
    if not do_repair_extent(dev, ext_no, ext_obj):
        return

    if not recheck:
        return

    dump_info = tier_dump_ssd_meta(dev, "inode")
    assert dump_info
    ext_obj = _rebuild(dump_info)
    if ext_obj:
        log.error("<===extent {} has set to unuse, but it alloc to inode {}, repair again===>".format(ext_no, ext_obj['te_inode_no']))
        do_repair_extent(dev, ext_no, ext_obj)

def find_inode_from_lvs(brickid, part_uuid):
    """
    Look up a full gfid in the output of "lvs" for the brick's LV name.

    :param brickid: brick id; its second-to-last "/" component is passed to lvs.
    :param part_uuid: text grepped for in the lvs output.
    :return: the gfid formatted as 8-4-4-4-12, or "" when nothing is found
             or the command fails.
    :raises Exception: when the command output is not 32 characters long.
    """
    lv_name = brickid.split("/")[-2]
    cmd = "lvs {} | grep {} | awk -F '_|\\.' '{{print $2}}' | uniq".format(lv_name, part_uuid)
    try:
        res = exec_cmd(cmd)
    except subprocess.CalledProcessError:
        return ""

    if not res:
        return ""
    if len(res) != 32:
        raise Exception("res:{} len not 32".format(res))

    pieces = (res[:8], res[8:12], res[12:16], res[16:20], res[20:])
    return "-".join(pieces)


def find_inode_from_efs(brickid, part_uuid):
    """
    Look up a full gfid in the "itable list" output of efs_dbg for the
    process found by grepping "ps" for brickid.

    :return: the 36-character gfid, or "" when nothing matches.
    :raises Exception: when no pid is found for the brick.
    """
    pid_cmd = "ps auxf |grep {} |grep -vE 'super|grep' | awk '{{print $2}}'".format(brickid)
    pid = exec_cmd(pid_cmd)
    if not pid:
        raise Exception("get brick {} pid failed".format(brickid))

    list_cmd = "efs_dbg -p {} -c 'itable list' 2>/dev/null | grep {} | awk '{{print $1}}'".format(pid, part_uuid)
    found = exec_cmd(list_cmd)
    assert len(found) == 0 or len(found) == 36

    return found

def find_inode_from_devs(brickid, part_uuid):
    """
    Resolve a full gfid for part_uuid: try the lvs lookup first and fall
    back to the efs lookup when lvs yields nothing.
    """
    return find_inode_from_lvs(brickid, part_uuid) or find_inode_from_efs(brickid, part_uuid)


def get_brickno_by_brickid(dev, brickid):
    """
    Return the 'bi_no' of the brick on dev whose 'bi_brickid' equals brickid.

    :raises Exception: when the tool returns nothing, or when no readable
                       (error-free) entry matches brickid.
    """
    bricks = exec_cmd("{} -p {} -c dump_brick -g 'id={}'".format(REPAIR_TOOL, dev, brickid), True)
    if not bricks:
        raise Exception("cannot find brickno for brick {}".format(brickid))

    for entry in bricks:
        if 'error' in entry:
            continue
        if entry['bi_brickid'] == brickid:
            return entry['bi_no']

    raise Exception("cannot find brick {}, maybe it also corrupt".format(brickid))


def lookup_inode_from_bricks(gfid_prefix, dev, inode_no):
    """
    Find the complete gfid for gfid_prefix among the bricks listed in
    wcache.json and build the config dict used to repair the inode.

    :raises Exception: when zero or more than one brick yields a gfid; that
                       case needs manual handling.
    """
    wcache = json_load("/sf/cfg/vs/cache/wcache.json")
    found = {}
    # Only part of the gfid is known from the extents; search every brick
    # of the tier for the complete one.
    for mapping in wcache["maps"]:
        for brick in mapping["bricks"]:
            candidate = brick["brickId"]
            full_gfid = find_inode_from_devs(candidate, gfid_prefix)
            if full_gfid:
                assert candidate not in found
                found[candidate] = full_gfid
                log.info("find uuid:{} in brick:{}".format(full_gfid, candidate))

    # Several gfids or none at all must be handled manually. This is very
    # unlikely unless a brick is abnormal.
    if len(found) != 1:
        raise Exception("find file for inode {} failed, gfid_prefix:{}, found_gfids:{}".format(inode_no, gfid_prefix, found))

    brickid, gfid = next(iter(found.items()))

    brickno = get_brickno_by_brickid(dev, brickid)
    assert brickno >= 0

    # Build the configuration needed by the inode repair.
    return {
        "i_uuid": gfid,
        "i_no": inode_no,
        "i_brick_no": brickno,
        "i_use": 1,
        "i_atime": 0,
        "i_ctime": 0,
        "i_mtime": 0,
        "i_dtime": 0,
        "i_size": 0,
        "i_priority": 2,
        "i_unlink": 0,
    }

def repair_inode_from_extent(dev, inode_no):
    """
    Derive the repair config of inode inode_no from the extents it owns.

    :return: an "unused" inode config when the inode owns no extent,
             otherwise the config built by lookup_inode_from_bricks().
    :raises Exception: when every owned extent is unreadable; extents must
                       be repaired first in that case.
    """
    owned = exec_cmd("{} -p {} -c dump_extent -g 'owner={}'".format(REPAIR_TOOL, dev, inode_no), out_json=True)
    readable = [ext for ext in owned if 'error' not in ext]

    if not owned:
        log.info("<===inode {} cannot find extent, treat it as unused===>".format(inode_no))
        return {"i_no": inode_no, "i_use": 0}

    # If all returned extents are unreadable the inode cannot be identified;
    # the extents have to be repaired first.
    if not readable:
        log.error("error extents:{}".format(owned))
        raise Exception("{} extent unreadable, you should repair extent first".format(len(owned)))

    # All readable extents must agree on the sampled gfid prefix.
    gfid_prefix = readable[0]['te_sampling_low']
    for idx in range(1, len(readable)):
        assert readable[idx]['te_sampling_low'] == gfid_prefix

    log.info("<===find inode {} gfid_prefix is {}, will lookup complete gfid from bricks===>".format(inode_no, gfid_prefix))
    return lookup_inode_from_bricks(gfid_prefix, dev, inode_no)

def shard_to_file_offset(bgfid, idx):
    """
    Translate offset 0 of shard idx into an offset of the base file bgfid.

    The shard attributes of the base file are fetched through ShardService
    and handed to the shard_io_trans tool; the 'file_offset' field of the
    tool's output is returned.

    :raises Exception: when the shard attributes cannot be fetched.
    """
    from shard import ShardService

    service = ShardService.ShardService()
    reply = service.get_attr_by_gfid(bgfid)
    if reply.op_ret < 0:
        raise Exception("get file:{} shard base attr failed, error:{}".format(bgfid, reply.op_errno))

    attr = reply.attr
    cmd = "{} --block_size {} --preblk_size {} " \
          "--lst_preblk_size {} --preblk_cnt {} --stripe_size {}" \
          " --stripe_width {} -r --shard_idx {} --shard_offset {}".format(
              IO_TRANS_TOOL,
              attr.shard_size, attr.preblk_size * attr.shard_size,
              attr.lst_preblk_size, attr.preblk_cnt,
              attr.stripe_size, attr.stripe_width,
              idx, 0)

    output = exec_cmd(cmd)

    # NOTE(review): eval() executes whatever the tool prints. If the output
    # is always a plain literal, ast.literal_eval would be safer - confirm
    # the output format before switching.
    parsed = eval(output.replace("\n", ""))

    return parsed['file_offset']


def trigger_inode_update(dev, inode):
    """
    Repair an inode by toggling the tier_level xattr of the file it belongs to.

    The file path is resolved from inode['uuid'], the xattr
    user.glusterfs.tier_level is set to a different value and then set back,
    and finally the inode is read back from dev with the repair tool.

    :param dev: device path passed to the repair tool for the final check.
    :param inode: dict with at least 'uuid' and 'i_no'.
    :raises Exception: when the file is a vs internal file, does not exist,
                       or the inode still cannot be read after the repair.
    """
    gfid = inode['uuid']
    offset = None

    # gfid -> path: releases after 3.0 translate through vs_rpc_api, releases
    # before 3.0 through vs_lv2file.sh
    if not os.path.exists(os.path.join(VSD_BIN, "vs_rpc_tool")):
        cmd = "{}/vs_lv2file.sh {} |grep vs_vol_rep".format(VSD_BIN, gfid[0:8])
        rpath = exec_cmd(cmd)
    else:
        from vs_rpc.vs_rpc_api import VsRpcApi

        vs_rpc_api = VsRpcApi()
        res = vs_rpc_api.vs_get_fileinfo([gfid])
        rpath = res['files'][0]['path']
        # A shard file: remember where the shard starts in the base file and
        # continue with the base file's path.
        if rpath.startswith("/.vs/shard"):
            bgfid = os.path.basename(os.path.dirname(rpath))
            idx   = int(rpath.rsplit('.', 2)[1])
            offset = shard_to_file_offset(bgfid, idx)

            res = vs_rpc_api.vs_get_fileinfo([bgfid])
            rpath = res['files'][0]['path']

        # Internal files (e.g. trash or snapshot) cannot be handled here:
        # 1) trash / deleted files: find a way to clean them up
        # 2) snapshot files: enable snapshot debugging, then read the snapshot
        #    through the mount point (non-standard operation, risky)
        if not rpath or rpath.startswith("/.vs") or rpath.startswith("/vs/snapshot"):
            raise Exception("file {} is a vs internal file, cannot handle it. gfid:{}".format(rpath, inode['uuid']))

        rpath = os.path.join(GFS_MNT, rpath.lstrip('/'))

    log.debug("gfid:{}, real path:{}".format(gfid, rpath))
    if not os.path.exists(rpath):
        raise Exception("file {} not exists".format(rpath))

    # Read the current tier level and pick a different one, so that setting
    # it is an actual change.
    cmd = "{}/getfattr -m . -d {} | grep user.glusterfs.tier_level | cut -d= -f2".format(VSD_BIN, rpath)
    prior = exec_cmd(cmd)
    if prior != "low":
        new_prior = "low"
    else:
        new_prior="normal"

    log.info("<===inode {} is belong file {}, will repair it by set tier_level xattr===>".format(inode['i_no'], rpath))

    cmd = "{}/setfattr -n user.glusterfs.tier_level -v {} {}".format(VSD_BIN, new_prior, rpath)
    exec_cmd(cmd)

    # After the xattr is set only the first shard is updated immediately;
    # read the shard to trigger the update of the other shards.
    # NOTE(review): an offset of 0 is falsy and skips this read - confirm
    # that is intended.
    if offset:
        cmd = "hexdump -C {} -n 512 -s {} 1>/dev/null".format(rpath, offset)
        exec_cmd(cmd)

    # Restore the original tier level ("normal" when none was set).
    prior = "normal" if not prior else prior
    cmd = "{}/setfattr -n user.glusterfs.tier_level -v {} {}".format(VSD_BIN, prior, rpath)
    exec_cmd(cmd)

    # Verify once more after the repair.
    cmd = "{} -p {} -c dump_inode -n {}".format(REPAIR_TOOL, dev, inode['i_no'])
    try:
        new_inode = exec_cmd(cmd, out_json=True)
        log.info("<===inode {} repaired success===>\n{}".format(inode['i_no'], json.dumps(new_inode, indent=4)))
    except Exception as ex:
        log.error("<===inode {} still cannot read after repair:{}===>".format(inode['i_no'], ex))
        raise

def get_file_size(brickid, gfid):
    """
    Return the size reported by "vs_tier_cli.py -c file" for the given file:
    the third whitespace-separated token of the output, as an int.
    """
    output = exec_cmd("/sf/vs/bin/vs_tier_cli.py -c file -a brick_id={},gfid={}".format(brickid, gfid))
    tokens = output.split()
    return int(tokens[2])

def repair_inode(dev, inode_no):
    """
    Repair inode inode_no on dev.

    Nothing is done when the inode is readable. When tierd's dump is
    unavailable the inode is rebuilt from its extents (offline). Otherwise
    the inode is looked up in the dump: by default it is repaired through
    trigger_inode_update(); with REPAIR_INODE_USE_MEM the dumped values are
    written with the repair tool; when it is not in the dump it is marked
    unused and the dump is checked once more afterwards.
    """
    repair_conf  = "{}/repair_inode.json".format(REPAIR_WORK_DIR)
    unused_inode = {"i_no": inode_no, "i_use": 0}
    inode_obj    = {}
    unused_check = False

    # Check whether the inode is readable
    cmd = "{} -p {} -c dump_inode -n {}".format(REPAIR_TOOL, dev, inode_no)
    try:
        inode_obj = exec_cmd(cmd, out_json=True)
        log.info("<===inode {} is ok, no need repair===>\n{}".format(inode_no, json.dumps(inode_obj, indent=4)))
        return
    except Exception as ex:
        log.debug("inode {} read failed:{}===>".format(inode_no, ex))
        pass

    dump_info = tier_dump_ssd_meta(dev, "inode")
    if dump_info is None:
        log.error("<===dump inode from tierd failed, will repair inode 【OFFLINE】===>")
        inode_obj = repair_inode_from_extent(dev, inode_no)
    else:
        log.info("<===dump inode from tierd success, will repair inode 【ONLINE】===>")
        found = False
        for brick in dump_info['brick']:
            for inode in brick['inodes']:
                if inode['i_no'] != inode_no:
                    continue

                # By default, when the inode is found, repair it by setting the
                # tier priority, which triggers an inode update
                if not REPAIR_INODE_USE_MEM:
                    log.info("<===inode {} found in dump info:===>\n{}".format(inode_no, json.dumps(inode, indent=4)))
                    return trigger_inode_update(dev, inode)

                # Setting the tier priority only works for files reachable through
                # the mount point; for other files the in-memory info has to be
                # written to the device directly.
                # [Drawback: mtime will be inaccurate, and with a read cache there
                # may be data problems - use with caution]
                inode_obj["i_uuid"]     = inode['uuid']
                inode_obj["i_no"]       = inode_no
                inode_obj["i_brick_no"] = brick['bi_no']
                inode_obj["i_use"]      = 1
                # Dumped times are "%Y-%m-%d %H:%M:%S" strings; convert the
                # resulting seconds by multiplying with 10^9.
                inode_obj["i_atime"]    = 1000000000 * int(time.mktime(time.strptime(inode['atime'], "%Y-%m-%d %H:%M:%S")))
                inode_obj["i_ctime"]    = 1000000000 * int(time.mktime(time.strptime(inode['ctime'], "%Y-%m-%d %H:%M:%S")))
                inode_obj["i_mtime"]    = 1000000000 * int(time.mktime(time.strptime(inode['mtime'], "%Y-%m-%d %H:%M:%S")))
                inode_obj["i_size"]     = get_file_size(brick['bi_brickid'], inode['uuid'])
                inode_obj["i_priority"] = inode['i_priority']
                inode_obj["i_unlink"]   = inode['unlink']
                found = True
                break

            if found:
                break
        else:
            # The outer loop finished without break: no brick holds the inode.
            log.info("<===inode {} not found in dump info, treat it as unsed===>".format(inode_no))
            unused_check = True
            inode_obj = unused_inode

    json_dump(repair_conf, inode_obj)
    cmd = "{} -p {} -c repair_inode -f {} {}".format(REPAIR_TOOL, dev, repair_conf, REPAIR_COMMIT)

    new_inode = exec_cmd(cmd, out_json=True)
    # Without REPAIR_COMMIT the result is only logged for manual checking.
    if not REPAIR_COMMIT:
        log.info("\n{}".format(json.dumps(new_inode, indent=4)))
        log.info("<===new inode is above, please CHECK it and then run cmd manually:===>\n{} -y".format(cmd))
        return

    # Read the inode back to verify the repair.
    cmd = "{} -p {} -c dump_inode -n {}".format(REPAIR_TOOL, dev, inode_no)
    try:
        new_inode = exec_cmd(cmd, out_json=True)
        log.info("<===inode {} repaired success===>\n{}".format(inode_no, json.dumps(new_inode, indent=4)))
    except Exception as ex:
        log.error("<===inode {} still cannot read after repair:{}===>".format(inode_no, ex))
        raise

    # The inode was marked unused: dump again and, if it now shows up as
    # allocated, repair it through the file's xattr.
    if unused_check:
        dump_info = tier_dump_ssd_meta(dev, "inode")
        assert dump_info
        inode = [i for b in dump_info['brick'] for i in b['inodes'] if i['i_no'] == inode_no]
        if inode:
            log.error("<===inode {} has set to unuse, but it alloc to file {}, repair again===>".format(inode_no, inode[0]['uuid']))
            trigger_inode_update(dev, inode[0])

def repair_brick_from_conf(dev, brick_no):
    """
    Guess the content of brick slot brick_no on dev without tierd's help.

    The slot is treated as unused when no inode refers to it. Otherwise the
    brick ids in wcache.json (which must equal the local gluster brick list)
    are compared with the bricks readable on all devices; when exactly one
    brick id is missing and exactly one slot is unreadable, the missing id
    is assigned to the slot.

    :return: the brick config dict to write.
    :raises Exception: when wcache.json and gluster disagree, when another
                       device also has an unreadable brick, or when the
                       brick cannot be guessed unambiguously.
    """
    cache_conf = json_load("/sf/cfg/vs/cache/wcache.json")
    unused_brick = {"bi_no": brick_no, "bi_use": 0}

    # First, scan the inode area for files associated with this brick
    log.info("<===STEP1: scan inode to check brick {} inuse or not===>".format(brick_no))
    cmd = "{} -p {} -c dump_inode -g \"owner={}\"".format(REPAIR_TOOL, dev, brick_no)
    inodes = exec_cmd(cmd, out_json=True)
    if len(inodes) == 0:
        log.info("<===brick {} cannot find inode, treat it as unsed".format(brick_no))
        return unused_brick

    valid_inodes = [i['uuid'] for i in inodes if 'error' not in i]
    error_inodes = [i['no'] for i in inodes if 'error' in i]

    # Next, compare wcache.json with the bricks readable on the SSDs to infer
    # which brick sits at the failed slot
    log.info("<===STEP2:brick {} have {} inode, and {} inode read error, guess brick info from wcache.json ===>".format(brick_no, len(valid_inodes), len(error_inodes)))

    # Fetch the brick list from gluster to corroborate that wcache.json is complete
    cmd = "gluster v i |grep $(hostname) | grep -vE 'meta|arbiter' | awk -F'[: ]' '{print $4}'"
    gluster_brick = exec_cmd(cmd)

    gluster_brick = sorted(gluster_brick.split())
    conf_brick = sorted([b['brickId'] for s in cache_conf['maps'] for b in s['bricks']])

    if gluster_brick != conf_brick:
        log.error("gluster_brick:{}, conf_brick:{}".format(gluster_brick, conf_brick))
        raise Exception("brick between wcache.json and gluster mismatched, cannot guess brick info automatically")

    # The comparison above only proves that the set of bricks is complete; the
    # brick-to-device binding is still uncertain. The binding in wcache.json is
    # not trustworthy: field issues have repeatedly shown it disagreeing with
    # the actual binding on the SSD.
    error_bricks = []
    found_bricks = []
    for s in cache_conf['maps']:
        # Device path is derived from the map uuid without its last "-" part.
        tmp_uuid = s['uuid'].rsplit('-', 1)[0]
        tmp_dev  = "/dev/{}/{}-tcache".format(tmp_uuid, tmp_uuid)

        # Dump all in-use bricks on the device; unreadable ones are returned as well
        cmd = "{} -p {} -c dump_brick -g \"use=1\"".format(REPAIR_TOOL, tmp_dev)
        dev_bricks = exec_cmd(cmd, out_json=True)

        for b in dev_bricks:
            # Collect the bricks that cannot be read or fail verification;
            # a failed brick carries an 'error' entry with the error code
            if 'error' in b:
                if tmp_dev != dev:
                    raise Exception("other dev {} also have error brick, cannot handle automatically".format(tmp_dev))

                error_bricks.append(b['bi_no'])
            else:
                found_bricks.append(b['bi_brickid'])

    not_found_brick = [b for b in conf_brick if b not in found_bricks]

    log.debug("not_found_brick:{}, error_bricks:{}, inodes:{}".format(not_found_brick, error_bricks, inodes))
    # Only an unambiguous one-to-one match is accepted.
    if len(not_found_brick) == 1 and len(error_bricks) == 1:
        assert error_bricks[0] == brick_no
        log.info("<===guess error brick {} is {}, you can make sure it by files:===>\n{}".format(brick_no, not_found_brick[0], valid_inodes[0:5]))
        brick_obj = {'bi_use':1,\
                     'bi_no':brick_no,\
                     'bi_uuid':str(uuid.uuid4()),\
                     'bi_brickid':not_found_brick[0],\
                     'bi_unlink':0}

        return brick_obj

    raise Exception("cannot guess brick info, not_found_brick:{}, error_brick:{}".format(len(not_found_brick), len(error_bricks)))

def repair_brick(dev, brick_no):
    """
    Repair brick slot brick_no on dev.

    Nothing is done when the brick is readable. The new content comes from
    tierd's dump when available (the slot is marked unused when the dump
    does not contain it), otherwise from repair_brick_from_conf(). Without
    REPAIR_COMMIT the tool's result is only logged for manual checking.

    :raises Exception: re-raised when the brick still cannot be read after
                       the repair.
    """
    conf_path = "{}/repair_brick.json".format(REPAIR_WORK_DIR)
    dump_cmd = "{} -p {} -c dump_brick -n {}".format(REPAIR_TOOL, dev, brick_no)

    try:
        current = exec_cmd(dump_cmd, out_json=True)
        log.info("<===brick {} is ok, no need repair===>\n{}".format(brick_no, json.dumps(current, indent=4)))
        return
    except Exception as ex:
        log.debug("read brick {} failed:{}, cmd:{}".format(brick_no, ex, dump_cmd))

    dump_info = tier_dump_ssd_meta(dev, "brickinfo")
    if dump_info is None:
        log.info("<===dump brick info failed, will repair brick 【OFFLINE】===>")
        brick_obj = repair_brick_from_conf(dev, brick_no)
    else:
        log.info("<===dump brick info success, will repair brick 【ONLINE】===>")
        matched = next((b for b in dump_info['brick'] if b['bi_no'] == brick_no), None)
        if matched is None:
            log.info("<===brick {} not found in dump info, treat it as unused===>".format(brick_no))
            brick_obj = {"bi_no": brick_no, "bi_use": 0}
        else:
            log.info("<===brick {} found in dump info===>\n{}".format(brick_no, json.dumps(matched, indent=4)))
            brick_obj = {
                'bi_use': 1,
                'bi_no': brick_no,
                'bi_uuid': matched['bi_uuid'],
                'bi_brickid': matched['bi_brickid'],
                'bi_unlink': int(matched['bi_unlink']),
            }

    json_dump(conf_path, brick_obj)
    repair_cmd = "{} -p {} -c repair_brick -f {} {}".format(REPAIR_TOOL, dev, conf_path, REPAIR_COMMIT)

    preview = exec_cmd(repair_cmd, out_json=True)
    if not REPAIR_COMMIT:
        log.info("\n{}".format(json.dumps(preview, indent=4)))
        log.info("<===new brick is above, please CHECK it and then run cmd manually:===>\n{} -y".format(repair_cmd))
        return

    # Read the brick back to verify the repair.
    try:
        repaired = exec_cmd(dump_cmd, out_json=True)
        log.info("<===brick {} repaired success===>\n{}".format(brick_no, json.dumps(repaired, indent=4)))
    except Exception as ex:
        log.error("<===brick {} still cannot read after repair:{}===>".format(brick_no, ex))
        raise


def read_super_from_dev(dev):
    """
    Read the super block of dev with the repair tool.

    :return: the parsed super block, or None when reading or parsing fails.
    """
    try:
        return exec_cmd("{} -p {} -c dump_super".format(REPAIR_TOOL, dev), out_json=True)
    except Exception as ex:
        log.debug("read super failed:{}, dev:{}".format(ex, dev))
    return None


def repair_superblock_from_backup(dev):
    """
    Obtain a super block for dev from its backup file under TIER_BAK_DIR.

    The backup is returned as-is when its version equals TIER_EXPECT_VER.
    A v1 backup with an expected version of v2 is converted by the repair
    tool; the tool only prints the converted result and does not write it.

    :raises Exception: when the backup is missing or unreadable, or its
                       version cannot be used.
    """
    ssd_uuid = os.path.basename(os.path.dirname(dev))
    bak_path = os.path.join(TIER_BAK_DIR, ssd_uuid)
    if not os.path.exists(bak_path):
        raise Exception("super block backup {} not exist".format(bak_path))

    bak_sb = read_super_from_dev(bak_path)
    if bak_sb is None:
        raise Exception("read super block backup {} failed".format(bak_path))

    bak_ver = bak_sb['version']
    if bak_ver == TIER_EXPECT_VER:
        assert ssd_uuid == bak_sb['ssd_uuid']
        log.info("<===super in backup {} verify ok, bak_sb:===>\n{}".format(bak_path, bak_sb))
        return bak_sb

    # Only the v1 -> v2 conversion is supported.
    if not (bak_ver == TIER_V1 and TIER_EXPECT_VER == TIER_V2):
        raise Exception("backup version {} mismatch. expect:{}".format(bak_sb['version'], TIER_EXPECT_VER))

    log.info("<===backup super version is {}, need convert to {}===>".format(bak_sb['version'], TIER_EXPECT_VER))

    assert TIER_EXPECT_VER == TIER_V2
    target_ver = 2

    convert_conf = "{}/convert_sb.json".format(REPAIR_WORK_DIR)
    json_dump(convert_conf, bak_sb)
    # Let the tool convert the super block; the command only prints the
    # converted result and does not write it to disk.
    convert_cmd = "{} -p {} -c convert_super -f {} -v {}".format(REPAIR_TOOL, dev, convert_conf, target_ver)

    return exec_cmd(convert_cmd, out_json=True)


def repair_superblock(dev, offset, dev_sb):
    """
    Repair the super block of dev.

    :param dev: device path.
    :param offset: not used by this function.
    :param dev_sb: super block already read from the device; when it is
                   truthy the super block is valid and nothing is done.
    :raises Exception: when the super block still cannot be read after the
                       repair.
    """
    if dev_sb:
        log.info("<===dev {} super block is valid, no need repair===>".format(dev))
        return

    conf_path = "{}/repair_sb.json".format(REPAIR_WORK_DIR)

    sb = tier_dump_ssd_meta(dev, "superblock")
    if sb is not None:
        log.info("<===dump super block from tierd success, will repair super 【ONLINE】===>")
    else:
        log.info("<===dump super block from tierd failed, will repair super 【OFFLINE】===>")
        sb = repair_superblock_from_backup(dev)

    assert sb
    if sb['version'] == TIER_V1:
        version = 1
    else:
        version = 2

    json_dump(conf_path, sb)

    repair_cmd = "{} -p {} -c repair_super -f {} -v {} {}".format(REPAIR_TOOL, dev, conf_path, version, REPAIR_COMMIT)
    preview = exec_cmd(repair_cmd, out_json=True)

    if not REPAIR_COMMIT:
        log.info("\n{}".format(json.dumps(preview, indent=4)))
        log.info("<===new super is above, please CHECK it and then run cmd manually:===>\n{} -y".format(repair_cmd))
        return

    # After a committed repair, read the super block once more to confirm.
    repaired = read_super_from_dev(dev)
    if repaired is None:
        raise Exception("super block still cannot read after repair")

    log.info("<===repair super success, new super is===>:\n{}".format(json.dumps(repaired, indent=4)))

def repair_exp_inode(dev, offset, sb):
    """
    Repair the inode at offset when offset lies in the expanded inode area.

    The area exists only for v2 super blocks whose 'exp_inode_magic' equals
    TIER_INODE_EXP_MAGIC. It starts TIER_INODE_EXP_RESERVE bytes after the
    end of the data blocks and spans 'sb_inode_exp_area' blocks of
    TIER_INODE_EXP_BLOCK bytes.

    :param dev: device path.
    :param offset: byte offset on the device.
    :param sb: super block dict of the device.
    :return: True when offset was in the area and repair_inode() was run,
             False otherwise.
    """
    if sb['version'] != TIER_V2:
        return False

    if sb['exp_inode_magic'] != TIER_INODE_EXP_MAGIC:
        return False

    exp_inode_off = sb['data_offset'] + sb['block_size'] * sb['block_cnt'] + TIER_INODE_EXP_RESERVE
    if offset < exp_inode_off:
        return False

    exp_inode_off_end = exp_inode_off + sb['sb_inode_exp_area'] * TIER_INODE_EXP_BLOCK
    if offset >= exp_inode_off_end:
        return False

    # Expanded inodes are numbered after the regular ones. Floor division
    # keeps the inode number an integer even under true division.
    inode_no = (offset - exp_inode_off) // TIER_META_UNIT + sb['inode_cnt']
    log.info("<===offset {} is exp_inode area, start repair inode {}===>".format(offset, inode_no))
    repair_inode(dev, inode_no)
    return True
    

def repair_dev_offset(dev, offset, sb):
    """
    Work out which metadata area a bad offset belongs to and repair it.

    Layout derived from the super block: reserved head, super block,
    brick table, inode table, extent table, data area. The gaps between the
    tables are zeroed when unreadable (only with REPAIR_COMMIT set; otherwise
    the dd command is just logged).

    :param dev: tcache device path
    :param offset: bad offset, must be a multiple of TIER_META_UNIT
    :param sb: super block dict read from the device, or None when unreadable
    :raises Exception: when the super block is missing for a non-super-block
                       offset, or its version is not in REPAIR_VERS
    """
    assert offset % TIER_META_UNIT == 0

    if offset < TIER_SB_OFF:
        log.info("<===offset {} is reserved area, no need repair===>".format(offset))
        return

    if offset < TIER_META_UNIT + TIER_SB_OFF:
        log.info("<===offset {} is super block, start repair super block===>".format(offset))
        repair_superblock(dev, offset, sb)
        return

    if not sb:
        raise Exception("super block is None, maybe need repair it first")

    if sb['version'] not in REPAIR_VERS:
        raise Exception("version {} is not supported".format(sb['version']))

    brick_off  = TIER_SB_OFF + sb['super block size']
    inode_off  = brick_off   + sb['brick_table_size']
    inode_end  = inode_off   + (sb['inode_cnt'] * TIER_META_UNIT)

    extent_off = inode_off   + sb['inode_table_size']
    extent_end = extent_off  + sb['extent_table_size']
    data_off   = sb['data_offset']

    # Index of the bad unit on the device. Floor division keeps it an integer
    # so dd receives e.g. skip=3 rather than skip=3.0 under true division.
    unit_no = offset // TIER_META_UNIT

    if offset >= data_off:
        # The data area is not handled, except for the exp_inode area behind it.
        if not repair_exp_inode(dev, offset, sb):
            log.info("<===offset {} not in meta area [{}-{}], not handle===>".format(offset, brick_off, data_off))
    elif offset < brick_off or (inode_end <= offset < extent_off) or (extent_end <= offset < data_off):
        # Reserved gaps (super block -> brick table, inode table -> extent
        # table, extent table -> data area) may be read at load time, so an
        # unreadable unit there is simply zeroed.
        try:
            cmd = "dd if={} of=/dev/null bs={} count=1 skip={} iflag=direct conv=notrunc 2>/dev/null".format(dev, TIER_META_UNIT, unit_no)
            exec_cmd(cmd)
            log.info("<===reserved area {} read ok, no need repair===>".format(offset))
            return
        except Exception as ex:
            # A failed read is the expected signal that the unit needs zeroing.
            log.debug("read reserved area {} failed, will zero it:{}".format(offset, ex))

        cmd = "dd if=/dev/zero of={} bs={} count=1 seek={} oflag=direct conv=notrunc 2>/dev/null".format(dev, TIER_META_UNIT, unit_no)
        if not REPAIR_COMMIT:
            log.info("<===offset {} is meta reserved area, you can use cmd to zero it===>\n{}".format(offset, cmd))
        else:
            exec_cmd(cmd)
            log.info("<===offset {} repaired success===>".format(offset))
    elif offset < inode_off:
        brick_no = (offset - brick_off) // TIER_META_UNIT
        assert brick_no >= 0 and brick_no < sb['brick_cnt']
        log.info("<===offset {} is brick area, start repair brick {}===>".format(offset, brick_no))
        repair_brick(dev, brick_no)
    elif offset < extent_off:
        inode_no = (offset - inode_off) // TIER_META_UNIT
        assert inode_no >= 0 and inode_no < sb['inode_cnt']
        log.info("<===offset {} is inode area, start repair inode {}===>".format(offset, inode_no))
        repair_inode(dev, inode_no)
    else:
        ext_no = (offset - extent_off) // TIER_META_UNIT
        assert ext_no >= 0 and ext_no < sb['block_cnt']
        log.info("<===offset {} is extent area, start repair extent {}===>".format(offset, ext_no))
        repair_extent(dev, ext_no)


def check_and_repair_dev(dev):
    """
    Scan the metadata of a tcache device and report the bad offsets.

    First reads, unit by unit with dd, the region after the first super block
    unit up to TIER_SB_OFF + TIER_SB_RSIZE; then asks the repair tool for the
    brick, inode and extent entries flagged with 'error=1'. Nothing is
    repaired here: the collected offsets are only logged.

    :param dev: tcache device path
    """
    sb = read_super_from_dev(dev)
    if sb is None:
        log.error("<===read super on {} failed, maybe super is corrupt, run with '-s {}' to repair it first===>".format(dev, TIER_SB_OFF))
        return

    error_offset = []

    log.info("<===begin scan {} meta area===>".format(dev))

    for cur_off in range(TIER_SB_OFF + TIER_META_UNIT, TIER_SB_OFF + TIER_SB_RSIZE, TIER_META_UNIT):
        # Floor division so dd always receives an integer skip count.
        cmd = "dd if={} of=/dev/null bs={} count=1 iflag=direct skip={} 2>/dev/null".format(dev, TIER_META_UNIT, cur_off // TIER_META_UNIT)
        try:
            exec_cmd(cmd)
        except Exception as ex:
            log.debug("exec cmd {} failed:{}".format(cmd, ex))
            error_offset.append(cur_off)

    for m in ('brick', 'inode', 'extent'):
        log.info("<===begin scan {} area===>".format(m))
        cmd = "{} -p {} -c dump_{} -g 'error=1'".format(REPAIR_TOOL, dev, m)
        error_metas = exec_cmd(cmd, out_json=True)
        # The dump is either a dict of entry lists or a flat entry list.
        if isinstance(error_metas, dict):
            found = [e['meta_offset'] for data in error_metas.values() for e in data]
        else:
            found = [e['meta_offset'] for e in error_metas]
        error_offset += found

        # Report this area's own count, not the running total of all areas.
        log.info("<===end scan {} area, found {} error===>".format(m, len(found)))

    log.info("<===end scan dev {}, error offset:{}===>".format(dev, error_offset))

    # Automatic repair is intentionally not done here, to stay on the safe
    # side; each reported offset can be repaired explicitly with '-s <offset>'.


def get_vs_version():
    """
    Return the first line of /sf/vs/version with surrounding whitespace removed.
    """
    with open("/sf/vs/version", "r") as version_fp:
        first_line = version_fp.readline()
    return first_line.strip()

def is_adesk_version():
    """
    Tell whether /sf/etc/version matches /sf/etc/version.adesk.

    :return: True when the adesk file exists and the diff command succeeds,
             False when the file is missing or the command raises
    """
    marker = "/sf/etc/version.adesk"

    if os.path.exists(marker):
        try:
            exec_cmd("diff /sf/etc/version {}".format(marker))
        except Exception as err:
            log.debug("error on check adesk version, error:{}".format(err))
        else:
            return True

    return False


def is_eds_version():
    """
    Tell whether /sf/etc/version matches /sf/etc/version.eds.

    :return: True when the eds file exists and the diff command succeeds,
             False when the file is missing or the command raises
    """
    marker = "/sf/etc/version.eds"

    if os.path.exists(marker):
        try:
            exec_cmd("diff /sf/etc/version {}".format(marker))
        except Exception as err:
            log.debug("error on check eds version, error:{}".format(err))
        else:
            return True

    return False

def _version_less_than(version, reference):
    """
    Return True when dotted `version` is older than dotted `reference`.

    The leading numeric components are compared as integers, so "3.10.0" is
    newer than "3.6.0" (a plain string comparison orders them the other way).
    When either side has no leading dotted number, fall back to the plain
    string comparison.
    """
    def numeric_prefix(text):
        matched = re.match(r'\d+(?:\.\d+)*', text.strip())
        if matched is None:
            return None
        return tuple(int(part) for part in matched.group(0).split('.'))

    lhs = numeric_prefix(version)
    rhs = numeric_prefix(reference)
    if lhs is None or rhs is None:
        return version < reference
    return lhs < rhs


def prepare_env(args):
    """
    Initialise the module globals, the work directory and logging.

    Sets REPAIR_COMMIT / REPAIR_INODE_USE_MEM / REPAIR_OFFLINE from the parsed
    arguments, creates REPAIR_WORK_DIR when missing, configures file logging
    (always DEBUG) plus a console handler (DEBUG with args.debug, else INFO),
    records the gfs mount point in GFS_MNT and derives TIER_EXPECT_VER from
    the product and the vs version.

    :param args: namespace returned by parse_args()
    :raises Exception: when the repair tools are not present
    """
    global log
    global GFS_MNT
    global TIER_EXPECT_VER
    global REPAIR_COMMIT
    global REPAIR_INODE_USE_MEM
    global REPAIR_OFFLINE

    if args.commit:
        REPAIR_COMMIT = "-y"

    if args.inode_use_mem:
        REPAIR_INODE_USE_MEM = True

    if args.repair_offline:
        REPAIR_OFFLINE = True

    if not os.path.exists(REPAIR_WORK_DIR):
        os.mkdir(REPAIR_WORK_DIR)

    if not os.path.exists(REPAIR_TOOL) or not os.path.exists(IO_TRANS_TOOL):
        raise Exception("please mv all repair tools to work dir:{}".format(REPAIR_WORK_DIR))

    log_level = logging.DEBUG if args.debug else logging.INFO

    # The log file always receives DEBUG; only the console honours '-D'.
    logging.basicConfig(level=logging.DEBUG,
                        format='[%(asctime)s] [%(levelname)s] [%(filename)s:%(lineno)s:%(funcName)s] %(message)s',
                        filename=REPAIR_LOG,
                        filemode='a')

    log = logging.getLogger('vs_tier_repair')

    console = logging.StreamHandler()
    console.setLevel(log_level)
    formatter = logging.Formatter('[%(levelname)s] [%(filename)s:%(lineno)s:%(funcName)s] %(message)s')
    console.setFormatter(formatter)

    log.addHandler(console)

    GFS_MNT = exec_cmd("mount |grep /sf/data/vs/gfs/ | awk '{print $3}'")

    vs_ver       = get_vs_version()
    product_name = ""

    if is_eds_version():
        product_name    = "EDS"
        TIER_EXPECT_VER = TIER_V1 if _version_less_than(vs_ver, "3.0.3") else TIER_V2
    else:
        product_name    = "VDI" if is_adesk_version() else "HCI"
        if _version_less_than(vs_ver, "3.0.3"):
            TIER_EXPECT_VER = TIER_V1
        elif _version_less_than(vs_ver, "3.6.0"):
            TIER_EXPECT_VER = TIER_V2
        else:
            # From 3.6.0 on the tier version is read from the tier config file.
            d = json_load(TIER_CONF)
            TIER_EXPECT_VER = TIER_V2 if d['tier_version'] == "2.0" else TIER_V3

    log.info("Product:{}, Version:{}, Tier version:{}".format(product_name, vs_ver, TIER_EXPECT_VER))

def parse_args():
    """
    Build the command line parser and parse sys.argv.

    Prints the help text and exits with status 1 when no argument is given.

    :return: (namespace, extras) tuple from parse_known_args(); extras holds
             the arguments this parser does not recognise
    """
    option_table = [
        (('-p',), {'dest': 'dev', 'help': 'tcache device path, such as: /dev/vgname/vgname-tcache'}),
        (('-s',), {'dest': 'offset', 'type': int, 'help': 'badblock offset on tcache device'}),
        (('-D',), {'dest': 'debug', 'action': 'store_true', 'help': "enable debug mode"}),
        (('-y',), {'dest': 'commit', 'action': 'store_true', 'help': "commit 'repair' action"}),
        (('-c',), {'dest': 'cmd', 'help': 'command pass to vs_tier tool'}),
        (('--repair-inode-use-mem',), {'dest': 'inode_use_mem', 'action': 'store_true',
                                       'help': 'repair inode by dumped inode info'}),
        (('--repair-offline',), {'dest': 'repair_offline', 'action': 'store_true',
                                 'help': 'repair with offline mode'}),
    ]

    parser = argparse.ArgumentParser(description='vs repair tool')
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    # Only the program name was given: show usage instead of running.
    if len(sys.argv) <= 1:
        parser.print_help()
        sys.exit(1)

    return parser.parse_known_args()

def main():
    """
    Entry point: parse arguments, prepare the environment and dispatch.

    With '-c' the command is passed through to the vs_tier tool and its output
    printed; with '-s' the given offset (rounded down to TIER_META_UNIT) is
    repaired; otherwise the device is scanned.

    :return: 0
    :raises Exception: when the device is missing or its path is not accepted
    """
    args, unparse_args = parse_args()
    prepare_env(args)

    dev = args.dev
    if not dev or not os.path.exists(dev):
        raise Exception("dev must be specified, ant it should be exists. args.dev:{}".format(dev))

    # Only a tcache lv (or a path under the cache config dir) is accepted, so
    # that a whole disk cannot be passed in by mistake.
    is_tcache_lv = dev.startswith("/dev/") and dev.endswith("-tcache")
    under_cache_dir = dev.startswith("/sf/cfg/vs/cache/")
    if not (is_tcache_lv or under_cache_dir):
        raise Exception("dev {} is invalid. it should be /dev/$vg/$vg-tcache or /sf/cfg/vs/cache/$vg".format(dev))

    log.debug("args:{}, unparse_args:{}".format(args, unparse_args))

    # This script only implements repair; any other command is handed to the
    # vs_tier tool unchanged.
    if args.cmd:
        passthrough = "{} -p {} -c {} {}".format(REPAIR_TOOL, dev, args.cmd, ' '.join(unparse_args))
        print(exec_cmd(passthrough))
        return 0

    if args.offset is None:
        check_and_repair_dev(dev)
        return 0

    sb = read_super_from_dev(dev)
    aligned_offset = args.offset - (args.offset % TIER_META_UNIT)
    repair_dev_offset(dev, aligned_offset, sb)
    return 0

# Run main() only when executed as a script; its return value is the exit status.
if __name__ == '__main__':
    raise SystemExit(main())

