#!/usr/bin/env /sf/vs/bin/python
# -*- coding: utf-8 -*-
# =============================================================================
# Copyright © 2023 Sangfor Technologies
# All rights reserved.
#
# Author: chengzhilong - 178987@sangfor.com
#
# Last modified: 2023-07-26 09:16
#
# Filename: vst_prealloc_fiemap_tool.py
#
# Description:
# 问题背景: 分层discard缺陷导致new_data值未更新, glusterfsd重启后空间泄漏;
# 先决条件: 环境已存在修复分层discard缺陷的补丁包, 且打完补丁包后有重启过glusterfsd进程
# 工具目的: 该工具用于修复环境分层new data值不准确的场景, 校正分层new_data值, 然后需手动
# 重启glusterfsd进程
# =============================================================================
import datetime
import json
import logging
import os
import socket
import struct
import sys
from collections import OrderedDict

from libcommon import config
from libvs.utils.stddir import VSF_TIERD
from vs_rpc.vs_rpc_api import VsRpcApi, PreallocRpcApi

# Lock file opened by __exec_tier_cmd and held through config.Wlock while a
# tier command is being sent.
VST_LOCK_FILE = '/var/lock/vs_tier_cli.lock'
# Unix-domain socket that send_request connects to.
VST_SOCK_PATH = '/run/vs/tier_cmd_sock'
# File that __save_inodes appends the failed gfid lists to.
VST_TOOL_LOG = '/sf/log/today/vs/vst_prealloc_fiemap_tool.log'
# Maximum number of bytes read from the socket for one response.
VST_RECV_SIZE_4K = 4096
# gfids that __tier_dump_inode always skips.
VST_SEPCIAL_FILE = ['00000000-0000-0000-0000-000000000002']
# Number of gfids passed to vs_get_fileinfo per call.
VST_CIRCLE_UNIT = 1000

# NOTE(review): no `filename` is passed here, so records presumably go to the
# default stream handler and `filemode` has no effect; VST_TOOL_LOG is only
# written by __save_inodes. Confirm this is intended.
logging.basicConfig(level=logging.INFO,
                    format='[%(asctime)s] [%(levelname)s] [%(filename)s:%(lineno)s:%(funcName)s] %(message)s',
                    filemode='a')
logger = logging.getLogger(__name__)

# json_load, resp2dict, send_request, do_command实现均从vs_tier_cli.py搬移过来
def json_load(path):
    """
    Parse the JSON file at ``path`` into a Python object.

    JSON objects are decoded as ``OrderedDict`` so the key order of the file
    is kept. The file is opened in a ``with`` block so the handle is closed
    even when parsing fails.

    :param path: path of the JSON file to read
    :return: the decoded object, or None when the file cannot be opened or
             parsed (the failure is logged together with its traceback)
    """
    try:
        with open(path) as fp:
            return json.load(fp, object_pairs_hook=OrderedDict)
    except Exception as ex:
        logger.exception("json load path: {} failed, ex: {}".format(path, ex))
        return None


def resp2dict(response):
    """
    Parse a command response string into a dict.

    Surrounding braces are stripped and the text is split at the first comma
    into at most two fields; each field is split at its first colon into a
    key and a value. Keys lose surrounding spaces and single quotes, values
    lose surrounding spaces and double quotes, and the ``ret`` value is
    converted to int. Example result:
    ``{'ret': -1, 'msg': 'magic error:0xbab4'}``.

    :param response: raw response text received from the tier socket
    :return: dict of the parsed fields with an integer ``ret``; when the
             response is empty or malformed (a field without a colon, a
             missing or non-integer ``ret``) ``{'ret': -1, 'msg': ...}`` is
             returned instead of raising
    """
    malformed = {'ret': -1, 'msg': 'malformed response: %r' % (response,)}
    fields = {}
    for segment in response.strip('{}').split(',', 1):
        # Split only at the first colon so a value may itself contain colons.
        key, sep, value = segment.partition(':')
        if not sep:
            return malformed
        fields[key.strip('\' ')] = value.strip(' "')
    try:
        fields['ret'] = int(fields['ret'])
    except (KeyError, ValueError):
        return malformed
    return fields


def send_request(data):
    """
    Send one packed command to the tier socket and return the parsed reply.

    Connects to the Unix stream socket at VST_SOCK_PATH, sends ``data`` and
    reads at most VST_RECV_SIZE_4K bytes of response. The socket is closed on
    every path, including failures.

    :param data: already packed request bytes (see do_command)
    :return: the dict produced by resp2dict for the response, or
             ``{'ret': -1, 'msg': <error text>}`` when connecting, sending or
             receiving fails
    """
    sockfd = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        sockfd.connect(VST_SOCK_PATH)
        sockfd.sendall(data)
        # Connected stream socket: recv() returns the same payload that
        # recvfrom() did, without the unused peer address.
        response = sockfd.recv(VST_RECV_SIZE_4K)
    except Exception as e:
        return {'ret': -1, 'msg': str(e)}
    finally:
        sockfd.close()
    return resp2dict(response)


def do_command(cmd, arg):
    """
    Wrap a command in the fixed wire format and send it.

    The command text is ``tier.<cmd>:<arg>;`` (``<arg>`` is left empty when
    ``arg`` is falsy). It is packed little-endian as a 16-bit magic 0xB4BA,
    a 16-bit text length, and the text itself.

    :param cmd: command name
    :param arg: command argument, or a falsy value for none
    :return: whatever send_request returns for the packed request
    """
    argument = arg if arg else ''
    text = 'tier' + '.' + cmd + ':' + argument + ';'
    size = len(text)
    packet = struct.pack('<HH%ds' % size, 0xB4BA, size, text)
    return send_request(packet)


def __exec_tier_cmd(cmd, args):
    """
    Send one tier command while holding the tier CLI lock.

    VST_LOCK_FILE is opened and locked through config.Wlock for the duration
    of the request; the file is closed on every path.

    :param cmd: command name passed to do_command
    :param args: command argument passed to do_command
    :return: the response dict when its ``ret`` is 0, otherwise None (the
             failure is logged). Callers test the result for falsiness.
    """
    with open(VST_LOCK_FILE, 'a') as lock:
        with config.Wlock(lock):
            ret = do_command(cmd, args)
    if ret['ret'] != 0:
        # A bare `raise` here had no active exception to re-raise; report the
        # failure through the return value instead.
        logger.error("vs_tier_cli.py -c %s -a %s failed: %s" % (cmd, args, ret.get('msg', '')))
        return None
    return ret


def __save_inodes(inode_list, promote_str):
    """
    Append one timestamped record of gfids to VST_TOOL_LOG.

    The record is ``<time> <promote_str>: <gfid> <gfid> ...`` followed by a
    blank line.

    :param inode_list: iterable of gfids to record
    :param promote_str: label written in front of the gfids
    """
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(VST_TOOL_LOG, 'a') as out:
        out.write(stamp + " " + promote_str + ": ")
        out.writelines(str(gfid) + ' ' for gfid in inode_list)
        out.write('\n\n')


def __tier_dump_inode():
    """
    Fetch from the tier the inodes whose new_data value may be inaccurate.

    Runs the tier ``dump inode`` command, loads the JSON file whose path is
    returned in the response's ``msg`` field, collects every inode that is not
    listed in VST_SEPCIAL_FILE and has a non-zero ``new data`` value, and then
    removes the dump file.

    :return: ``(ssd_list, inode_list)`` where ``ssd_list`` maps
             ssd uuid -> brick id -> list of
             ``{"gfid", "block_cnt", "new_data"}`` dicts and ``inode_list`` is
             the flat list of collected gfids; ``(None, None)`` when the dump
             command or loading its output fails
    """
    logger.info("2. [BEGIN] 准备从分层获取inode列表")
    ret = __exec_tier_cmd('dump', 'inode')
    if not ret:
        logger.error("2. [END  ] 执行分层dump命令失败\n")
        return None, None
    dump_path = ret['msg']
    dump_obj = json_load(dump_path)
    if dump_obj is None:
        logger.error("2. [END  ] 执行分层dump命令失败\n")
        return None, None
    ssd_list = {}
    inode_list = []
    for ssd_info in dump_obj['ssd']:
        brick_list = {}
        for brick_info in ssd_info['brick']:
            files = []
            for inode_info in brick_info['inodes']:
                gfid = str(inode_info['uuid'])
                # Skip the special gfids and those whose new data is 0.
                if gfid in VST_SEPCIAL_FILE or inode_info['new data'] == 0:
                    continue
                files.append({
                    "gfid": gfid,
                    "block_cnt": inode_info['block_cnt'],
                    "new_data": inode_info['new data'],
                })
                inode_list.append(gfid)
            brick_list[str(brick_info['bi_brickid'])] = files
        ssd_list[ssd_info['ssd_uuid']] = brick_list
    # The dump file is only needed for this parse; delete it once consumed.
    os.remove(dump_path)
    logger.info("2. [END  ] 分层需要检查的inode数量: {}\n".format(len(inode_list)))
    return ssd_list, inode_list


def __prealloc_lookup_inode(inode_list):
    """
    Issue a vs_get_fileinfo request for every gfid in ``inode_list``.

    gfids are sent in batches of VST_CIRCLE_UNIT. Batches whose result has a
    falsy ``ret`` are recorded through __save_inodes, but the step still
    counts as done so later steps can continue.

    :param inode_list: list of gfids to look up
    :return: False when the list is empty or an exception occurs, otherwise
             True (even if some batches were recorded as failed)
    """
    logger.info("3. [BEGIN] 准备对分层inode下发stat请求")
    if not inode_list:
        logger.info("3. [END  ] inode_list为空，跳过\n")
        return False
    failed = []
    try:
        rpc = VsRpcApi()
        # Query in batches: a slower lookup is acceptable, whereas passing too
        # many gfids at once could stall the client.
        batches = [inode_list[start: start + VST_CIRCLE_UNIT]
                   for start in range(0, len(inode_list), VST_CIRCLE_UNIT)]
        for batch in batches:
            # NOTE(review): a falsy 'ret' is treated as failure here - confirm
            # against the vs_get_fileinfo contract.
            if not rpc.vs_get_fileinfo(batch)['ret']:
                failed.extend(batch)
        if failed:
            # Failures are only recorded; the remaining steps still run.
            __save_inodes(failed, "vs_get_fileinfo failed")
            logger.error("vs_get_fileinfo inode failed! log path: {}\n".format(VST_TOOL_LOG))
        else:
            logger.info("3. [END  ] 下发stat请求成功\n")
        return True
    except Exception as ex:
        logger.exception("3. [END  ] vs_get_fileinfo请求异常 {}\n".format(str(ex)))
        return False


def __exec_dump_fiemap(brickid, inode_list, index):
    """
    Call prealloc_dump_fiemap for every inode of one brick.

    The RPC socket path is ``/run/vs/rpc/<a>-<b>.sock`` where ``<a>`` and
    ``<b>`` are the last two ``/``-separated components of ``brickid``.
    gfids whose JSON result has a truthy ``ret`` are recorded through
    __save_inodes.

    :param brickid: brick identifier (a ``/``-separated string)
    :param inode_list: list of dicts, each with a ``"gfid"`` key
    :param index: sequence number used only in the log messages
    :return: None when ``brickid`` is empty; False when the socket path does
             not exist or an exception occurs; otherwise True (even if some
             gfids were recorded as failed)
    """
    if not brickid:
        return None
    err_files = []
    logger.info("4.{} [BEGIN] 准备下发prealloc_dump_fiemap接口. brickid: {}".format(index, brickid))
    try:
        parts = brickid.split('/')
        brick = parts[-2] + '-' + parts[-1]
        path = '/run/vs/rpc/' + brick + '.sock'
        if not os.path.exists(path):
            logger.error("4.{} [END  ] Brick path {} 不存在".format(index, path))
            return False
        prealloc_rpc_srv = PreallocRpcApi(path)
        for inode in inode_list:
            res = prealloc_rpc_srv.prealloc_dump_fiemap(inode["gfid"])
            # UTF-8 is already the default; the explicit `encoding` keyword
            # is rejected by Python 3.9+, so it is not passed.
            result = json.loads(res)
            if result["ret"]:
                err_files.append(inode["gfid"])
    except Exception as ex:
        logger.exception("4.{} [END  ] prealloc_dump_fiemap接口调用异常. brickid: {}, inode_list: {}, ex: {}\n".format(
            index, brickid, inode_list, str(ex)))
        return False
    if not err_files:
        logger.info("4.{} [END  ] prealloc_dump_fiemap success. Brick: {}".format(index, brickid))
    else:
        # Failures are only recorded; the brick still counts as processed.
        __save_inodes(err_files, "prealloc_dump_fiemap failed")
        logger.error("4.{} [END ] prealloc_dump_fiemap failed, Brick: {}, inode in {}".format(index,
                                                                                              brickid, VST_TOOL_LOG))
    return True


def __prealloc_dump_fiemap(ssd_list):
    """
    Run __exec_dump_fiemap for every brick of every ssd.

    Bricks are numbered from 1 in iteration order; the number is used only in
    the log messages. The per-brick results are ignored.

    :param ssd_list: mapping ssd uuid -> brick id -> list of inode dicts
    :return: False when ``ssd_list`` is empty, otherwise True
    """
    logger.info("4. [BEGIN] 准备调用prealloc_dump_fiemap接口")
    if not ssd_list:
        logger.info("4. [END  ] 分层ssd列表为空")
        return False
    step = 0
    for bricks in ssd_list.values():
        for brickid, inodes in bricks.items():
            step += 1
            __exec_dump_fiemap(brickid, inodes, step)
    logger.info("\n4. [END  ] prealloc_dump_fiemap接口执行完毕\n")
    return True


def env_check():
    """
    Check that the path VSF_TIERD exists.

    :return: True when the path exists, otherwise False (an error is logged)
    """
    logger.info("1. [BEGIN] 环境检查")
    # VSF_TIERD is presumably the tier configuration file - see libvs stddir.
    tier_conf_found = os.path.exists(VSF_TIERD)
    if tier_conf_found:
        logger.info("1. [END  ] 环境检查通过\n")
    else:
        logger.error("1. [END  ] 检查失败: 分层配置文件{}不存在".format(VSF_TIERD))
    return tier_conf_found


def print_message():
    """Log the two closing messages of step 5 (correction done, restart hint)."""
    closing_lines = (
        "5. [BEGIN] 分层new_data值均已通过fiemap校正",
        "5. [END  ] 请稍后重启glusterfsd进程以校正brick容量\n",
    )
    for text in closing_lines:
        logger.info(text)


def _capacity_leak_fix():
    """
    Run the five correction steps in order, stopping at the first that fails.

    1. environment check; 2. fetch the inode list from the tier; 3. look up
    every inode once; 4. call prealloc_dump_fiemap for each brick; 5. log the
    reminder to restart glusterfsd.
    """
    # Step 1: environment check.
    if env_check():
        # Step 2: fetch the inode list from the tier.
        ssd_list, inode_list = __tier_dump_inode()
        # Steps 3 and 4 rely on short-circuit evaluation, so each one only
        # runs when every earlier step succeeded.
        if (ssd_list and inode_list
                and __prealloc_lookup_inode(inode_list)
                and __prealloc_dump_fiemap(ssd_list)):
            # Step 5: remind the operator to restart glusterfsd.
            print_message()

