#!/sf/vs/bin/python
# -*- coding: utf-8 -*-
# =============================================================================
# Copyright © 2024 Sangfor Technologies
# All rights reserved.
# Filename: vsts_disk_lba2file.py
# Author: zhengzt
# Description:
# 获取指定物理地址的分片文件
# Last modified: 2024.3.25
#
# =============================================================================
import argparse
import csv
import logging
import os
import sys
import re
import json
import contextlib
import time
import threading
import uuid
import stat
import platform
import pipes

from fnmatch import fnmatch
import subprocess32 as subprocess

from libcommon.singleproc import SingleProc
from libcommon.log import logger_init
from libvs.glusterfs import Glusterfs
from libvs.volume.common import get_volume_name

logger = logging.getLogger(__name__)


# Sector size in bytes (sector counts are multiplied by this to get bytes)
SECTOR_SIZE = 512
KB = 1024
MB = 1024 * KB
GB = 1024 * MB
TB = 1024 * GB

# File system offset, subtracted when converting a disk address to an EFS address
FILE_SYSTEM_OFFSET = 304 * MB

# Offset of the partition relative to the raw disk
PART_PV_OFFSET = MB

# Virtual machine information list
VM_INFO_FILE_PATH = '/cfs/.vmlist'
# Output directory
OUT_PATH = '/sf/log/vsts_disk_lba2file'
# Working directory at import time (used to locate a bundled efs_repair binary)
CUR_PATH = os.getcwd()

# Path of the CSV file the affected shard files are written to
OUT_SHARD_FILES_FILE_PATH = os.path.join(OUT_PATH, "shard_files_file.csv")

# Running totals, updated by the worker threads while holding the shared lock
TOTAL_SHARD_FILE_COUNT = 0
TOTAL_VM_IDS = []

# Default efs_repair binary; check_efs_repair() may point this at a local copy
EFS_REPAIR_BIN = '/sf/vs/sbin/efs_repair'


class CallbackThread(threading.Thread):
    """Thread that calls an optional ``callback`` after its task has ended.

    The callback is invoked from the worker thread itself, whether the task
    returned normally or raised an ``Exception`` (which is logged and
    swallowed).
    """

    def __init__(self, callback=None, *args, **kwargs):
        """Keep ``callback`` and pass everything else on to ``threading.Thread``."""
        super(CallbackThread, self).__init__(*args, **kwargs)
        self.callback = callback

    def run(self):
        """Execute the thread target, log failures, then fire the callback."""
        try:
            super(CallbackThread, self).run()
        except Exception as exc:
            logger.error("Thread run failed: {}".format(exc))
        finally:
            # The callback must run even when the task failed.
            on_finished = self.callback
            if on_finished:
                on_finished()


def is_gt_vs3x(ver):
    """Tell whether ``ver`` is a three-part (or longer) version with major >= 3.

    The major component is compared numerically, so ``"10.0.0"`` is correctly
    treated as newer than ``"3.0.0"`` (a plain string comparison would order
    ``"10"`` before ``"3"``).

    :param ver: dot-separated version string, e.g. ``"3.0.1"``
    :return: True if there are more than two components and the major
             component is at least 3, otherwise False
    """
    parts = ver.split('.')
    if len(parts) <= 2:
        return False
    try:
        return int(parts[0]) >= 3
    except ValueError:
        # Non-numeric major component: keep the legacy string comparison.
        return parts[0] >= '3'


def is_system_disk(device):
    """Check whether ``device`` is reported as a system disk.

    Sources ``vs_common_disk_interface.sh`` in bash and runs
    ``vs_get_symtem_disk``; the base name of ``device`` is then looked up in
    that command's output.

    NOTE(review): the lookup is a substring test, so ``sda`` would also match
    an output containing ``sdaa`` - confirm the output format.

    :param device: device path such as ``/dev/sdb``
    :return: True if the base name occurs in the command output; False
             otherwise or when the command fails
    """
    cmd = "source /sf/vs/bin/vs_common_disk_interface.sh; vs_get_symtem_disk"
    try:
        system_disks = subprocess.check_output(cmd, shell=True, executable="/bin/bash")
    except Exception as err:
        logger.exception("cmd:{} failed, err:{}".format(cmd, err))
        return False
    return os.path.basename(device) in system_disks.strip()


def is_two_host_volume():
    """Return True when the current volume does not have an arbiter enabled.

    :return: the negation of ``Glusterfs(volume).is_arbiter_on()``
    """
    volume = Glusterfs(get_volume_name())
    if volume.is_arbiter_on():
        return False
    return True


def get_cpu_architecture():
    """Classify the host CPU as ``'arm'``, ``'x86'`` or ``'unknown'``.

    The decision is made by looking for known markers inside the lower-cased
    ``platform.machine()`` string; ARM markers are checked first.

    :return: ``'arm'``, ``'x86'`` or ``"unknown"``
    """
    machine = platform.machine().lower()
    families = (
        ('arm', ('arm', 'aarch64')),
        ('x86', ('x86', 'amd64', 'x86_64')),
    )
    for family, markers in families:
        if any(marker in machine for marker in markers):
            return family
    return "unknown"

def check_efs_repair():
    """Select a usable ``efs_repair`` binary and make sure it is executable.

    Runs ``efs_repair -h`` and looks for the word ``extent`` in its output
    (stderr when the command failed, stdout when it succeeded). If the word is
    missing, the global ``EFS_REPAIR_BIN`` is redirected to
    ``<CUR_PATH>/efs_repair_<arch>``.

    :return: False if the selected binary does not exist, True otherwise (the
             user-execute bit is added when the binary is not executable)
    """
    global EFS_REPAIR_BIN
    cmd = 'efs_repair -h'
    probe = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdoutdata, stderrdata = probe.communicate()

    # Only the stream relevant to the exit status is decoded and inspected.
    relevant_stream = stderrdata if probe.returncode != 0 else stdoutdata
    if 'extent' not in relevant_stream.decode('utf-8'):
        EFS_REPAIR_BIN = os.path.join(CUR_PATH, 'efs_repair_' + get_cpu_architecture())

    if not os.path.exists(EFS_REPAIR_BIN):
        logger.error("{} not exists".format(EFS_REPAIR_BIN))
        return False
    if not os.access(EFS_REPAIR_BIN, os.X_OK):
        # Add the execute permission for the owner.
        current_mode = os.stat(EFS_REPAIR_BIN).st_mode
        os.chmod(EFS_REPAIR_BIN, current_mode | stat.S_IXUSR)
    return True


def check_evn(disk):
    """Run the pre-flight checks for ``disk`` and report the first failure.

    Checks, in order: the disk path exists, it is not a system disk, the
    volume is not a two-host volume, and an ``efs_repair`` binary is
    available. A failure prints a message to stdout.

    :param disk: disk path given on the command line
    :return: True when every check passes, False otherwise
    """
    def _reject(message):
        # Tell the operator why the tool refuses to continue.
        sys.stdout.write(message)
        return False

    # 1. The disk must exist on this host.
    if not os.path.exists(disk):
        logger.error("lv {} not exists".format(disk))
        return _reject('当前磁盘{}不存在当前主机，请确认逻辑卷所在主机执行脚本！\n'.format(disk))

    # 2. Only data disks are supported.
    if is_system_disk(disk):
        logger.error("device {} is not data disk".format(disk))
        return _reject('当前磁盘{}是系统盘，工具仅支持数据盘！\n'.format(disk))

    # 3. Only three-host volumes are supported.
    if is_two_host_volume():
        return _reject('当前磁盘{}所属虚拟机存储不是三主机，工具仅支持三主机！\n'.format(disk))

    # 4. efs_repair must be available (this may update EFS_REPAIR_BIN).
    if not check_efs_repair():
        return _reject('{} 工具不存在，请下载工具到指定路径\n'.format(EFS_REPAIR_BIN))

    return True


def get_address_by_file(file_path):
    """Load bad-block addresses from a text file, one address per line.

    Each line is stripped; lines that are not made of digits only are ignored.

    :param file_path: path of the bad-block address file
    :return: list of addresses as integers, in file order
    :raises Exception: any error while reading is logged and re-raised
    """
    try:
        with open(file_path, 'r') as handle:
            candidates = (raw_line.strip() for raw_line in handle)
            return [int(text) for text in candidates if text.isdigit()]
    except Exception as err:
        logger.error("read file {file_path} error: {err}".format(file_path=file_path, err=err))
        raise


def reverse_physical_address(physical_address_sector, lv_start_offset):
    """Translate a sector address into a 1 MB-aligned byte offset.

    The LV start offset and ``FILE_SYSTEM_OFFSET`` are subtracted from the
    address, and the result is rounded down to a multiple of ``MB``.

    :param physical_address_sector: original address, in sectors
    :param lv_start_offset: start offset of the logical volume, in sectors
    :return: converted and aligned address, in bytes
    """
    address_bytes = physical_address_sector * SECTOR_SIZE
    lv_start_bytes = lv_start_offset * SECTOR_SIZE
    relative_bytes = address_bytes - lv_start_bytes - FILE_SYSTEM_OFFSET
    # Floor to the containing 1 MB boundary.
    return relative_bytes // MB * MB


def get_lv_address(physical_address, lv_efs_device):
    """Convert byte addresses into 1 MB-aligned addresses relative to the LV.

    Each address first has ``MB`` subtracted (presumably the partition offset,
    same value as ``PART_PV_OFFSET`` - confirm) and is converted to sectors
    with floor division, so the result stays an integer regardless of the
    interpreter's division semantics. It is then passed through
    ``reverse_physical_address`` together with the LV offset reported by
    ``dmsetup``.

    :param physical_address: iterable of bad-block addresses in bytes
    :param lv_efs_device: device path of the lv_efs logical volume
    :return: list of converted addresses in bytes
    """
    lv, _ = get_lv_vg_by_device(lv_efs_device)
    offset_sector = get_lv_offset(lv)
    return [
        reverse_physical_address((byte_address - MB) // SECTOR_SIZE, offset_sector)
        for byte_address in physical_address
    ]


def get_lv_vg_by_device(device):
    """Derive the device-mapper LV name and the VG name from a device path.

    Recognised layouts (checked in this order):

    * ``/dev/mapper/LZUB6b--GTXb--USk4--1VkR--MdiZ--PfXC--Ab1fVf-lv_efs``
    * ``/dev/bXtqDU-dLxy-KqYj-FscL-5u46-gf5y-Mt3FUl/lv_efs``
    * ``/dev/<vg>/<vg>_350a51a8ab9f4efb8c23df98ac021178.0``

    NOTE(review): the original docstring also listed an arbiter path ending
    in ``-lv_fs``, but no branch matches it; confirm whether it is needed.

    :param device: device path in one of the layouts above
    :return: tuple ``(lv, vg)``; ``lv`` is the device-mapper style name (VG
             dashes doubled) that is searched for in ``dmsetup table``, and
             ``vg`` is the volume group name with single dashes
    :raises ValueError: if ``device`` matches none of the layouts
    """
    mapper_suffix = '-lv_efs'
    parts = device.split('/')

    # /dev/mapper/LZUB6b--GTXb--USk4--1VkR--MdiZ--PfXC--Ab1fVf-lv_efs
    if re.match(r'/dev/mapper/.*?-lv_efs', device):
        lv = parts[-1]
        # Strip the whole '-lv_efs' suffix; stripping only len('-lv_fs')
        # characters used to leave a trailing '-' on the VG name.
        vg = lv[:-len(mapper_suffix)].replace('--', '-')

    # /dev/bXtqDU-dLxy-KqYj-FscL-5u46-gf5y-Mt3FUl/lv_efs
    elif re.match(r'/dev/.*?/lv_efs', device):
        vg = parts[2]
        # bXtqDU--dLxy--KqYj--FscL--5u46--gf5y--Mt3FUl-lv_efs
        lv = '{vg}-{efs}'.format(vg=vg.replace('-', '--'), efs=parts[-1])

    # /dev/<vg>/<vg>_<32 hex chars>.N
    elif re.match(r'/dev/.*?/.*?_[0-9a-f]{32}', device):
        vg = parts[2]
        # Both path components get their dashes doubled and are joined by '-'.
        lv = '{}-{}'.format(vg.replace('-', '--'), parts[3].replace('-', '--'))
    else:
        logger.error("device:{} invalid".format(device))
        raise ValueError('device:{} invalid'.format(device))

    logger.debug("device:{}, lv:{}, vg:{}".format(device, lv, vg))

    return lv, vg


def get_lv_offset(lv=None):
    """Return the offset of ``lv`` on its disk, read from ``dmsetup table``.

    The LV name is shell-quoted and matched as a fixed string (``grep -F``),
    because LV names contain ``.``, which a regular expression would treat as
    a wildcard.

    :param lv: device-mapper LV name; when empty/None the raw output of
               ``/sbin/dmsetup table`` is returned instead
    :return: the last whitespace-separated field of the matching output as an
             int (offset in sectors), or the raw command output when ``lv``
             is not given
    :raises Exception: the command failure (e.g. no matching line) is logged
                       and re-raised with its original traceback
    """
    if lv:
        cmd = '/sbin/dmsetup table | grep -F -- {}'.format(pipes.quote(lv))
    else:
        cmd = '/sbin/dmsetup table'

    try:
        output = subprocess.check_output(cmd, shell=True)
    except Exception as e:
        logger.exception("cmd:{} failed, err:{}".format(cmd, e))
        raise

    logger.debug("cmd:{}, output:{}".format(cmd, output))

    if not lv:
        return output

    # The offset is the final field of the table row.
    offset_sector = int(output.strip().split()[-1])
    logger.debug("lv:{}, lv_offset:{}".format(lv, offset_sector))

    return offset_sector


def mount_efs(device_path, mnt_path, timeout=10):
    """Mount an EFS block device read-only and wait until it is usable.

    The mount directory is created if missing. Both paths are shell-quoted
    so that paths containing spaces or shell metacharacters are passed
    through intact.

    :param device_path: path of the EFS block device
    :param mnt_path: target mount directory
    :param timeout: seconds to wait for the mount point to become listable
    :raises Exception: if ``efs_standalone`` fails (logged and re-raised)
    :raises ValueError: if the mount point is not usable within ``timeout``
    """
    if not os.path.exists(mnt_path):
        os.makedirs(mnt_path)

    cmd = 'efs_standalone -i ulvm -p {} {} -o readonly'.format(
        pipes.quote(device_path), pipes.quote(mnt_path))
    try:
        subprocess.check_output(cmd, shell=True)
    except Exception as e:
        logger.error("exec {} failed, error:{}".format(cmd, e))
        raise

    deadline = time.time() + timeout
    while time.time() < deadline:
        if os.path.ismount(mnt_path):
            try:
                # A successful listing is taken as proof the mount is usable.
                os.listdir(mnt_path)
            except Exception as e:
                logger.debug("disk {} not mount, err: {}, try again".format(mnt_path, e))
            else:
                logger.info("disk {} mount success".format(mnt_path))
                return
        time.sleep(1)  # wait a moment before checking again
    raise ValueError("disk mount fail")


def umount_efs(mnt_path):
    """Unmount ``mnt_path`` with the ``umount`` command.

    The path is shell-quoted so that directories containing spaces or shell
    metacharacters are handled correctly.

    :param mnt_path: mount directory to unmount
    :raises Exception: the command failure is logged and re-raised
    """
    cmd = 'umount {}'.format(pipes.quote(mnt_path))
    try:
        subprocess.check_output(cmd, shell=True)
    except Exception as e:
        logger.error("exec {} failed, error:{}".format(cmd, e))
        raise


@contextlib.contextmanager
def mounted_disk(mount_point, mnt_path):
    """Context manager that mounts an EFS device and unmounts it on exit.

    If mounting fails, the exception propagates and no unmount is attempted.
    Once mounted, the unmount runs whether or not the body raised.

    :param mount_point: EFS device passed to ``mount_efs``
    :param mnt_path: local directory the device is mounted on
    """
    mount_efs(mount_point, mnt_path)
    try:
        yield
    finally:
        # Reached only after a successful mount.
        umount_efs(mnt_path)


def get_efs_standalone_pid(device):
    """Find the PID of the ``efs_standalone`` process serving ``device``.

    The process list is filtered with ``ps``/``grep``/``awk``. The output is
    split on whitespace so that an empty result yields an empty list; with
    ``split('\\n')`` an empty result became ``['']`` and the "not found" check
    could never trigger.

    :param device: device path that was passed to ``efs_standalone -p``
    :return: PID of the single matching process, as an int
    :raises ValueError: if no process, or more than one process, matches
    :raises Exception: the command failure is logged and re-raised
    """
    cmd = 'ps -ef | grep "efs_standalone -i ulvm -p {}" | grep -v grep | awk \'{{print $2}}\''.format(device)
    try:
        ps_output = subprocess.check_output(cmd, shell=True)
    except Exception as e:
        logger.error("exec {} failed, error:{}".format(cmd, e))
        raise
    efs_standalone_pids = ps_output.split()
    if len(efs_standalone_pids) == 0:
        logger.error("efs_standalone process not found, cmd: {}".format(cmd))
        raise ValueError("efs_standalone process not found")
    if len(efs_standalone_pids) != 1:
        logger.error("efs_standalone has many process, cmd : {}".format(cmd))
        raise ValueError("efs_standalone process too many")
    return int(efs_standalone_pids[0])


def get_inode_by_gfid(efs_pid, gfid):
    """Look up the inode number of ``gfid`` through ``efs_dbg``.

    Runs ``efs_dbg -p <pid> -c "inode info <gfid>"`` and returns the value of
    the ``i_no:`` line. Abridged sample output::

        Successfully attched to /var/run/vs/efs_dbg_srv_37567.sock@37567
        ====================head=====================
        magic:               _EFSINO_
        ...
        ====================data=====================
        i_uuid:              bb3e2840-90e3-4faa-85c3-f391a1aa657b
        i_no:                1657
        i_type:              _____C(0x20)
        ...
        i_extents[0]:        1987
        ====================end=====================

    :param efs_pid: PID of the ``efs_standalone`` process
    :param gfid: gfid (uuid string) of the file
    :return: inode number as an int
    :raises ValueError: if the output has no ``i_no:`` line
    :raises Exception: the command failure is logged and re-raised
    """
    cmd = '/sf/vs/sbin/efs_dbg -p {} -c "inode info {}"'.format(efs_pid, gfid)
    try:
        inode_info = subprocess.check_output(cmd, shell=True, stderr=subprocess.DEVNULL)
    except Exception as e:
        logger.error("exec {} failed, error:{}".format(cmd, e))
        raise
    logger.debug(inode_info)
    inode_info = inode_info.strip().split('\n')
    for line in inode_info:
        # Tolerate leading indentation in front of the field name.
        if line.strip().startswith("i_no:"):
            return int(line.split()[-1])
    err = "efs_pid:{}, gfid:{}, inode_info:{}, can't find i_no".format(efs_pid, gfid, inode_info)
    logger.error(err)
    raise ValueError(err)


def get_extent_by_inode(device_path, inode):
    """Dump the extent information of ``inode`` with the efs_repair tool.

    :param device_path: device the tool is pointed at (``-p``)
    :param inode: inode number passed as ``ino=<inode>``
    :return: the stripped command output, split into a list of lines
    :raises Exception: the command failure is logged and re-raised
    """
    repair_bin = pipes.quote(EFS_REPAIR_BIN)
    cmd = '{} -i ulvm -p {} -a dump -t extent -o nolock,ino={} -l /dev/null'.format(
        repair_bin, device_path, inode)
    try:
        raw_dump = subprocess.check_output(cmd, shell=True)
    except Exception as err:
        logger.error("exec {} failed, error:{}".format(cmd, err))
        raise err
    return raw_dump.strip().split('\n')


def get_shard_file_by_gfid(gfid):
    """Resolve ``gfid`` to a path through ``ShardService.gfid2path``.

    :param gfid: gfid to resolve
    :return: the ``path`` attribute of the first lookup result
    """
    from shard import ShardService
    lookup_results = ShardService.ShardService().gfid2path([gfid])
    return lookup_results[0].path


def get_real_path(gfid, file_path):
    """Map a shard or snapshot path back to the path of the file it belongs to.

    * Shard files (``/.vs/shard/...``) are resolved to their base gfid and the
      lookup is repeated, because the base file may itself be a snapshot.
    * Snapshot files (``/vs/snapshot/<xx>/<gfid>/<name>``) are resolved through
      the gfid embedded in the path. The pattern needs the trailing ``/*``:
      without a wildcard ``fnmatch`` only matches the literal string
      ``/vs/snapshot``, so this branch was never taken.
    * Anything else (trash, backup, recycle-bin files) is returned unchanged.

    :param gfid: gfid of the file located at ``file_path``
    :param file_path: path reported for ``gfid``
    :return: path of the owning file
    """
    from shard import ShardService
    shard_service = ShardService.ShardService()
    # Shard file, e.g.
    # /.vs/shard/7e/7e947410-774c-49e5-8c65-26282469d70e/7e947410-774c-49e5-8c65-26282469d70e.00055.shard
    if fnmatch(file_path, '/.vs/shard/*'):
        base_gfid = shard_service.gfid2basegfid([gfid])[0]
        srv_gfid2path = shard_service.gfid2path([base_gfid])[0]
        # The base file may be a snapshot file, so resolve once more.
        return get_real_path(base_gfid, srv_gfid2path.path)
    # Snapshot file, e.g.
    # /vs/snapshot/3d/3d16f67c-2dbe-4232-a329-50a9539f592e/1.qcow2
    elif fnmatch(file_path, '/vs/snapshot/*'):
        base_gfid = file_path.split('/')[4]
        srv_gfid2path = shard_service.gfid2path([base_gfid])[0]
        return srv_gfid2path.path
    # Trash, backup and recycle-bin files keep their original path.
    else:
        return file_path


def get_vmid_by_path(file_path):
    """Extract the virtual machine id embedded in ``file_path``.

    Two layouts are recognised, in this order:

    * VM files (including recycle-bin files): ``.../<id>.vm/...``
    * backup files: ``/private/backup/<id>/...`` or ``/backup/images/<id>/...``

    :param file_path: path to inspect
    :return: the id as a string of digits, or None when nothing matches
    """
    id_patterns = (
        r'/(\d+)\.vm/',
        r"(?:/private/backup/|/backup/images/)(\d+)/",
    )
    for pattern in id_patterns:
        found = re.search(pattern, file_path)
        if found:
            return found.group(1)
    return None


def get_vm_name_by_id(vm_id):
    """Look up the name of a virtual machine by its id.

    The VM list file is consulted first; if the id is not there, the
    recycle-bin configuration ``/cfs/recycle_bin/vm/cluster/<id>.conf`` is
    scanned for a ``vm_name`` line.

    :param vm_id: virtual machine id as a string; None yields ``""``
    :return: the VM name, or ``""`` when it cannot be determined
    """
    if vm_id is None:
        # Nothing to look up; avoids probing a bogus "None.conf" file.
        return ""
    vm_list = read_file_to_dict(VM_INFO_FILE_PATH)
    # read_file_to_dict returns None for invalid JSON, which len() cannot take.
    if not vm_list:
        logger.warning("read {} failed".format(VM_INFO_FILE_PATH))
        return ""
    vm_entry = vm_list.get("ids", {}).get(vm_id, {})
    if len(vm_entry) > 0:
        return vm_entry.get("vm_name", "")
    # Not in the live list: fall back to the recycle bin.
    vm_del_file = "/cfs/recycle_bin/vm/cluster/{}.conf".format(vm_id)
    if not os.path.exists(vm_del_file):
        logger.warning("file {} no exist".format(vm_del_file))
        return ""
    with open(vm_del_file, "r") as file:
        content = file.read()
    for line in content.split('\n'):
        if "vm_name" not in line:
            continue
        fields = line.split(':')
        # Skip lines without a ':' separator instead of raising IndexError.
        if len(fields) > 1 and len(fields[1]) > 1:
            return fields[1].strip()
    return ""


def get_shard_file_and_vm_info(bad_block_gfids):
    """Collect path and virtual machine details for each affected gfid.

    The VM name is looked up only when a VM id could be extracted from the
    real path; previously the lookup also ran with ``None``, which read the
    VM list and logged a warning about a non-existent ``None.conf`` for
    nothing.

    :param bad_block_gfids: gfids of the files hit by bad blocks
    :return: tuple ``(rows, vm_ids)``; each row is
             ``[gfid, shard_file_path, real_path, vm_id, vm_name]`` (empty
             strings when no VM is known) and ``vm_ids`` is the de-duplicated
             list of VM ids that were found
    """
    all_shard_files = []
    vm_ids = set()
    for bad_block_gfid in bad_block_gfids:
        shard_file_path = get_shard_file_by_gfid(bad_block_gfid)
        real_path = get_real_path(bad_block_gfid, shard_file_path)
        vm_id = get_vmid_by_path(real_path)
        if vm_id is None:
            vm_id = ''
            vm_name = ''
        else:
            vm_name = get_vm_name_by_id(vm_id)
            vm_ids.add(vm_id)
        all_shard_files.append([bad_block_gfid, shard_file_path, real_path, vm_id, vm_name])
    return all_shard_files, list(vm_ids)


def read_file_to_dict(file_path):
    """Parse a JSON file and return its content with text values byte-encoded.

    :param file_path: path of the JSON file
    :return: the parsed data passed through ``byteify``, or None when the
             content is not valid JSON (a message is printed in that case)
    """
    try:
        with open(file_path, 'r') as handle:
            raw_text = handle.read()
            # Decode the JSON document, then convert unicode to UTF-8 bytes.
            parsed = json.loads(raw_text, encoding='UTF-8')
            return byteify(parsed)
    except ValueError as e:
        print("文件内容不是有效的JSON格式: {}".format(e))
        return None


def get_gfid_by_address(gfids, device_path, bad_block_address):
    """Return the gfids whose extent dump mentions one of the bad addresses.

    For every gfid the inode is resolved through the running
    ``efs_standalone`` process and its extents are dumped; a gfid is selected
    when the decimal text of any address occurs in any non-blank dump line.
    Gfids whose inode or extents cannot be obtained are logged and skipped.

    NOTE(review): the match is a plain substring test, so an address can also
    match inside a longer number - confirm against the dump format.

    :param gfids: list of gfids to examine
    :param device_path: device path of the EFS volume
    :param bad_block_address: list of bad-block addresses
    :return: list of affected gfids, without duplicates
    """
    efs_standalone_pid = get_efs_standalone_pid(device_path)
    logger.debug("get efs standalone pid: {}".format(efs_standalone_pid))
    needles = [str(address) for address in bad_block_address]
    affected = set()  # a set removes duplicates
    for gfid in gfids:
        try:
            inode = get_inode_by_gfid(efs_standalone_pid, gfid)
            extent_lines = get_extent_by_inode(device_path, inode)
        except Exception as err:
            logger.error("Error getting file {} inode or extent: {}".format(gfid, err))
            continue  # move on to the next gfid
        if any(needle in line
               for line in extent_lines if line.strip()
               for needle in needles):
            affected.add(gfid)
    return list(affected)


def batch_lvm_gfid_iterator(device, batch_size=1000):
    """Yield, in batches, the gfids encoded in the LVM volume names of ``device``.

    ``lsblk -o NAME,TYPE --raw`` prints two columns per row. A row is used
    when its last column is ``lvm`` and a gfid can be parsed from the name.
    The row filter requires at least two columns; the former ``<= 2`` test
    rejected every well-formed row, so the generator never yielded anything.

    :param device: disk to inspect, e.g. ``/dev/sdb``
    :param batch_size: maximum number of gfids per yielded list
    :yield: lists of gfid strings
    """
    cmd = ["lsblk", "-o", "NAME,TYPE", "--raw", device]
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as process:
        batch = []  # gfids accumulated for the current batch
        for line in iter(process.stdout.readline, ''):
            columns = line.strip().decode('utf-8').split(" ")
            # Need both NAME and TYPE, and only LVM volumes are of interest.
            if len(columns) < 2 or columns[-1].strip() != "lvm":
                continue
            gfid = get_gfid_by_lvm(columns[0].strip())
            if gfid:
                batch.append(os.path.basename(gfid))
            # Hand out the batch once it has reached the requested size.
            if len(batch) == batch_size:
                yield batch
                batch = []
        # A final, partially filled batch is yielded as well.
        if batch:
            yield batch


def get_lvm_gfid_by_address(gfids, bad_block_address):
    """Select the gfids whose LVM volumes cover one of the bad addresses.

    :param gfids: gfids to check
    :param bad_block_address: list of bad-block addresses
    :return: list of matching gfids, without duplicates
    """
    return list({gfid for gfid in gfids if is_gfid_in_address(gfid, bad_block_address)})


def is_gfid_in_address(gfid, bad_block_address):
    """Check whether any LV belonging to ``gfid`` covers a bad address.

    ``dmsetup table`` rows containing the gfid's hex form are parsed; field 2
    is taken as the length and field 5 as the offset, both in sectors. The
    byte range of an LV is ``[start, end)``, so the end is exclusive: an
    address equal to ``end`` is the first byte after the volume.

    :param gfid: gfid as a uuid string
    :param bad_block_address: iterable of bad-block addresses in bytes
    :return: True if an address falls inside one of the LVs; False otherwise
             or when the command fails (e.g. no matching row)
    """
    gfid = uuid.UUID(gfid).hex
    cmd = "/sbin/dmsetup table | grep {}".format(gfid)
    try:
        output = subprocess.check_output(cmd, shell=True)
    except Exception as e:
        logger.error("exec {} failed, error:{}".format(cmd, e))
        return False
    for row in output.strip().split("\n"):
        fields = row.split()
        if len(fields) < 6:
            # Not a row with the expected columns; ignore it.
            continue
        start = int(fields[5]) * SECTOR_SIZE + PART_PV_OFFSET
        end = start + int(fields[2]) * SECTOR_SIZE
        if any(start <= address < end for address in bad_block_address):
            return True
    return False


def get_gfid_by_lvm(lv_name):
    """Extract the gfid embedded in an LV name.

    The candidate is the text between the first ``_`` and the following
    ``.``; it must parse as a UUID.

    :param lv_name: LV name such as ``<vg>_<32 hex chars>.0``
    :return: the gfid in canonical dashed form, or None (after logging) when
             the candidate is not a valid UUID
    """
    candidate = lv_name.partition("_")[2].partition(".")[0]
    try:
        return str(uuid.UUID(candidate))
    except Exception as err:
        logger.error("Error getting gfid: {}".format(err))
        return None


def get_lv_efs_by_disk(device):
    """Find the ``lv_efs`` volume on ``device`` and return its mapper path.

    ``lsblk -o NAME,TYPE --raw`` is filtered with ``grep lv_efs``; the name in
    the first matching row is joined onto ``/dev/mapper/``.

    :param device: disk to inspect, e.g. ``/dev/sdb``
    :return: ``/dev/mapper/<name>`` of the first match (``""`` if the row
             list is empty)
    :raises Exception: the command failure (including no match) is logged and
                       re-raised
    """
    cmd = "lsblk -o NAME,TYPE --raw {} | grep lv_efs".format(device)
    try:
        output = subprocess.check_output(cmd, shell=True)
    except Exception as err:
        logger.error("cmd:{} failed, err:{}".format(cmd, err))
        raise err
    logger.debug("cmd:{}, output:{}".format(cmd, output))
    rows = output.strip().split("\n")
    if not rows:
        return ""
    first_name = rows[0].split(" ")[0].strip()
    return os.path.join("/dev/mapper/", first_name)


def write_to_csv(data, file_path, mode='w'):
    """Write a two-dimensional sequence to a CSV file.

    The parent directory is created when needed. A bare file name (empty
    directory part) is supported, and a directory created concurrently by
    someone else is tolerated. Nothing is written when ``data`` is empty.

    :param data: sequence of rows, each row a sequence of cell values
    :param file_path: destination CSV path
    :param mode: mode passed to ``open`` (``'w'`` to overwrite, ``'a'`` to append)
    """
    dir_name = os.path.dirname(file_path)
    # dirname is '' for a bare file name; os.makedirs('') would raise.
    if dir_name and not os.path.exists(dir_name):
        try:
            os.makedirs(dir_name)
        except OSError:
            # Another thread/process may have created it in the meantime.
            if not os.path.isdir(dir_name):
                raise
    if len(data) == 0:
        return
    with open(file_path, mode) as file:
        # Each inner sequence becomes one CSV row.
        csv.writer(file).writerows(data)


def byteify(input):
    """Recursively convert unicode strings inside ``input`` to UTF-8 bytes.

    Dicts (keys and values) and lists are walked recursively; any other value
    is returned unchanged.

    :param input: value to convert
    :return: a new dict/list with converted content, the encoded string, or
             ``input`` itself
    """
    if isinstance(input, dict):
        converted = {}
        for key, value in input.iteritems():
            converted[byteify(key)] = byteify(value)
        return converted
    if isinstance(input, list):
        return list(map(byteify, input))
    if isinstance(input, unicode):
        return input.encode('utf-8')
    return input


def batch_files_iterator(dir, batch_size=1000):
    """Iterate over the regular files below ``dir`` in batches of base names.

    The file list is streamed from ``find <dir> -type f`` so that it never
    has to be held in memory as a whole.

    :param dir: directory to search
    :param batch_size: number of file names per batch
    :yield: lists of file base names; the last one may be shorter
    """
    find_cmd = ['find', dir, '-type', 'f']  # regular files only
    with subprocess.Popen(find_cmd, stdout=subprocess.PIPE) as process:
        pending = []  # names collected for the current batch
        for raw_line in iter(process.stdout.readline, ''):
            path_text = raw_line.strip().decode('utf-8')
            pending.append(os.path.basename(path_text))

            if len(pending) != batch_size:
                continue
            # The batch is full: hand it out and start a new one.
            yield pending
            pending = []

        # Hand out whatever is left over.
        if pending:
            yield pending


def get_parser():
    """Build the command-line parser of the tool.

    Options: ``-d/--disk``, ``-p/--mountpoint`` and ``-a/--address`` are
    required strings; ``-n/--thread_num`` is an optional int defaulting to 20.

    :return: the configured ``argparse.ArgumentParser``
    """
    parser = argparse.ArgumentParser(description="get file and vm info by physical address")

    # (short flag, long flag, help text): disk, mount point, address file
    required_options = (
        ("-d", "--disk", "disk letter, example: /dev/sdb"),
        ("-p", "--mountpoint", "mount path"),
        ("-a", "--address", "physical address file"),
    )
    for short_flag, long_flag, help_text in required_options:
        parser.add_argument(short_flag, long_flag, required=True, type=str,
                            help=help_text)

    # Number of worker threads
    parser.add_argument("-n", '--thread_num', required=False, type=int,
                        default=20,
                        help='This parameter can be used to change the number of threads.Default value: 20. Valid values are integers between 1 and 64')
    return parser


def do_get_file_by_address(gfid_file_list, device_path, block_address, gfid_type, lock):
    """Worker: find the affected files in one chunk of gfids and record them.

    Matching rows are appended to the shard-file CSV, and the global counters
    ``TOTAL_SHARD_FILE_COUNT`` and ``TOTAL_VM_IDS`` are updated, all while
    holding ``lock``.

    :param gfid_file_list: chunk of gfids to examine
    :param device_path: device path (used for the ``"efs"`` type only)
    :param block_address: list of bad-block addresses
    :param gfid_type: ``"efs"`` or ``"lvm"``; anything else is logged and ignored
    :param lock: lock serialising the file write and counter updates
    """
    global TOTAL_SHARD_FILE_COUNT

    resolvers = {
        "efs": lambda: get_gfid_by_address(gfid_file_list, device_path, block_address),
        "lvm": lambda: get_lvm_gfid_by_address(gfid_file_list, block_address),
    }
    resolve = resolvers.get(gfid_type)
    if resolve is None:
        logger.error("unknow gfid type {}".format(gfid_type))
        return

    shard_files, vm_ids = get_shard_file_and_vm_info(resolve())
    with lock:
        if len(shard_files) > 0:
            write_to_csv(shard_files, OUT_SHARD_FILES_FILE_PATH, 'a')
            TOTAL_SHARD_FILE_COUNT += len(shard_files)
        # Different threads may see shards of the same VM: keep ids unique.
        unseen_vm_ids = list(set(vm_ids) - set(TOTAL_VM_IDS))
        if len(unseen_vm_ids) > 0:
            TOTAL_VM_IDS.extend(unseen_vm_ids)

def get_file_by_address(disk_path, mnt_path, bad_block_file_path, thread_num=None):
    """Find the files affected by the bad blocks listed in a file.

    The lv_efs volume of ``disk_path`` is mounted on ``mnt_path``; LVM shard
    volumes and EFS files are then examined by worker threads, which write
    their findings to the shard-file CSV. Workers are always joined before
    the volume is unmounted, even if producing the batches fails.

    :param disk_path: disk to analyse, e.g. ``/dev/sdb``
    :param mnt_path: directory to mount the lv_efs volume on
    :param bad_block_file_path: file with one bad-block address per line
    :param thread_num: maximum number of concurrent workers; when None, the
                       ``thread_num`` attribute of a module-level ``args``
                       object is used if one exists, otherwise 20
    :return: 0 on success, 1 when a precondition fails
    """
    lv_path = get_lv_efs_by_disk(disk_path)
    if lv_path == "":
        logger.error("get lv_efs failed")
        return 1

    # Read the bad-block addresses.
    try:
        physical_addresses = get_address_by_file(bad_block_file_path)
    except Exception as e:
        logger.error("getting bad block physical address fail: {}".format(e))
        return 1
    if len(physical_addresses) == 0:
        logger.error("no bad block physical address")
        return 1

    if thread_num is None:
        # Backward compatibility: this function used to read a global ``args``
        # namespace that is not defined in this module's visible code.
        thread_num = getattr(globals().get('args'), 'thread_num', 20)
    if thread_num < 1:
        # Semaphore(0) would block forever on the first acquire.
        logger.error("invalid thread number {}".format(thread_num))
        return 1

    # Lock serialising CSV writes and counter updates in the workers.
    lock = threading.Lock()
    # Semaphore bounding the number of concurrently running workers.
    semaphore = threading.Semaphore(thread_num)
    threads = []

    def spawn_workers(batches, device_path, addresses, gfid_type):
        # Start one worker per chunk of 100 gfids, throttled by the semaphore.
        for gfid_file_list in batches:
            for start in range(0, len(gfid_file_list), 100):
                chunk = gfid_file_list[start:start + 100]
                semaphore.acquire()
                thread = CallbackThread(callback=semaphore.release,
                                        target=do_get_file_by_address,
                                        args=(chunk, device_path, addresses, gfid_type, lock))
                thread.start()
                threads.append(thread)

    with mounted_disk(lv_path, mnt_path):
        # Truncate the CSV file and write its header.
        write_to_csv([["gfid", "shard_file", "real_file", "vm_id", "vm_name"]], OUT_SHARD_FILES_FILE_PATH)

        try:
            # LVM shard volumes (volumes expanded from two to three hosts).
            spawn_workers(batch_lvm_gfid_iterator(disk_path, 1000), "", physical_addresses, "lvm")

            # EFS shard files; names are streamed in batches to bound memory.
            lv_address = get_lv_address(physical_addresses, lv_path)
            spawn_workers(batch_files_iterator(mnt_path, 1000), lv_path, lv_address, "efs")
        finally:
            # Never unmount while workers are still using the mount.
            for thread in threads:
                thread.join()
        sys.stdout.write('共{}个分片文件写入 {}\n'.format(TOTAL_SHARD_FILE_COUNT, OUT_SHARD_FILES_FILE_PATH))
        sys.stdout.write('共影响{}个虚拟机\n'.format(len(TOTAL_VM_IDS)))
    return 0


def addr2file(disk, mountpoint, address):
    """Entry point: map physical addresses on ``disk`` to the affected files.

    Runs under the ``/var/lock/vs_get_pa_lock.lock`` single-process lock.

    :param disk: disk to analyse
    :param mountpoint: directory to mount the EFS volume on
    :param address: path of the bad-block address file
    :return: 1 when the environment check fails, otherwise the result of
             ``get_file_by_address``
    """
    logger.info("start convert physical address")
    # The lock guarantees that only one instance runs at a time.
    with SingleProc("/var/lock/vs_get_pa_lock.lock"):
        if check_evn(disk):
            return get_file_by_address(disk, mountpoint, address)
        logger.error("check environment fail")
        return 1

