#!/bin/bash

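#
#       Put a cache disk into maintenance mode: flush its tier and write cache.
#       Main work:
#           wait until the write cache and the tier are fully flushed to disk
#       author: wangjing
#       date:   2018-11-13
#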

#----------------------- Global variables ---------------------------

. /.PATH
. vs_common.sh
. vs_logger.sh

# Write-cache config, tier config, and the directory holding per-disk config files.
CFG_CACHE="/sf/cfg/vs/cache/wcache.json"
CFG_TIER="/sf/cfg/vs/cache/tier.json"
CFG_DISK_PATH="/sf/cfg/vs/disk"
# Both are filled in by main(): the optional progress file (second argument)
# and the abort flag path derived from the disk id.
CFG_PROGRESS_FILE=
FLUSH_ABORT_FLAG=
# In maintenance mode gluster commands have to be run against the master node.
# NOTE(review): bash expands aliases in scripts only when `shopt -s expand_aliases`
# is enabled - confirm that one of the sourced files turns it on.
alias gluster="gluster_orig"

#------------------------ Functions ------------------------------

# Check whether the given disk is configured as a cache disk.
# Arguments: $1 - disk id (basename of the JSON file under /sf/cfg/vs/disk)
# Returns:   0 if the disk config contains STORAGE_CACHE,
#            1 if it does not,
#            2 if the disk config file does not exist
function if_cache_disk()
{
    local disk_id=$1
    local disk_file="/sf/cfg/vs/disk/${disk_id}.json"

    # Quoted so an unusual disk id cannot break the test expression and
    # silently turn "file missing" (2) into "not a cache disk" (1).
    if [ ! -f "$disk_file" ]; then
        log_error "the disk file {$disk_file} is not exist"
        return 2
    fi

    if ! grep -q 'STORAGE_CACHE' "$disk_file"; then
        log_info "the disk {$disk_id} is not cache disk"
        return 1
    fi
    log_info "the disk ${disk_id} is cache disk"
    return 0
}

# Check that the write cache and tier configuration both reference this disk.
# Globals:   CFG_CACHE, CFG_TIER (read)
# Arguments: $1 - disk id
# Returns:   0 when the disk's partition uuid is found in both config files,
#            1 when the uuid cannot be read or is missing from either file
function check_wcache_tier()
{
    local disk_id=$1
    local disk_file="/sf/cfg/vs/disk/${disk_id}.json"
    local part_uuid=""

    # First part_uuid value of the disk config, cut to its first 38 characters.
    # NOTE(review): the sibling functions use `grep -w part_uuid`; confirm whether
    # the looser match here is intentional.
    part_uuid=$(grep part_uuid "$disk_file" | awk -F\" '{print $4}' | head -1 | cut -c 1-38)
    if [ -z "$part_uuid" ]; then
        # \$4 is escaped so the log shows the awk program instead of an
        # (empty) positional parameter.
        log_error "grep part_uuid $disk_file  | awk -F\" '{print \$4}' | head -1 | cut -c 1-38 failed"
        return 1
    fi
    log_info "the part_uuid is $part_uuid"

    if ! grep -q "$part_uuid" "$CFG_CACHE"; then
        log_error "grep '$part_uuid' '$CFG_CACHE' -q failed"
        return 1
    fi

    if ! grep -q "$part_uuid" "$CFG_TIER"; then
        log_error "grep '$part_uuid' '$CFG_TIER' -q failed"
        return 1
    fi

    log_info "the wcache or tier file is normal"
    return 0
}

# Map the cleaning progress of one phase onto the overall enter-maintain
# progress and store it in the progress file through vs_ssh on the VT master.
#
# enter_maintain is at 80% when this step starts and this step owns 15% of the
# total; the wcache phase fills the first half of that share, the tier phase
# the second half.
#
# Globals:   CFG_PROGRESS_FILE (read; nothing is written when it is empty),
#            VSD_BIN (read)
# Arguments: $1 - phase name, matched against "wcache" / "tier"
#            $2 - progress of that phase as a decimal fraction
# Returns:   0 on success or when no progress file is configured,
#            1 on a wrong argument count, an uncomputable or out-of-range
#            value, or a failed write
function update_wcache_tier_clean_progress() {
    if [ $# -ne 2 ]; then
        log_error "invalid param $*"
        return 1
    fi
    local op=$1
    local progress=$2
    local base=80
    local quota=0.15
    local result=0
    local vtmaster=

    local file=$CFG_PROGRESS_FILE
    local lastprogress=$base

    vtmaster=$(vs_check_if_vt_master)

    log_info "$vtmaster: $op clean progress reached $progress"

    # Offset inside this step's share: wcache starts at 0, tier at the midpoint.
    [[ $op =~ wcache ]] && result=0
    [[ $op =~ tier ]] && result=0.5
    result=$(echo "scale = 2;
                $base + ($result + $progress / 2) * 100 * $quota" | bc)
    # bc prints values below 1 without a leading zero; then drop the fraction.
    [[ $result =~ ^\. ]] && result=0$result
    result=${result%.*}

    if [[ -z "$file" ]]; then
        return 0
    fi

    # An empty or non-numeric result (bc failure, bad input) must never be
    # written to the progress file.
    if [[ ! $result =~ ^-?[0-9]+$ ]]; then
        log_error "invalid progress $result, last $lastprogress, exit"
        return 1
    fi

    # $vtmaster stays unquoted on purpose: the output format of
    # vs_check_if_vt_master is not visible in this file.
    lastprogress=$("${VSD_BIN}/vs_ssh" $vtmaster jq .progress "$file")
    if [[ ! $lastprogress =~ ^-?[0-9]+$ ]]; then
        # Without a readable previous value only the upper bound can be checked.
        log_info "cannot read last progress {$lastprogress} from $file, skip regression check"
        lastprogress=$result
    fi

    if [ "$result" -ge 100 ] || [ "$result" -lt "$lastprogress" ]; then
        log_error "invalid progress $result, last $lastprogress, exit"
        return 1
    fi

    if ! "${VSD_BIN}/vs_ssh" $vtmaster \
        "${VSD_BIN}/vs_json_rw.py -f $file -w -k progress -v $result -t int"; then
        log_error "failed to update enter-maintain progress to $result"
        return 1
    fi
    log_info "update enter-maintain progress to $result"
    return 0
}

# Wait until the write cache at the given path has flushed all of its data.
#
# Polls `vs_make_wcache -C <path> -S` every 5 seconds and compares every
# "fifo head" value with the matching "fifo tail" value; the cache counts as
# clean once each pair is equal. The summed head/tail distance seen on the
# first poll is the baseline for progress reporting.
#
# Globals:   FLUSH_ABORT_FLAG (read), VSD_BIN (read)
# Arguments: $1 - write cache path handed to vs_make_wcache
# Returns:   0 when the cache is clean or reports "backdev num" 0,
#            1 when aborted by the flag or when head/tail cannot be read
function wait_clean_wcache(){
    local path=$1
    local wcacheInfo=""
    local backdevNum=""
    local headArr=""
    local tailArr=""
    local idx=""
    log_info "################# waitDev ${path} ##################"

    # Keep polling until every fifo tail equals its fifo head.
    local cycFlag=True
    # Progress is a decimal fraction.
    local progress=0
    local turn_num=0
    local turn_max=0
    local turn_now=0
    local diff=
    local last_progress=
    while [ x"$cycFlag" == x"True" ]
    do
        if super_zkcli.py exists "$FLUSH_ABORT_FLAG"; then
            log_info "abort due to flag"
            return 1
        fi
        cycFlag=False

        # turn_max is still 0 on the first pass; skip the estimate instead of
        # asking bc to divide by zero.
        # NOTE(review): turn_num counts polls while turn_max is a FIFO distance -
        # confirm that this ratio is the intended estimate.
        if [ "$turn_max" -gt 0 ]; then
            progress=$(echo "scale = 2; $turn_num / $turn_max" | bc)
            if [ x"$progress" != x"$last_progress" ]; then
                update_wcache_tier_clean_progress wcache "$progress"
                last_progress=$progress
            fi
        fi

        wcacheInfo=$("${VSD_BIN}/vs_make_wcache" -C "$path" -S)
        backdevNum=$(printf "%s" "${wcacheInfo}" | awk -F: '{if($1~"backdev num")print $2}')
        if [ "$backdevNum" == "0" ]; then
            log_info "no brick in wcache"
            return 0
        fi
        headArr=($(printf "%s" "${wcacheInfo}" | awk -F: '{if($1~"fifo head")print $2}'))
        if [ "${headArr}" == "" ]; then
            log_error "failed to get headArr"
            return 1
        fi
        log_info "headArr : ${headArr[*]}"
        tailArr=($(printf "%s" "${wcacheInfo}" | awk -F: '{if($1~"fifo tail")print $2}'))
        if [ "${tailArr}" == "" ]; then
            log_error "failed to get tailArr"
            return 1
        fi

        # Sum of |tail - head| over all FIFOs = amount still to be flushed.
        turn_now=0
        log_info "tailArr : ${tailArr[*]}"
        for idx in "${!tailArr[@]}"
        do
            log_info "idx:${idx} tail:${tailArr[idx]} head:${headArr[idx]}"
            diff=$(expr "${tailArr[idx]}" - "${headArr[idx]}")
            diff=${diff#-}
            turn_now=$(expr "$diff" + "$turn_now")
            if [ "${tailArr[idx]}" != "${headArr[idx]}" ]; then
                cycFlag=True
            fi
        done

        # The first measurement becomes the baseline (at least 1 so it can be
        # used as a divisor).
        if [ "$turn_max" -le 0 ]; then
            turn_max=$turn_now
            [ "$turn_max" -le 0 ] && turn_max=1
        fi

        progress=$(echo "scale = 2; 1 - ($turn_now / $turn_max)" | bc)
        update_wcache_tier_clean_progress wcache "$progress"

        log_info "cycFlag:${cycFlag}"
        sleep 5
        turn_num=$((turn_num + 1))
    done

    # The loop only ends once every head/tail pair matched.
    log_info "wait clean wcache finished"
    return 0
}

# Kick the tier data off the given SSD and wait until it is completely clean.
#
# Every 10 seconds the tier state is dumped with `vs_tier_cli.py -c dump`; the
# entry whose ssd_uuid equals $1 is looked up and its dirty_block_cnt and
# clean_block_cnt are read. The SSD counts as clean when both are 0; until
# then the kickout command is issued again.
#
# Globals:   FLUSH_ABORT_FLAG (read), VSD_BIN (read)
# Arguments: $1 - ssd uuid of the tier device
# Returns:   0 once both block counters are 0,
#            1 when aborted by the flag, the dump fails, or the uuid is not
#            present in the dump
function wait_clean_tier()
{
    local uuid=$1
    local ret=0
    local tmp_file=""
    local turn_num=0
    local turn_max=1
    local turn_now=0
    # Progress is a decimal fraction.
    local progress=0
    local last_progress=
    local ssd_uuids=""
    local ssd_uuid=""
    local cnt=0
    local find_ret=0
    local dirty_block_cnt=""
    local clean_block_cnt=""

    "${VSD_BIN}/vs_tier_cli.py" -c kickout -a "ssd_uuid=$uuid" > /dev/null
    ret=$?
    log_info "${VSD_BIN}/vs_tier_cli.py -c kickout -a ssd_uuid=$uuid. ret is $ret"

    tmp_file=$(vs_mktemp)
    if [ -z "$tmp_file" ]; then
        log_error "vs_mktemp failed"
        return 1
    fi

    while true
    do
        if super_zkcli.py exists "$FLUSH_ABORT_FLAG"; then
            log_info "abort due to flag"
            # do not leave the temp file behind on abort
            vs_del_file "$tmp_file"
            return 1
        fi
        if ! "${VSD_BIN}/vs_tier_cli.py" -c dump > "$tmp_file"; then
            log_error " ${VSD_BIN}/vs_tier_cli.py -c dump >$tmp_file failed"
            vs_del_file "$tmp_file"
            return 1
        fi

        ssd_uuids=$(grep ssd_uuid "$tmp_file" | awk -F\" '{print $4}')
        log_info "ssd_uuids is $ssd_uuids"

        # cnt ends up as the position of this disk among the ssd_uuid lines;
        # it is used below as the index into the "ssd" list of the dump.
        cnt=0
        find_ret=0
        for ssd_uuid in $ssd_uuids
        do
            if [ x"$ssd_uuid" == x"$uuid" ]; then
                log_info "the cnt is $cnt, uuid {$ssd_uuid} is this disk"
                find_ret=1
                break
            fi
            cnt=$((cnt + 1))
            log_info "the ssd_uuid {$ssd_uuid} is not the disk uuid {$uuid}"
        done

        if [ $find_ret -ne 1 ]; then
            log_error "get the disk tier info filed"
            vs_del_file "$tmp_file"
            return 1
        fi

        dirty_block_cnt=$("${VSD_BIN}/vs_json_rw.py" -f "$tmp_file" -r -k "ssd.$cnt.dirty_block_cnt")
        clean_block_cnt=$("${VSD_BIN}/vs_json_rw.py" -f "$tmp_file" -r -k "ssd.$cnt.clean_block_cnt")
        # The largest block total seen so far is the baseline; what is missing
        # from it has already been kicked out.
        turn_now=$((dirty_block_cnt + clean_block_cnt))
        [ "$turn_max" -lt "$turn_now" ] && turn_max=$turn_now
        turn_num=$((turn_max - turn_now))

        if [ x"$dirty_block_cnt" == x"0" ] && [ x"$clean_block_cnt" == x"0" ]; then
            progress=1
            update_wcache_tier_clean_progress tier $progress
            log_info "wait clean tier finished"
            vs_del_file "$tmp_file"
            return 0
        fi

        progress=$(echo "scale = 2; $turn_num / $turn_max" | bc)
        # Only push the progress when it actually changed.
        if [ x"$progress" != x"$last_progress" ]; then
            update_wcache_tier_clean_progress tier "$progress"
            last_progress=$progress
        fi

        "${VSD_BIN}/vs_tier_cli.py" -c kickout -a "ssd_uuid=$uuid" > /dev/null
        ret=$?
        log_info "${VSD_BIN}/vs_tier_cli.py -c kickout -a ssd_uuid=$uuid. ret is $ret"
        sleep 10
    done
}


# Wait until both the write cache and the tier of the given cache disk are clean.
#
# The disk's partition uuid is read from its config file, the matching write
# cache path and tier ssd_uuid are looked up in the cache config files, and
# then wait_clean_wcache and wait_clean_tier are run in that order.
#
# Arguments: $1 - disk id
# Returns:   0 when both waits succeed, 1 on any lookup or wait failure
function wait_wcache_tier()
{
    local disk_id=$1
    local disk_file="/sf/cfg/vs/disk/${disk_id}.json"
    local part_uuid=""
    local wcache_path=""
    local tier_path=""

    # \$4 is escaped in the log messages below so they show the awk program
    # rather than an (empty) positional parameter.
    part_uuid=$(grep -w part_uuid "$disk_file" | awk -F\" '{print $4}' | head -1 | cut -c 1-38)
    if [ -z "$part_uuid" ]; then
        log_error "grep -w part_uuid $disk_file | awk -F\" '{print \$4}' | head -1 | cut -c 1-38 failed"
        return 1
    fi
    log_info "part_uuid is $part_uuid"

    wcache_path=$(grep -w path /sf/cfg/vs/cache/wcache.json | grep -- "$part_uuid" | awk -F\" '{print $4}')
    if [ -z "$wcache_path" ]; then
        log_error "grep -w path /sf/cfg/vs/cache/wcache.json | grep $part_uuid | awk -F\" '{print \$4}' failed"
        return 1
    fi
    log_info "wcache_path is $wcache_path"

    # Despite its name this holds the ssd_uuid value from tier.json.
    tier_path=$(grep -w ssd_uuid /sf/cfg/vs/cache/tier.json | grep -- "$part_uuid" | awk -F\" '{print $4}')
    if [ -z "$tier_path" ]; then
        log_error "grep -w ssd_uuid /sf/cfg/vs/cache/tier.json | grep $part_uuid | awk -F\" '{print \$4}' failed"
        return 1
    fi
    log_info "tier_path is $tier_path"

    if ! wait_clean_wcache "$wcache_path"; then
        log_error "wait wcache failed"
        return 1
    fi

    if ! wait_clean_tier "$tier_path"; then
        log_error "wait tier failed"
        return 1
    fi

    log_info "wait wcache tier success"
    return 0
}

# Print the files that belong to the brick identified by a partition uuid.
#
# Non-arbiter volume: the brick path is looked up in wcache.json and every
#   *.qcow2 file below it is printed.
# Arbiter volume: /dev/<uuid>/lv_efs is mounted read-only on /mnt/efs/<uuid>
#   with efs_standalone and the entries of that directory are printed. The
#   mount is left in place (clean_tmp_mount_by_uuid removes it).
#
# Arguments: $1 - partition uuid
# Outputs:   the file list on stdout (callers capture it with $(...))
# Returns:   0 on success, 1 on failure
#
# NOTE(review): log_info/log_error are called while stdout is being captured by
# the caller - confirm the logger does not write to stdout.
function get_all_files_from_uuid()
{
    local uuid=$1
    local mount_path="/mnt/efs/$uuid"
    local all_files=""
    local arbiter=""
    local brickpath=""

    # Decide whether this is a two-host volume or a three-host (arbiter) one;
    # on two hosts the files can be read straight from the brick path.
    arbiter=$(gluster vol i | grep -w arbiter)
    if [ -z "$arbiter" ]; then
        log_info "the volume is not arbiter volume"
        brickpath=$(grep -- "$uuid" /sf/cfg/vs/cache/wcache.json | awk -F\" '{print $4}')
        if [ -z "$brickpath" ]; then
            log_error "grep $uuid /sf/cfg/vs/cache/wcache.json | awk -F\" '{print \$4}' failed"
            return 1
        fi
        # $brickpath is left unquoted: several matching lines yield several paths.
        all_files=$(find $brickpath -name "*.qcow2")
        printf '%s\n' "$all_files"
        return 0
    fi
    log_info "the volume is arbiter volume"

    if ! mkdir -p "$mount_path"; then
        log_error "mkdir -p $mount_path failed"
        return 1
    fi

    # Best-effort removal of a stale mount, then make sure it is really gone.
    # grep -q keeps the mount line out of stdout, which is the return channel.
    umount "$mount_path"
    if mount | grep efs_standalone | grep -q -- "$uuid"; then
        log_error "mount | grep efs_standalone | grep $uuid failed"
        return 1
    fi
    log_info "will efs_standalone -i ulvm -p '/dev/$uuid/lv_efs' -o readonly $mount_path"

    # Anything the mount helper prints goes to stderr so it cannot end up in
    # the file list.
    efs_standalone -i ulvm -p "/dev/$uuid/lv_efs" -o readonly "$mount_path" >&2
    sleep 3

    if ! mount | grep efs_standalone | grep -q -- "$uuid"; then
        log_error "efs_standalone -i ulvm -p '/dev/$uuid/lv_efs' -o readonly $mount_path failed"
        return 1
    fi

    # NOTE(review): this prints bare entry names, not paths below $mount_path;
    # confirm that the caller's getfattr can resolve them.
    all_files=$(ls "$mount_path")
    echo $all_files
    return 0
}

# Remove the temporary efs mount created for a partition uuid.
#
# On a non-arbiter volume nothing is mounted, so this is a no-op. On an
# arbiter volume /mnt/efs/<uuid> is unmounted and the directory removed.
#
# Arguments: $1 - partition uuid (must not be empty)
# Returns:   0 on success or when there is nothing to do,
#            1 when the uuid is empty or the mount is still present after
#            umount; otherwise the exit status of rm
function clean_tmp_mount_by_uuid()
{
    local uuid=$1
    local arbiter=""
    local mount_path=""

    if [ -z "$uuid" ]; then
        log_error "uuid is null"
        return 1
    fi

    # Two-host volumes read the brick directly; only three-host (arbiter)
    # volumes use the temporary mount.
    arbiter=$(gluster vol i | grep -w arbiter)
    if [ -z "$arbiter" ]; then
        log_info "the volume is not arbiter volume"
        return 0
    fi

    log_info "the volume is arbiter volume"
    mount_path="/mnt/efs/$uuid"

    umount "$mount_path"
    if mount | grep efs_standalone | grep -q -F -- "$uuid"; then
        log_error "mount | grep efs_standalone | grep $uuid failed"
        return 1
    fi

    # Quoted and guarded: rm -rf must only ever see the one mount directory.
    rm -rf -- "${mount_path:?}"
}

# Verify that no file on the disks grouped with this cache disk still carries
# a non-zero tier_status or wcache extended attribute.
#
# The disk_group_id line of the cache disk's config is used to find the other
# disk config files of the same group; for each of their part_uuid values the
# file list from get_all_files_from_uuid is checked with getfattr.
#
# Globals:   CFG_DISK_PATH (read)
# Arguments: $1 - disk id of the cache disk
# Returns:   0 when every checked file is clean, 1 on lookup failure, cleanup
#            failure, or when any file still has a non-zero attribute
function check_wcacheid_tierid()
{
    local disk_id=$1
    local disk_file="/sf/cfg/vs/disk/${disk_id}.json"

    local disk_group_id=""
    local disk_files=""
    local part_uuids=""
    local uuid=""
    local file=""
    local all_files=""
    local tier_status=""
    local wcache=""
    local file_ok=1
    local ret=0

    # Whole disk_group_id line with surrounding whitespace trimmed.
    disk_group_id=$(grep disk_group_id "$disk_file" | sed 's/^[ \t]*//g' | sed 's/[ \t]*$//g')
    if [ -z "$disk_group_id" ]; then
        log_error "grep disk_group_id $disk_file failed"
        return 1
    fi

    # Every other disk config that contains the same group line.
    disk_files=$(grep -l -- "$disk_group_id" "${CFG_DISK_PATH}"/* | grep -v -- "$disk_id")
    if [ -z "$disk_files" ]; then
        log_error "grep -l '$disk_group_id' ${CFG_DISK_PATH}/* | grep -v $disk_id failed"
        return 1
    fi

    # $disk_files is left unquoted: it is a list of file names.
    part_uuids=$(grep -w part_uuid $disk_files | grep -v meta | awk -F\" '{print $4}')
    if [ -z "$part_uuids" ]; then
        log_error "grep -w part_uuid $disk_files | grep -v meta | awk -F\" '{print \$4}' failed"
        return 1
    fi
    log_info "part_uuids is $part_uuids"

    # Check that no file still carries a write-cache or tier marker.
    for uuid in $part_uuids
    do
        ret=0
        all_files=$(get_all_files_from_uuid "$uuid")
        if [ $? -ne 0 ]; then
            log_error "get_all_files_from_uuid $uuid failed"
            clean_tmp_mount_by_uuid "$uuid"
            return 1
        fi

        for file in $all_files
        do
            file_ok=1

            tier_status=$(getfattr -e hex -n user.glusterfs.tier_status "$file" 2> /dev/null | grep tier_status | awk -F= '{print $2}')
            if [ "$tier_status" != "" ] && [ "$tier_status" != "0x0000000000000000" ]; then
                log_error "$file check tier fail"
                file_ok=0
            fi

            wcache=$(getfattr -e hex -n user.glusterfs.wcache "$file" 2> /dev/null | grep wcache | awk -F= '{print $2}')
            if [ "$wcache" != "" ] && [ "$wcache" != "0x0000000000000000" ]; then
                log_error "$file check wcache fail"
                file_ok=0
            fi

            if [ $file_ok -eq 1 ]; then
                log_info "$file ok"
            else
                ret=1
            fi
        done

        # Release this uuid's temporary mount before moving on, so a mount is
        # not left behind for every uuid but the last one.
        if ! clean_tmp_mount_by_uuid "$uuid"; then
            log_error "clean_tmp_mount_by_uuid $uuid failed"
            return 1
        fi

        if [ $ret -ne 0 ]; then
            log_error "check brick:$uuid tier_status and wcache failed"
            return 1
        fi
        log_info "check brick:$uuid tier_status and wcache success"
    done

    log_info "check_wcacheid_tierid success"
    return 0
}
# Turn off delayed write-cache flushing by setting the gluster volume option
# performance.wcc-delay-time to 0.
#
# Nothing is changed when `gluster volume info` already shows the value 0. If
# setting the option fails, the state is re-checked and the set is retried
# once after 5 seconds.
#
# Returns: 0 when the option is (or already was) 0, 1 otherwise
#
# NOTE(review): the whole output of `gluster volume list` is used as the
# volume name - this assumes exactly one volume; confirm.
function set_wcc_delay_off()
{
    local ret=0
    local volume_name=""

    if gluster volume info | grep -q "performance.wcc-delay-time: 0"; then
        log_info "volume already has performance.wcc-delay-time: 0"
        return 0
    fi

    volume_name=$(gluster volume list)
    if [ -z "$volume_name" ]; then
        log_error "get volume name exec ${GFS_SBIN}gluster volume list failed"
        return 1
    fi
    log_info "volume_name is $volume_name"

    gluster volume set "$volume_name" performance.wcc-delay-time 0
    ret=$?
    if [ $ret -ne 0 ]; then
        # The set may have been applied despite the non-zero status.
        if gluster volume info | grep -q "performance.wcc-delay-time: 0"; then
            log_info "volume $volume_name has performance.wcc-delay-time: 0"
            return 0
        fi
        log_error "failed to close performance.wcc-delay-time, ret is: $ret"
        sleep 5
        # Try once more, against the same volume.
        gluster volume set "$volume_name" performance.wcc-delay-time 0 || return 1
    fi

    log_info "close performance.wcc-delay-time success"
    return 0
}


# Check whether the write cache of the given cache disk has backing devices.
#
# The disk's partition uuid is read from its config, the write cache path is
# looked up in wcache.json, and the "backdev num" value reported by
# `vs_make_wcache -C <path> -S` is inspected.
#
# Globals:   VSD_BIN (read)
# Arguments: $1 - disk id
# Returns:   0 when "backdev num" is not 0,
#            1 when "backdev num" is 0,
#            2 when any lookup or the vs_make_wcache query fails
function check_wcache_if_brick()
{
    local disk_id=$1
    local disk_file="/sf/cfg/vs/disk/${disk_id}.json"
    local wcacheInfo=""
    local backdevNum=0
    local part_uuid=""
    local wcache_path=""

    # The awk field references in the log messages are escaped so they are
    # logged literally instead of expanding to positional parameters.
    part_uuid=$(grep -w part_uuid "$disk_file" | awk -F\" '{print $4}' | head -1 | cut -c 1-38)
    if [ -z "$part_uuid" ]; then
        log_error "grep -w part_uuid $disk_file | awk -F\" '{print \$4}' | head -1 | cut -c 1-38 failed"
        return 2
    fi
    log_info "part_uuid is $part_uuid"

    wcache_path=$(grep -w path /sf/cfg/vs/cache/wcache.json | grep -- "$part_uuid" | awk -F\" '{print $4}')
    if [ -z "$wcache_path" ]; then
        log_error "grep -w path /sf/cfg/vs/cache/wcache.json | grep $part_uuid | awk -F\" '{print \$4}' failed"
        return 2
    fi
    log_info "wcache_path is $wcache_path"

    wcacheInfo=$("${VSD_BIN}/vs_make_wcache" -C "$wcache_path" -S)
    if [ -z "$wcacheInfo" ]; then
        log_error "${VSD_BIN}/vs_make_wcache -C '$wcache_path' -S failed"
        return 2
    fi

    backdevNum=$(printf "%s" "${wcacheInfo}" | awk -F: '{if($1~"backdev num")print $2}')
    if [ $? -ne 0 ] || [ -z "$backdevNum" ]; then
        log_error "printf '%s' '${wcacheInfo}' | awk -F: '{if(\$1~\"backdev num\")print \$2}' failed"
        return 2
    fi
    log_info "backdevNum is $backdevNum"

    if [ "$backdevNum" == "0" ]; then
        log_info "no brick in wcache"
        return 1
    fi
    log_info "there is brick in wcache"
    return 0
}

# Flush the write cache and tier of a cache disk before it enters maintenance.
#
# Steps: confirm the disk is a cache disk, check that its write cache has
# backing devices (nothing to do otherwise), validate the cache/tier config,
# turn off delayed flushing, wait for write cache and tier to become clean,
# and finally verify the extended attributes of the files.
#
# Globals:   FLUSH_ABORT_FLAG (written), CFG_PROGRESS_FILE (written when a
#            second argument is given)
# Arguments: $1 - disk id
#            $2 - optional progress file
# Returns:   0 on success or when the cache has no backing device, 1 on failure
function main()
{
    local disk_id=$1
    local ret=0

    # Without a disk id every derived path (disk config, abort flag) is bogus.
    if [ -z "$disk_id" ]; then
        log_error "usage: $0 <disk_id> [progress_file]"
        return 1
    fi

    FLUSH_ABORT_FLAG="/tmp/$disk_id.cancel"
    [ $# -eq 2 ] && CFG_PROGRESS_FILE=$2

    # 1. Is this disk a cache disk at all?
    if_cache_disk "$disk_id"
    ret=$?
    if [ $ret -ne 0 ]; then
        log_error "the disk maybe has error or the disk is not cache disk"
        return 1
    fi
    log_info "the disk is cache disk"

    # Is any data disk bound to the write cache?
    check_wcache_if_brick "$disk_id"
    ret=$?
    if [ $ret -eq 1 ]; then
        log_error "the cache disk $disk_id is not brick"
        return 0
    fi

    if [ $ret -eq 2 ]; then
        log_error "check_wcache_if_brick $disk_id failed"
        return 1
    fi
    log_info "the wcache has brick"

    # 2. Is the matching write cache / tier configuration in place?
    check_wcache_tier "$disk_id"
    ret=$?
    if [ $ret -ne 0 ]; then
        log_error "check_wcache failed"
        return 1
    fi
    log_info "the wcache and tier is normal"

    # 3. Turn off delayed write-cache flushing.
    set_wcc_delay_off
    ret=$?
    if [ $ret -ne 0 ]; then
        log_error "set wcc delay off failed"
        return 1
    fi
    log_info "set wcc delay off successfully"

    # 4. Wait until write cache and tier are flushed clean.
    wait_wcache_tier "$disk_id"
    ret=$?
    if [ $ret -ne 0 ]; then
        log_error "wait_wcache_tier failed"
        return 1
    fi
    log_info "the wcache and tier clean successfully"

    # 5. Check the files' extended attributes to confirm everything is flushed.
    check_wcacheid_tierid "$disk_id"
    ret=$?
    if [ $ret -ne 0 ]; then
        log_error "check_wcacheid_tierid failed"
        return 1
    fi
    log_info "check wcacheid and tierid clean successfully"
    return 0
}

# Script entry point: forward all command-line arguments to main; since this is
# the last command, its return value becomes the script's exit status.
main "$@"
