#!/bin/bash
# 
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
# 功能: 处理两主机环境qcow2文件脑裂
# 用法：集群中任一主机上执行
# 如果有容器版本需要先进入VS管理面容器(ssh -p 25022 localhost); vsts-recover-brian-split-vm3x.sh <vmid> 
# 适用: VS3.0版本及以上两主机环境专用
# 版本：Ver 1.0
#####################################################################


# Load the environment and shared helper libraries. What each file defines is
# not visible here; log_info (called by log() below) is presumably provided by
# vs_logger.sh -- TODO confirm.
. /.PATH
. vs_stddir.sh
. vs_logger.sh

# Abort-on-error mode is left disabled (commented out).
#set -e

# Mount point of the volume taken from the `mount` line that contains "gfs"
# (set by init).
g_mount_path=
# Mount point of the volume taken from the `mount` line that does not contain
# "gfs" (set by init).
g_vt_mount_path=
# VM directory with the mount-point prefix stripped (set by
# get_vs_vm_path_bypath / get_vs_vm_path_byvmid).
g_vm_path=
# Output of `gluster vol list` (set by init).
g_vol=
# Directory below g_mount_path that receives backups (set by init).
g_backup_path=

# Host and brick-side file of the replica chosen by select_bad_replication.
g_bad_host=
g_bad_file=
# GFID of the disk being processed, in dashed 8-4-4-4-12 form (set by
# get_disk_gfid).
g_bad_gfid=
# Same GFID as plain hex digits without the "0x" prefix (set by get_disk_gfid).
g_org_gfid=


# Per-replica attribute JSON produced by parse_attr_info (filled by
# get_disk_attr; indexes 0 and 1 are used).
declare -a g_attr_info
# Per-host "trusted.gfid=0x..." lines (filled by get_disk_gfid).
declare -a g_gfid_info

# ANSI escape sequences used by log(): blue foreground and attribute reset.
CL='\033[0;34m'
NC='\033[0m'

# Split unquoted expansions on newlines only, so the `for x in $(...)` loops
# below iterate over whole lines.
IFS=$'\n'

# Print a message to stdout wrapped in the CL/NC color codes and pass the same
# text to log_info.
# Arguments: $1 - message text
# Returns:   exit status of log_info
function log()
{
        local msg="$1"

        echo -e "${CL}${msg}${NC}"
        log_info "${msg}"
}

# Discover the gluster volume, its two mount points and the backup directory.
# Globals written: g_vol, g_mount_path, g_vt_mount_path, g_backup_path.
# Exits with status 1 (after logging) when one of them cannot be determined or
# when the backup directory cannot be created.
function init()
{
    g_vol=$(gluster vol list)
    if [ -z "$g_vol" ]; then
        log "get volume name failed"
        exit 1
    fi

    # The volume name is matched as a fixed string, not as a regex.
    g_mount_path=$(mount | grep -F -e "$g_vol" | grep gfs | awk '{print $3}')
    if [ -z "$g_mount_path" ]; then
        log "get mount path failed"
        exit 1
    fi

    g_vt_mount_path=$(mount | grep -F -e "$g_vol" | grep -v gfs | awk '{print $3}')
    if [ -z "$g_vt_mount_path" ]; then
        log "get vt mount path failed"
        exit 1
    fi

    g_backup_path=$g_mount_path/vsfire_recovery_FEC61AC2/file_backup

    # Fail here with a clear message instead of letting the later backup step
    # stumble over a missing directory.
    if ! mkdir -p "$g_backup_path"; then
        log "create backup path failed: $g_backup_path"
        exit 1
    fi
}

# Resolve the VM directory from the script argument.
# Arguments: $1 - either a path containing "gfs" or a VM id
# An argument containing "gfs" is handed to get_vs_vm_path_bypath, anything
# else to get_vs_vm_path_byvmid. grep echoes a matching argument to stdout.
function get_vs_vm_path()
{
        local target=$1

        if echo "$target" | grep gfs; then
                get_vs_vm_path_bypath "$target"
        else
                get_vs_vm_path_byvmid "$target"
        fi
}

# Derive g_vm_path from a file path below the gfs mount point.
# Arguments: $1 - path of a file inside the VM directory
# Globals:   g_mount_path (read), g_vm_path (written)
# Exits with status 1 when the parent directory of $1 does not exist.
function get_vs_vm_path_bypath()
{
        local path

        path=$(dirname "$1")
        if [ ! -d "$path" ]; then
                log "dir path not exit: $g_vm_path, $path"
                exit 1
        fi

        # Quote the prefix so the mount path is removed literally rather than
        # being interpreted as a glob pattern.
        g_vm_path=${path#"$g_mount_path"}

        log "nfs path: $path,  dir path: $g_vm_path"
}

# Derive g_vm_path from a VM id.
# Arguments: $1 - VM id, passed to vs_quick_get_vmpath_by_vmid.sh
# Globals:   g_vt_mount_path, g_mount_path (read), g_vm_path (written)
# Exits with status 1 when no VM path could be derived.
function get_vs_vm_path_byvmid()
{
        local vmid="$1"
        local path

        path=$(/sf/vs/bin/vs_quick_get_vmpath_by_vmid.sh "$vmid")

        # Only a literal leading mount path is a prefix worth stripping; a
        # regex match anywhere in the string would pick the wrong branch.
        if [[ "$path" == "$g_vt_mount_path"* ]]; then
                g_vm_path=${path#"$g_vt_mount_path"}
        else
                g_vm_path=${path#"$g_mount_path"}
        fi

        if [ -z "$g_vm_path" ]; then
                log "Can't get vm path, vmid: $vmid"
                exit 1
        fi

        log "vmid: $vmid path: $g_vm_path"

        ls -l "$path"
}


#Sangfor:aSV/host-a0369f03488b /sf # ls -l /sf/data/vs/gfs/d81b0486_vs_vol_rep2/vm-disk-1.qcow2   
#ls: cannot access /sf/data/vs/gfs/d81b0486_vs_vol_rep2/vm-disk-1.qcow2: Operation not permitted
# Return 0 when changelog value $1 denotes pending operations.
# An absent attribute (jq prints "null"), an empty string and the all-zero
# value all count as "nothing pending".
function _afr_changelog_pending()
{
        case "$1" in
                ""|null|0x000000000000000000000000)
                        return 1
                        ;;
        esac
        return 0
}

# Print attribute $2 of replica g_attr_info[$1]; jq prints "null" when absent.
# The key is passed with --arg so it is never spliced into the jq program.
function _replica_attr()
{
        printf '%s\n' "${g_attr_info[$1]}" | jq -r --arg key "$2" '.[$key]'
}

# Decide whether a disk is in split-brain.
# Arguments: $1 - disk file name inside the VM directory
# Globals:   g_attr_info, g_mount_path, g_vm_path (read)
# Returns:   0 when the disk is considered split-brain, 1 otherwise.
# The disk is reported as split-brain when
#   - both replicas carry a pending changelog under either pairing of the two
#     client attributes with the two replicas, or
#   - listing the file through the mount fails with "Operation not permitted".
# Differing gfids are logged and reported as 1 (not handled by this script).
function check_brain_split_vm_disk()
{
        local disk="$1"
        local gfid1
        local gfid2
        local client1
        local client2

        gfid1=$(_replica_attr 0 trusted.gfid)
        gfid2=$(_replica_attr 1 trusted.gfid)

        if [ "$gfid1" != "$gfid2" ]; then
                log "gfid is conflict, file: $g_vm_path/$disk, gfid: $gfid1  $gfid2"
                return 1
        fi

        client1=$(printf '%s\n' "${g_attr_info[0]}" | jq -r '.client[0]')
        client2=$(printf '%s\n' "${g_attr_info[1]}" | jq -r '.client[1]')

        if _afr_changelog_pending "$(_replica_attr 0 "$client1")" &&
                _afr_changelog_pending "$(_replica_attr 1 "$client2")"; then
                return 0
        fi

        # Host order does not have to follow client order: try the other pairing.
        if _afr_changelog_pending "$(_replica_attr 1 "$client1")" &&
                _afr_changelog_pending "$(_replica_attr 0 "$client2")"; then
                return 0
        fi

        log "check operation: $g_mount_path/$g_vm_path/$disk"

        # The matching ls error line is left on stdout as a diagnostic.
        if ls -l "$g_mount_path/$g_vm_path/$disk" 2>&1 | grep 'Operation not permitted'; then
                return 0
        fi

        return 1
}


# 输出格式
#{
#  "host": "host-a0369f467080",
#  "file": "/sf/data/vs/local/k6lOQT-K88T-iBfC-0ZP8-O0tp-arOo-Sw6kfU/ed09ea65-2832-4ba1-bb3a-1cb7653bba97/f7149279-da1e-43a1-aa68-9d3ef6d071c4",
#  "trusted.afr.vs_vol_rep2-vclnt-2": "0x000000000000000000000000",
#  "trusted.afr.vs_vol_rep2-vclnt-3": "0x000000000000000000000010",
#  "client": [
#    "trusted.afr.vs_vol_rep2-vclnt-2",
#    "trusted.afr.vs_vol_rep2-vclnt-3"
#  ]
#}
#
# Turn the getfattr dump of one replica into a JSON object on stdout.
# Arguments: $1 - host the dump was taken on
#            $2 - getfattr output ("# file: <path>" plus "name=value" lines)
# The object gets "host", "file", one member per attribute and a "client"
# array holding the first two attribute names matching trusted.afr.*vclnt.
function parse_attr_info()
{
        local host="$1"
        local info="$2"
        local doc
        local line
        local name
        local val
        local rest
        local -a clients=()

        doc=$(jq ".host |= \"$host\"" <<<"{}")

        while IFS= read -r line; do
                # Empty lines carry no attribute.
                [ -n "$line" ] || continue

                if [[ "$line" == '# file: '* ]]; then
                        val=${line#'# file: '}
                        doc=$(jq ".file |= \"$val\"" <<<"$doc")
                        continue
                fi

                # name = text before the first '=', value = text between the
                # first and the second '=' (whole line when there is no '=').
                name=${line%%=*}
                rest=${line#*=}
                val=${rest%%=*}

                doc=$(jq ".\"$name\" |= \"$val\"" <<<"$doc")

                if [[ "$name" =~ trusted.afr.*vclnt ]]; then
                        clients+=("$name")
                fi
        done <<<"$info"

        doc=$(jq ".client[0] |= \"${clients[0]}\" | .client[1] |= \"${clients[1]}\"" <<<"$doc")

        echo  "$doc"
}

# Collect the extended attributes of the current disk from every host.
# Globals:   g_bad_gfid (read), g_attr_info (written, indexes 0 and 1)
# The hosts are the peers listed by `gluster peer status` plus the local host.
# Hosts that return no attributes are skipped. Any positional argument is
# ignored. Exits with status 1 unless exactly two hosts returned attributes.
function get_disk_attr()
{
        local found=0
        local node
        local nodes
        local xattr_dump
        local remote_cmd

        remote_cmd="find /sf/data/vs/local/*/*/${g_bad_gfid} -type f \
                                        -not -perm /01000 \
                                        -exec /sf/vs/bin/getfattr --absolute-names -m . -d -e hex {} \; | sort"

        nodes=$(gluster peer status | grep -w Hostname | awk '{print $2}' && echo "$(hostname)")

        for node in $nodes; do
                xattr_dump=$(ssh "$node" "$remote_cmd" 2>/dev/null)
                [ -n "$xattr_dump" ] || continue

                g_attr_info[$found]=$(parse_attr_info "$node" "$xattr_dump")
                found=$((found + 1))
        done

        if [ "$found" -ne 2 ]; then
                log "*** replication > 2 or arbiter is on, gfid: ${g_bad_gfid} ***"
                exit 1
        fi
}

# Ask the tier service on g_bad_host to kick out the cached data of g_bad_gfid.
# Globals:   g_bad_host, g_bad_gfid (read)
# Takes the first and the last non-"null" line of the jq result as ssd uuid
# and brick id, prints the kickout command and runs it over ssh.
# Exits with status 1 when either value is empty.
function flush_tier_cache()
{
        local dump
        local hits
        local ssd
        local brick

        dump=$(ssh "$g_bad_host" /sf/vs/bin/vs_tier_cli.py -c dump -a inode 2>/dev/null)

        hits=$(jq -r "recurse(.ssd[]?) | recurse(.brick[]?) | \
                                        if (.. | select(.uuid? == \"$g_bad_gfid\") | length) > 0 \
                                        then .ssd_uuid, .bi_brickid  else null end" <<<"$dump" | \
                                grep -v null)

        ssd=$(sed -n '1p' <<<"$hits")
        brick=$(sed -n '$p' <<<"$hits")

        if [[ -z "$ssd" || -z "$brick" ]]; then
                echo "*** file $g_bad_gfid not found, output: $hits ***"
                exit 1
        fi

        echo "/sf/vs/bin/vs_tier_cli.py -c kickout -a brick_id=$brick,gfid=$g_bad_gfid"

        ssh "$g_bad_host" /sf/vs/bin/vs_tier_cli.py \
                                -c kickout -a brick_id=$brick,gfid=$g_bad_gfid 2>/dev/null
}

# Flush the tier cache repeatedly until the bad replica reports no pending
# tier or write-cache state, then print its attributes once more.
# Globals:   g_bad_host, g_bad_file, g_bad_gfid (read)
# Each round calls flush_tier_cache, re-reads the attributes and sleeps 10
# seconds. Another round follows while user.glusterfs.tier_status or
# user.glusterfs.wcache is present with a value other than 0x0000000000000000.
function wait_for_completion()
{
        local xattrs
        local busy

        printf "waiting cache flush: ${g_bad_gfid}... "

        while :; do
                flush_tier_cache

                xattrs=$(ssh "$g_bad_host" /sf/vs/bin/getfattr --absolute-names -d -m . -e hex "\"$g_bad_file\"" 2>/dev/null)

                busy=0
                if grep -q 'user.glusterfs.tier_status' <<<"$xattrs" &&
                    ! grep -q 'user.glusterfs.tier_status=0x0000000000000000' <<<"$xattrs"; then
                        busy=1
                elif grep -q 'user.glusterfs.wcache' <<<"$xattrs" &&
                    ! grep -q 'user.glusterfs.wcache=0x0000000000000000' <<<"$xattrs"; then
                        busy=1
                fi

                # The pause happens on every round, including the last one.
                sleep 10
                [ "$busy" -eq 1 ] || break
        done

        echo "done"

        ssh "$g_bad_host" /sf/vs/bin/getfattr -d -m . -e hex "\"$g_bad_file\"" 2>/dev/null
}

# Copy the bad replica into a file below g_backup_path.
# Globals:   g_bad_host, g_org_gfid, g_backup_path, g_mount_path, g_vol (read)
# The replica is read from the logical volumes on g_bad_host whose lvs line
# contains g_org_gfid. They are ordered numerically by the text after the
# first '.' and written one after another with dd into
# <g_backup_path>/<g_org_gfid>.qcow2.
# Exits with status 1 when the free space cannot be determined or is too
# small, when the backup file cannot be created, or when a dd run fails.
function do_backup_bad_vm_disk()
{
        # Split the lvs listing on newlines whatever IFS the caller uses.
        local IFS=$'\n'
        local file_size
        local avail_size
        local file
        local seek=0
        local size=0
        local nfs_path
        local lvs_out
        # Loop variables are local so they do not leak into the global scope.
        local shard
        local vg
        local lv

        # One remote lvs call feeds both the size sum and the shard list.
        lvs_out=$(ssh "$g_bad_host" lvs --unit m --nosuffix 2>/dev/null)

        file_size=$(printf '%s\n' "$lvs_out" |
                        awk -v gfid="$g_org_gfid" 'index($0, gfid) {size += $NF} END {printf "%u", size}')

        avail_size=$(df -B 1048576 "$g_backup_path" | awk '/rep2/ {print $4}')
        if [ -z "$avail_size" ]; then
                # Without this check an empty value fails the comparison below
                # and is reported as "No space left".
                echo "Can not backup file: $g_org_gfid. Get available space of $g_backup_path failed"
                exit 1
        fi

        # Sizes are in MiB; 16384 MiB are kept free.
        if ((file_size > avail_size - 16384)); then
                echo "Can not backup file: $g_org_gfid. No space left, file size: $file_size"
                exit 1
        fi

        mkdir -p "$g_backup_path" 2>/dev/null

        file="$g_backup_path"/$g_org_gfid.qcow2

        touch "$file"

        nfs_path=nfs://127.0.0.1/$g_vol/${file#"$g_mount_path"/}

        # The result of the preallocation is not checked; the copy below is
        # attempted either way.
        /sf/vs/sbin/nfs_fallocate -l "$file_size"M "$nfs_path"

        if [ ! -f "$file" ]; then
            log "create file failed: $file"
            exit 1
        fi

        log "Start backup, path: $file, nfs_path: $nfs_path"

        for shard in $(printf '%s\n' "$lvs_out" |
                        grep -F -e "$g_org_gfid" | sort -t '.' -k 2 -b -n); do
                vg=$(printf '%s\n' "$shard" | awk '{print $2}')
                lv=$(printf '%s\n' "$shard" | awk '{print $1}')
                size=$(printf '%s\n' "$shard" | awk '{printf "%d", $NF}')

                echo ssh "$g_bad_host" dd "if=/dev/$vg/$lv" "of=$file" bs=1M "seek=$seek" iflag=direct oflag=direct conv=notrunc
                if ! ssh "$g_bad_host" dd "if=/dev/$vg/$lv" "of=$file" bs=1M "seek=$seek" \
                                iflag=direct oflag=direct conv=notrunc 2>/dev/null; then
                        echo "Recover file $g_org_gfid to $g_backup_path failed"
                        exit 1
                fi

                seek=$((seek + size))
        done

}

# Log and run one remote setfattr on the bad replica file.
# Arguments: $1 - attribute name, $2 - attribute value
# Returns:   0 on success, 1 (after logging) when the remote command fails.
function _set_bad_replica_xattr()
{
        local name="$1"
        local value="$2"

        log "ssh $g_bad_host /sf/vs/bin/setfattr -n $name -v $value $g_bad_file 2>/dev/null"
        # The inner quotes protect the path from the remote shell.
        if ! ssh "$g_bad_host" /sf/vs/bin/setfattr -n "$name" -v "$value" "\"$g_bad_file\"" 2>/dev/null; then
                log "setfattr failed, host: $g_bad_host, attr: $name, file: $g_bad_file"
                return 1
        fi
}

# Mark the selected replica as bad and clear its two client changelogs.
# Arguments: $1 - index into g_attr_info of the replica to mark
# Globals:   g_attr_info, g_bad_host, g_bad_file (read)
# Returns:   0 when all three attributes were written. 1 as soon as one
#            setfattr fails; the remaining attributes are not touched and
#            "setfattr finished" is not logged.
function rape_bad_vm_disk()
{
        local client1
        local client2
        local bad_rep="$1"

        client1=$(echo "${g_attr_info[$bad_rep]}" | jq -r '.client[0]')
        client2=$(echo "${g_attr_info[$bad_rep]}" | jq -r '.client[1]')

        # Host and file are passed as arguments so a '%' in them is not taken
        # as a format directive. The confirmation prompt is printed only; no
        # answer is read.
        printf '\nSet bad to replication: %s:%s ? Y/n ' "$g_bad_host" "$g_bad_file"

        _set_bad_replica_xattr trusted.file_status bad || return 1
        _set_bad_replica_xattr "$client1" 0x000000000000000000000000 || return 1
        _set_bad_replica_xattr "$client2" 0x000000000000000000000000 || return 1

        log "setfattr finished"
}

# Flush pending cache state of the bad replica if needed, then back it up.
# Arguments: $1 - index into g_attr_info of the bad replica
# Globals:   g_attr_info, g_vol, g_bad_host, g_bad_file (read)
# A user.glusterfs.wcache value other than 0x0000000000000000 switches
# performance.wcc-wbforce on for the volume while waiting and resets it
# afterwards. A non-zero user.glusterfs.tier_status alone only triggers the
# wait. do_backup_bad_vm_disk runs in every case.
function backup_bad_vm_disk()
{
        local bad_rep=$1
        local replica
        local wcache
        local tier
        local wbforce_on=0
        local need_flush=0

        # The confirmation prompt is printed only; no answer is read.
        printf "Backup replication: $g_bad_host:$g_bad_file ? Y/n "

        replica="${g_attr_info[$bad_rep]}"

        wcache=$(jq -r '."user.glusterfs.wcache"' <<<"$replica")
        case "$wcache" in
                ""|null|0x0000000000000000)
                        ;;
                *)
                        gluster v set $g_vol performance.wcc-wbforce on >/dev/null
                        wbforce_on=1
                        need_flush=1
                        ;;
        esac

        tier=$(jq -r '."user.glusterfs.tier_status"' <<<"$replica")
        case "$tier" in
                ""|null|0x0000000000000000)
                        ;;
                *)
                        need_flush=1
                        ;;
        esac

        if [ "$need_flush" -eq 1 ]; then
                wait_for_completion
        fi

        if [ "$wbforce_on" -eq 1 ]; then
                gluster v reset $g_vol performance.wcc-wbforce >/dev/null
        fi

        do_backup_bad_vm_disk
}

# Choose which of the two replicas is treated as bad.
# Globals:   g_attr_info (read), g_bad_file, g_bad_host (written)
# Returns:   the index (0 or 1) of the chosen replica in g_attr_info.
# A replica whose attributes already mention trusted.file_status is chosen
# first (replica 0 is checked before replica 1). Otherwise the replica with
# the older modification time is chosen, replica 0 on a tie.
# Exits with status 1 when a modification time cannot be read, because the
# choice decides which copy is discarded and must not be made on a guess.
function select_bad_replication()
{
        local tm1
        local tm2
        local file1
        local file2
        local host1
        local host2

        file1=$(echo "${g_attr_info[0]}" | jq -r '.file')
        host1=$(echo "${g_attr_info[0]}" | jq -r '.host')

        # This replica already carries trusted.file_status
        if echo "${g_attr_info[0]}" | grep -q '.trusted.file_status'; then
                g_bad_file="$file1"
                g_bad_host="$host1"
                return 0
        fi

        file2=$(echo "${g_attr_info[1]}" | jq -r '.file')
        host2=$(echo "${g_attr_info[1]}" | jq -r '.host')

        if echo "${g_attr_info[1]}" | grep -q '.trusted.file_status'; then
                g_bad_file="$file2"
                g_bad_host="$host2"
                return 1
        fi

        tm1=$(ssh "$host1" "/sf/bin/busybox/stat -c '%Y' \"$file1\" " 2>/dev/null)
        tm2=$(ssh "$host2" "/sf/bin/busybox/stat -c '%Y' \"$file2\"" 2>/dev/null)

        # An empty or non-numeric value would evaluate to 0 in (( )) and
        # silently decide the comparison.
        if ! [[ "$tm1" =~ ^[0-9]+$ && "$tm2" =~ ^[0-9]+$ ]]; then
                log "get mtime failed, $host1:$file1 mtime: '$tm1', $host2:$file2 mtime: '$tm2'"
                exit 1
        fi

        if ((tm1 > tm2)); then
                g_bad_file="$file2"
                g_bad_host="$host2"
                return 1
        else
                g_bad_file="$file1"
                g_bad_host="$host1"
                return 0
        fi
}

# Print the sorted, de-duplicated names of all vm-disk-*.qcow2* files found
# for g_vm_path on any host.
# Globals:   g_vm_path (read)
# The hosts are the peers listed by `gluster peer status` plus the local host.
function get_all_vm_disk()
{
        local node
        local nodes

        nodes=$(gluster peer status | grep -w Hostname | awk '{print $2}' && echo "$(hostname)")

        {
                for node in $nodes; do
                        ssh "$node" "find /sf/data/vs/local/*/*/\"$g_vm_path\"/vm-disk-*.qcow2* -type f \
                                -not -perm /01000 -exec basename {} \;" 2>/dev/null
                done
        } | sort | uniq
}

# Read the gfid of one disk from every host and publish it in both notations.
# Arguments: $1 - disk file name inside the VM directory
# Globals:   g_vm_path (read), g_gfid_info, g_org_gfid, g_bad_gfid (written)
# g_org_gfid receives the hex digits after "0x", g_bad_gfid the same digits
# grouped 8-4-4-4-rest with dashes.
# Exits with status 1 unless exactly two hosts report a gfid and both reports
# are identical.
function get_disk_gfid()
{
        local disk="$1"
        local found=0
        local node
        local nodes
        local gfid_line
        local raw_gfid
        local remote_cmd

        remote_cmd="find /sf/data/vs/local/*/*/\"$g_vm_path\"/$disk -type f \
                                        -not -perm /01000 \
                                        -exec /sf/vs/bin/getfattr --absolute-names -e hex -n trusted.gfid {} \; | grep trusted.gfid | sort -u"

        nodes=$(gluster peer status | grep -w Hostname | awk '{print $2}' && echo "$(hostname)")

        for node in $nodes; do
                gfid_line=$(ssh "$node" "$remote_cmd" 2>/dev/null)
                [ -n "$gfid_line" ] || continue

                g_gfid_info[$found]="$gfid_line"
                found=$((found + 1))
        done

        if [ "$found" -ne 2 ]; then
                log "*** replication > 2 or arbiter is on, disk: $g_vm_path/$disk ***"
                exit 1
        fi

        if [ "${g_gfid_info[0]}" != "${g_gfid_info[1]}" ]; then
                log "gfid is not equal,  ${g_gfid_info[0]}, ${g_gfid_info[1]}***"
                exit 1
        fi

        raw_gfid=$(awk -F0x '{print $2}' <<<"${g_gfid_info[0]}")

        g_org_gfid=$raw_gfid
        g_bad_gfid="${raw_gfid:0:8}-${raw_gfid:8:4}-${raw_gfid:12:4}-${raw_gfid:16:4}-${raw_gfid:20}"

        log "org gfid: $g_org_gfid gfid: $g_bad_gfid"
}

# Walk over every disk of the VM and repair the ones found in split-brain.
# Globals:   g_mount_path, g_vm_path, g_attr_info (read)
# For each disk the gfid and the replica attributes are collected. A disk
# that check_brain_split_vm_disk reports as split-brain gets a bad replica
# selected, backed up and marked. The VM directory is listed at the end.
function process_bad_vm_disk()
{
        local disk
        local victim=0

        for disk in $(get_all_vm_disk); do
                echo ""
                ls -l "$g_mount_path/$g_vm_path/$disk"

                get_disk_gfid "$disk"

                get_disk_attr "$disk"

                if ! check_brain_split_vm_disk "$disk"; then
                        log "Disk $disk is good"
                        continue
                fi

                log "Disk $disk is bad"

                # The return status is the index of the replica to sacrifice.
                select_bad_replication
                victim=$?

                log "Will set bad to replication $victim, info:"
                log "${g_attr_info[0]}"
                log "${g_attr_info[1]}"

                backup_bad_vm_disk $victim
                rape_bad_vm_disk $victim

                log "$g_mount_path/$g_vm_path/$disk is ok now"
        done

        ls -l "$g_mount_path/$g_vm_path"
}


# Script driver.
# Arguments: $1 - VM id, or a path containing "gfs"
# Prints the usage line and exits with status 0 unless exactly one argument
# other than -h / --help is given. Otherwise runs init, get_vs_vm_path and
# process_bad_vm_disk in that order.
function main()
{
        local show_usage=0

        if [ $# -ne 1 ]; then
                show_usage=1
        else
                case "$1" in
                        -h|--help)
                                show_usage=1
                                ;;
                esac
        fi

        if [ "$show_usage" -eq 1 ]; then
                echo "Usage: $0 <vmid>"
                exit 0
        fi

        init

        get_vs_vm_path "$1"

        process_bad_vm_disk
}

# Run the driver with the script's own arguments.
main "$@"
# A bare `exit` ends the script with the status of the previous command,
# i.e. the status main returned.
exit
