#!/bin/bash

. /.PATH
. vs_stddir.sh
. vs_logger.sh
. vs_common.sh

#set -e
# ---- Script-wide state, filled in by main() and the helpers below ----
# VM directory path with the "$VSD_GFS_MNT/$g_vol_dir" prefix stripped
# (set by get_vs_vm_path).
g_vm_path=
# Volume name passed to "gluster v set/reset" (second command-line argument).
g_vol=
# Directory component below $VSD_GFS_MNT (third command-line argument).
g_vol_dir=
# Host, brick-side file path and gfid (hex, leading "0x" removed) of the replica
# chosen by select_bad_replication.
g_bad_host=
g_bad_file=
g_bad_gfid=
# Directory that receives the disk backups (set in main).
g_backup_path=
# "yes" makes process_bad_vm_disk back up and re-flag the chosen replica;
# main currently sets it to "yes" unconditionally.
g_need_commit="no"

# Parsed xattr JSON of the two replicas of the disk being examined:
# [0] = replica on the odd-numbered brick, [1] = replica on the even-numbered
# brick (see get_disk_attr).
declare -a g_attr_info

# Escape sequences that log() wraps around console messages
# (ANSI SGR 34 = blue foreground, SGR 0 = reset).
CL='\033[0;34m'
NC='\033[0m'

# Split unquoted expansions on newlines only: the "for x in $(...)" loops in
# this file iterate over command output one line at a time.
IFS=$'\n'

# Report a message twice: coloured on stdout for the operator and, unchanged,
# through log_info (provided by the sourced logger script).
#   $1 - message text; backslash escapes are expanded in the console copy only.
function log()
{
        # %b expands backslash escapes the same way "echo -e" does.
        printf '%b\n' "${CL}$1${NC}"
        log_info "$1"
}

# Resolve the directory of a VM and store it, relative to the volume directory,
# in g_vm_path.
#   $1 - vmid handed to vs_quick_get_vmpath_by_vmid.sh
# Globals: reads VSD_GFS_MNT and g_vol_dir, writes g_vm_path.
# Exits the script with status 1 when no path could be determined.
function get_vs_vm_path()
{
        local vmid="$1"
        local path

        path=$(/sf/vs/bin/vs_quick_get_vmpath_by_vmid.sh "$vmid")

        # The prefix is quoted so that it is removed literally; unquoted, any
        # glob character in the mount point or volume directory ("[", "*", "?")
        # would be interpreted as a pattern and the prefix might not be stripped.
        g_vm_path=${path#"$VSD_GFS_MNT/$g_vol_dir"}

        if [ -z "$g_vm_path" ]; then
                log "Can't get vm path, vmid: $vmid"
                exit 1
        fi

        log "vmid: $vmid path: $g_vm_path"

        # Show the directory content to the operator.
        ls -l "$path"
}

# Decide from the xattrs stored in g_attr_info[0] / g_attr_info[1] whether the
# two replicas of a disk need repair.
#   $1 - disk file name (only used in the log message)
# Returns 0 when
#   - each replica has an all-zero changelog for its own client key and a
#     non-zero one for the other key, or
#   - a replica carries trusted.file_status = 0x626164 (hex of ASCII "bad") and
#     its changelog for the other replica is non-zero;
# returns 1 otherwise, including when the two gfids differ.
# A missing attribute is reported by jq as "null", which counts as non-zero.
function check_brain_split_vm_disk()
{
        local disk="$1"
        local zero="0x000000000000000000000000"
        local bad_mark="0x626164"
        local gfid1
        local gfid2
        local client1
        local client2
        # All intermediate values are local so they no longer leak into (or get
        # polluted by) the global scope.
        local changlog1_1
        local changlog1_2
        local changlog2_1
        local changlog2_2
        local badstatus1
        local badstatus2

        gfid1=$(echo "${g_attr_info[0]}" | jq -r '."trusted.gfid"')
        gfid2=$(echo "${g_attr_info[1]}" | jq -r '."trusted.gfid"')

        if [ "$gfid1" != "$gfid2" ]; then
                log "gfid is conflict, file: $g_vm_path/$disk, gfid: $gfid1  $gfid2"
                return 1
        fi

        # Names of the two afr changelog attributes.
        client1=$(echo "${g_attr_info[0]}" | jq -r '.client[0]')
        client2=$(echo "${g_attr_info[1]}" | jq -r '.client[1]')

        # changlogR_C: changelog kept by replica R for client key C.
        changlog1_1=$(echo "${g_attr_info[0]}" | jq -r ".\"$client1\"")
        changlog1_2=$(echo "${g_attr_info[0]}" | jq -r ".\"$client2\"")
        changlog2_1=$(echo "${g_attr_info[1]}" | jq -r ".\"$client1\"")
        changlog2_2=$(echo "${g_attr_info[1]}" | jq -r ".\"$client2\"")

        badstatus1=$(echo "${g_attr_info[0]}" | jq -r '."trusted.file_status"')
        badstatus2=$(echo "${g_attr_info[1]}" | jq -r '."trusted.file_status"')

        # Both replicas are clean for themselves and accuse the other one.
        if [[ "$changlog1_1" == "$zero" && "$changlog1_2" != "$zero" ]] &&
                [[ "$changlog2_2" == "$zero" && "$changlog2_1" != "$zero" ]]; then
                return 0
        fi

        # One replica is already flagged bad but still holds pending changes.
        if [[ "$badstatus1" == "$bad_mark" && "$changlog1_2" != "$zero" ]] ||
                [[ "$badstatus2" == "$bad_mark" && "$changlog2_1" != "$zero" ]]; then
                return 0
        fi

        return 1
}

# Convert the "getfattr -d" dump of one file into a JSON object on stdout.
#   $1 - host the dump was taken on   -> .host
#   $2 - dump text: a "# file: <path>" line plus "name=value" lines
# Result: {"host":..., "file":..., "<name>":"<value>", ..., "client":[k0,k1]}
# where k0/k1 are the first two attribute names matching "afr.*client"
# (empty strings when fewer were found).
#
# Every value reaches jq through --arg, so quotes, backslashes or glob
# characters in a path or value can neither break the jq program nor be
# expanded by the shell.
function parse_attr_info()
{
        local host="$1"
        local info="$2"
        local json
        local attr
        local key
        local value
        local -a client=()

        json=$(jq -n --arg host "$host" '{host: $host}')

        while IFS= read -r attr; do
                [ -n "$attr" ] || continue

                if [[ "$attr" == '# file: '* ]]; then
                        value=${attr#'# file: '}
                        json=$(printf '%s\n' "$json" | jq --arg v "$value" '.file = $v')
                        continue
                fi

                # Split at the first "=" only, so a value containing "=" stays whole.
                key=${attr%%=*}
                value=${attr#*=}

                json=$(printf '%s\n' "$json" | jq --arg k "$key" --arg v "$value" '.[$k] = $v')

                if [[ "$key" =~ afr.*client ]]; then
                        client+=("$key")
                fi
        done <<< "$info"

        printf '%s\n' "$json" |
                jq --arg c0 "${client[0]}" --arg c1 "${client[1]}" \
                        '.client[0] = $c0 | .client[1] = $c1'
}

# Collect the xattrs of both replicas of one disk file into g_attr_info.
#   $1 - disk file name inside the VM directory (g_vm_path)
# Each node listed by get_nodes_status.pl is asked (over ssh) for the xattrs of
# the brick-side copy; the answer is parsed by parse_attr_info and stored in
# g_attr_info[0] when the owning brick number is odd, g_attr_info[1] when even.
# Exits with status 1 when the brick list or a brick number cannot be
# determined, when not exactly two copies were found, or when the two copies do
# not map to one odd and one even brick.
function get_disk_attr()
{
        local disk="$1"
        local i=0
        local rep_info
        local host
        local brick_info
        local file
        local brickno

        brick_info=$(gluster v i | grep -E "Brick[0-9]+: host-[0-9a-z]+:")
        if [ -z "$brick_info" ]; then
                log "get brick info failed, try gluster v i to check "
                exit 1
        fi

        # Drop what the previous disk left behind so a stale entry can never be
        # mistaken for a replica of this disk.
        g_attr_info=()

        for host in $(/sf/sbin/get_nodes_status.pl | cut -d ':' -f 1); do
                rep_info=$(ssh "$host" "find /sf/data/vs/local/*/*/\"$g_vm_path\"/$disk -type f \
                                        -not -perm /01000 \
                                        -exec /sf/vs/bin/getfattr --absolute-names -m . -d -e hex {} \; | \
                                        sort" 2>/dev/null)

                # This node holds no copy of the disk.
                if [ -z "$rep_info" ]; then
                        continue
                fi

                rep_info=$(parse_attr_info "$host" "$rep_info")
                file=$(echo "$rep_info" | jq -r ".file")

                # The first 93 characters of the file path are looked up in the
                # brick list (presumably the brick directory - TODO confirm the
                # fixed length). The path is matched as a fixed string, not as
                # a regular expression.
                brickno=$(echo "$brick_info" | grep -F -- "${file:0:93}" | cut -d: -f1 | grep -Eo "[0-9]+")
                if [[ ! "$brickno" =~ ^[0-9]+$ ]]; then
                        log "get brickno failed:$brick_info,$rep_info"
                        exit 1
                fi

                # 10# forces base 10 so a zero-padded number is not read as octal.
                if ((10#$brickno % 2 != 0)); then
                        g_attr_info[0]=$rep_info
                else
                        g_attr_info[1]=$rep_info
                fi
                ((++i))
        done

        if ((i != 2)); then
                log "*** replication > 2 or arbiter is on, disk: $g_vm_path/$disk ***"
                exit 1
        fi

        if [ -z "${g_attr_info[0]}" ] || [ -z "${g_attr_info[1]}" ]; then
                log "*** replicas are not on one odd and one even brick, disk: $g_vm_path/$disk ***"
                exit 1
        fi
}

# Ask the tier service on g_bad_host to kick the bad file (g_bad_gfid) out of
# its cache.
# The inode dump of vs_tier_cli.py is searched for the entry whose uuid equals
# the dashed form of the gfid; the ssd uuid and brick id reported for it are
# taken from the first and last line of the jq result.
# Exits the script with status 1 when either value is missing.
function flush_tier_cache()
{
        local dump
        local dashed
        local hits
        local ssd_id
        local brick_id

        # 32 hex digits -> 8-4-4-4-12 form.
        dashed=${g_bad_gfid:0:8}-${g_bad_gfid:8:4}-${g_bad_gfid:12:4}-${g_bad_gfid:16:4}-${g_bad_gfid:20}

        dump=$(ssh "$g_bad_host" /sf/vs/bin/vs_tier_cli.py -c dump -a inode 2>/dev/null)

        hits=$(echo "$dump" | jq -r "recurse(.ssd[]?) | recurse(.brick[]?) | \
                                        if (.. | select(.uuid? == \"$dashed\") | length) > 0 \
                                        then .ssd_uuid, .bi_brickid  else null end" | \
                                grep -v null)

        ssd_id=$(echo "$hits" | sed -n '1p')
        brick_id=$(echo "$hits" | sed -n '$p')

        if [[ -z "$ssd_id" || -z "$brick_id" ]]; then
                echo "*** file $dashed not found, output: $hits ***"
                exit 1
        fi

        # Show the command, then run it on the remote host.
        echo "/sf/vs/bin/vs_tier_cli.py -c kickout -a brick_id=$brick_id,gfid=$dashed"

        ssh "$g_bad_host" /sf/vs/bin/vs_tier_cli.py \
                                -c kickout -a brick_id=$brick_id,gfid=$dashed 2>/dev/null
}

# Block until the cache of the bad file (g_bad_file on g_bad_host) is flushed.
# Every 10 seconds the tier kick-out is re-issued and the file's xattrs are
# read; the wait ends once user.glusterfs.tier_status and
# user.glusterfs.wcache are each either absent or all zero. The final xattrs
# are printed afterwards.
function wait_for_completion()
{
        local attr

        # The gfid is passed as an argument, never as part of the format string.
        printf 'waiting cache flush: %s... ' "$g_bad_gfid"

        while true; do
                sleep 10
                flush_tier_cache

                # A failed or empty query tells nothing about the cache state.
                # Treating it as "flushed" would let the backup start on stale
                # data, so poll again instead.
                if ! attr=$(ssh "$g_bad_host" /sf/vs/bin/getfattr --absolute-names -d -m . -e hex "\"$g_bad_file\"" 2>/dev/null) ||
                    [ -z "$attr" ]; then
                        continue
                fi

                if echo "$attr" | grep -q 'user.glusterfs.tier_status' &&
                    ! echo "$attr" | grep -q 'user.glusterfs.tier_status=0x0000000000000000'; then
                        continue
                fi

                if echo "$attr" | grep -q 'user.glusterfs.wcache' &&
                    ! echo "$attr" | grep -q 'user.glusterfs.wcache=0x0000000000000000'; then
                        continue
                fi

                # Extra grace period before the caller goes on.
                sleep 10
                break
        done

        echo "done"

        ssh "$g_bad_host" /sf/vs/bin/getfattr -d -m . -e hex "\"$g_bad_file\"" 2>/dev/null
}

# Copy the logical volumes that make up the bad file into one image at
# "$g_backup_path/$g_bad_gfid.qcow2".
# The volumes whose "lvs" line on g_bad_host contains g_bad_gfid are copied with
# dd (run on g_bad_host) in ascending order of the number after the first "."
# of that line, each one written at the MiB offset reached so far.
# Exits with status 1 when the free space reported by df is not at least
# 16384 MiB larger than the file, or when a dd fails.
function do_backup_bad_vm_disk()
{
        local file_size
        local avail_size
        local file
        local seek=0
        local size=0
        local lvs_out
        local shard
        local vg
        local lv
        local -a dd_cmd

        # One remote query feeds both the size total and the copy loop.
        lvs_out=$(ssh "$g_bad_host" lvs --unit m --nosuffix 2>/dev/null)

        file_size=$(echo "$lvs_out" |
                        awk -v gfid="$g_bad_gfid" '$0 ~ gfid {size += $NF} END{printf "%u", size}')

        # NOTE(review): only a df line containing "rep2" is accepted; with any
        # other mount source avail_size stays empty and the check below fails.
        avail_size=$(df -B 1048576 "$g_backup_path" | awk '/rep2/ {print $4}')

        if ((file_size > avail_size - 16384)); then
                echo "Can not backup file: $g_bad_gfid. No space left, file size: $file_size"
                exit 1
        fi

        file="$g_backup_path"/$g_bad_gfid.qcow2
        truncate -s "$file_size"M "$file"

        log "Start backup, path: $file"

        while IFS= read -r shard; do
                [ -n "$shard" ] || continue

                vg=$(echo "$shard" | awk '{print $2}')
                lv=$(echo "$shard" | awk '{print $1}')
                size=$(echo "$shard" | awk '{printf "%d", $NF}')

                dd_cmd=(dd "if=/dev/$vg/$lv" "of=$file" bs=1M "seek=$seek"
                        iflag=direct oflag=direct conv=notrunc)

                echo ssh "$g_bad_host" "${dd_cmd[@]}"
                # stdin comes from /dev/null so ssh cannot swallow the shard
                # list this loop is reading.
                if ! ssh "$g_bad_host" "${dd_cmd[@]}" 2>/dev/null </dev/null; then
                        echo "Recover file $g_bad_gfid to $g_backup_path failed"
                        exit 1
                fi

                seek=$((seek + size))
        done < <(echo "$lvs_out" | grep -- "$g_bad_gfid" | sort -t '.' -k 2 -b -n)
}

# Private helper: log and run one remote setfattr on the bad replica file.
#   $1 - attribute name, $2 - attribute value
# Returns 0 on success; logs the failure and returns 1 otherwise.
function _set_bad_file_xattr()
{
        local name="$1"
        local value="$2"

        log "ssh $g_bad_host /sf/vs/bin/setfattr -n $name -v $value $g_bad_file 2>/dev/null"
        if ! ssh "$g_bad_host" /sf/vs/bin/setfattr -n "$name" -v "$value" "\"$g_bad_file\"" 2>/dev/null; then
                log "setfattr $name failed on $g_bad_host: $g_bad_file"
                return 1
        fi
}

# Flag the chosen replica as bad and clear its changelogs.
#   $1 - index into g_attr_info (0 or 1) of the replica to flag
# On g_bad_host:g_bad_file this sets trusted.file_status to "bad" and both afr
# client attributes (.client[0], .client[1] of the replica's JSON) to all
# zeros. All three updates are always attempted.
# Returns 0 when every setfattr succeeded, 1 when at least one failed.
function rape_bad_vm_disk()
{
        local bad_rep="$1"
        local client1
        local client2
        local rc=0

        client1=$(echo "${g_attr_info[$bad_rep]}" | jq -r '.client[0]')
        client2=$(echo "${g_attr_info[$bad_rep]}" | jq -r '.client[1]')

        _set_bad_file_xattr trusted.file_status bad || rc=1
        _set_bad_file_xattr "$client1" 0x000000000000000000000000 || rc=1
        _set_bad_file_xattr "$client2" 0x000000000000000000000000 || rc=1

        return "$rc"
}

# Flush any cached data of the chosen replica, then back the file up.
#   $1 - index into g_attr_info (0 or 1) of the replica to back up
# When the replica's user.glusterfs.wcache is set and non-zero, the volume
# options performance.wcc-wbforce / performance.wcc-delay-time are overridden
# for the duration of the flush. When user.glusterfs.tier_status is set and
# non-zero, the tier cache is kicked. In either case the function waits for the
# flush to finish before calling do_backup_bad_vm_disk.
#
# flush_tier_cache terminates with "exit" when it cannot find the file. The
# flush steps therefore run in subshells: the volume options are always reset
# before that exit status is passed on, instead of being left overridden.
function backup_bad_vm_disk()
{
        local bad_rep=$1
        local wcache_attr
        local tier_attr
        local flag=0
        local flush=0
        local rc=0

        wcache_attr=$(echo "${g_attr_info[$bad_rep]}" | jq -r '."user.glusterfs.wcache"')
        if [ -n "$wcache_attr" ] && [ "$wcache_attr" != "null" ] &&
                [ "$wcache_attr" != "0x0000000000000000" ]; then
                gluster v set "$g_vol" performance.wcc-wbforce on >/dev/null
                gluster v set "$g_vol" performance.wcc-delay-time 0 >/dev/null
                flag=1
                flush=1
        fi

        tier_attr=$(echo "${g_attr_info[$bad_rep]}" | jq -r '."user.glusterfs.tier_status"')
        if [ -n "$tier_attr" ]  && [ "$tier_attr" != "null" ] &&
                [ "$tier_attr" != "0x0000000000000000" ]; then
                # "exit 0" makes the subshell status non-zero only when the
                # function itself called exit; a plain non-zero return is
                # ignored, as before.
                ( flush_tier_cache; exit 0 ) || rc=$?
                flush=1
        fi

        if ((flush)) && ((rc == 0)); then
                ( wait_for_completion; exit 0 ) || rc=$?
        fi

        if ((flag)); then
                gluster v reset "$g_vol" performance.wcc-wbforce >/dev/null
                gluster v reset "$g_vol" performance.wcc-delay-time >/dev/null
        fi

        if ((rc != 0)); then
                exit "$rc"
        fi

        do_backup_bad_vm_disk
}

# Choose which of the two replicas in g_attr_info is to be treated as bad.
# Sets g_bad_gfid (trusted.gfid of replica 0 without the leading "0x"),
# g_bad_file and g_bad_host.
# Returns the index of the chosen replica (0 or 1) as the function status:
#   - a replica whose JSON already contains trusted.file_status is chosen,
#     replica 0 being checked first;
#   - otherwise the replica with the older modification time is chosen, with
#     replica 0 winning a tie.
# Exits with status 1 when a modification time cannot be read. Without that
# check an empty value compares as 0 and a replica would be picked at random.
function select_bad_replication()
{
        local tm1
        local tm2
        local file1
        local file2
        local host1
        local host2

        g_bad_gfid=$(echo "${g_attr_info[0]}" | jq -r '."trusted.gfid"')
        g_bad_gfid=${g_bad_gfid:2}

        file1=$(echo "${g_attr_info[0]}" | jq -r '.file')
        host1=$(echo "${g_attr_info[0]}" | jq -r '.host')
        file2=$(echo "${g_attr_info[1]}" | jq -r '.file')
        host2=$(echo "${g_attr_info[1]}" | jq -r '.host')

        # A replica is already marked bad.
        if echo "${g_attr_info[0]}" | grep -q '.trusted.file_status'; then
                g_bad_file="$file1"
                g_bad_host="$host1"
                return 0
        fi

        if echo "${g_attr_info[1]}" | grep -q '.trusted.file_status'; then
                g_bad_file="$file2"
                g_bad_host="$host2"
                return 1
        fi

        # Nothing is flagged yet: fall back to the modification times.
        tm1=$(ssh "$host1" "/sf/bin/busybox/stat -c '%Y' \"$file1\"" 2>/dev/null)
        tm2=$(ssh "$host2" "/sf/bin/busybox/stat -c '%Y' \"$file2\"" 2>/dev/null)

        if [[ ! "$tm1" =~ ^[0-9]+$ ]] || [[ ! "$tm2" =~ ^[0-9]+$ ]]; then
                log "Can't get mtime, $host1:$file1 '$tm1', $host2:$file2 '$tm2'"
                exit 1
        fi

        if ((tm1 > tm2)); then
                g_bad_file="$file2"
                g_bad_host="$host2"
                return 1
        else
                g_bad_file="$file1"
                g_bad_host="$host1"
                return 0
        fi
}

# Print the distinct base names of the VM's disk files (vm-disk-*.qcow2*),
# one per line, sorted.
# Every node reported by get_nodes_status.pl is searched over ssh below
# /sf/data/vs/local/*/*/<g_vm_path>; only regular files that do not match
# "-perm /01000" are listed. Errors of the remote command are discarded.
function get_all_vm_disk()
{
        local node
        local remote_find

        # Built once; the same command line is sent to every node.
        remote_find="find /sf/data/vs/local/*/*/\"$g_vm_path\"/vm-disk-*.qcow2* -type f \
                                -not -perm /01000 -exec basename {} \;"

        for node in $(get_nodes_status.pl | cut -d ':' -f 1); do
                ssh "$node" "$remote_find" 2>/dev/null
        done |
                sort |
                uniq
}

# Examine every disk of the VM and repair the ones that need it.
# For each name printed by get_all_vm_disk: list the file on the mounted
# volume, load the xattrs of both replicas (get_disk_attr) and evaluate them
# (check_brain_split_vm_disk). For a disk reported as bad the replica to
# sacrifice is selected and shown; it is backed up and re-flagged only when
# g_need_commit is "yes".
function process_bad_vm_disk()
{
        local disk
        local replica_idx=0

        for disk in $(get_all_vm_disk); do
                echo ""
                ls -l "$VSD_GFS_MNT/$g_vol_dir/$g_vm_path/$disk"
                get_disk_attr "$disk"

                if ! check_brain_split_vm_disk "$disk"; then
                        log "Disk $disk is good"
                else
                        log "Disk $disk is bad"

                        # The function status is the index of the chosen replica.
                        select_bad_replication
                        replica_idx=$?

                        log "Will set bad to replication $replica_idx, info:"
                        log "${g_attr_info[0]}"
                        log "${g_attr_info[1]}"

                        if [ "$g_need_commit" == "yes" ]; then
                                backup_bad_vm_disk "$replica_idx"
                                rape_bad_vm_disk "$replica_idx"
                        fi
                fi
        done
}

# Entry point.
#   $1 - vmid, $2 - volume name, $3 - volume directory below $VSD_GFS_MNT
# Prints the usage text and exits 0 for -h/--help or an argument count other
# than 1 or 3. Otherwise resolves the VM path, makes sure the backup directory
# exists and processes the VM's disks with commit enabled.
# NOTE(review): a single-argument call is accepted and leaves g_vol and
# g_vol_dir empty - confirm whether that mode is still intended.
function main()
{
        if { [ $# -ne 1 ] && [ $# -ne 3 ]; } || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
                echo "Usage: $0 <vmid> <vol> <vol_dir>"
                exit 0
        fi

        g_need_commit="yes"

        g_vol=$2
        g_vol_dir=$3

        g_backup_path=$VSD_GFS_MNT/$g_vol_dir/vsfire_recovery_FEC61AC2/file_backup
        get_vs_vm_path "$1"

        # Without the backup directory no disk can be saved, so stop here
        # rather than fail later in the middle of the repair.
        if ! mkdir -p "$g_backup_path"; then
                log "Can't create backup dir: $g_backup_path"
                exit 1
        fi

        process_bad_vm_disk
}

# Run with the script's own arguments. The bare "exit" ends the script with the
# status of the last command executed, i.e. the status main returned.
main "$@"
exit
