#!/bin/bash
#
# update_snapshot - update a snapshot of the current system
#
# Author: Thorsten Kukuk <kukuk@suse.com>
# Copyright (C) 2016, 2017 SUSE Linux GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Run everything in the C locale.
LANG=C
export LANG

# --- fixed paths ---------------------------------------------------------
# Directories of the running system that get bind-mounted into the snapshot.
DIR_TO_MOUNT="dev opt usr/local var/log"
LOGFILE="/var/log/transactional-update.log"
STATE_FILE="/var/lib/misc/transactional-update.state"
KDUMP_SYSCONFIG="/etc/sysconfig/kdump"

# --- zypper invocation ---------------------------------------------------
# ZYPPER_ARG is the zypper command, ZYPPER_ARG_PKGS the package list for
# install/remove. ZYPPER_NONINTERACTIVE is cleared for interactive runs.
ZYPPER_ARG=""
ZYPPER_ARG_PKGS=""
ZYPPER_NONINTERACTIVE="--non-interactive"

# --- actions selected on the command line (0 = off, 1 = on) --------------
DO_CLEANUP=0
DO_ROLLBACK=0
# Snapshot number to roll back to; 0 selects the snapshot mounted as /.
ROLLBACK_SNAPSHOT=0
REWRITE_GRUB_CFG=0
REWRITE_INITRD=0
REBUILD_KDUMP_INITRD=0
REBOOT_AFTERWARDS=0
USE_SALT_GRAINS=0

# --- run-time state ------------------------------------------------------
# Exit status the script finishes with.
EXITCODE=0
PACKAGE_UPDATES=0
# Snapshots created by this run; removed again by quit() on error.
SNAPSHOT_ID=""
SECOND_SNAPSHOT_ID=""

# Print the command line synopsis to stdout and terminate the script.
#   $1 - exit status to terminate with
usage() {
    printf '%s\n' \
        "Usage: transactional-update --help|--version" \
        "       transactional-update [cleanup][up|dup|patch|initrd][kdump][reboot]" \
        "       transactional-update [cleanup] [reboot] pkg install|remove PKG1..PKGN" \
        "       transactional-update rollback [number]"
    exit $1
}

# Print program name and version to stdout, then terminate with status 0.
print_version() {
    printf '%s\n' "transactional-update 1.15"
    exit 0
}

# Append a time-stamped message to ${LOGFILE}.
#   $@ - words of the message, written separated by single spaces
# The timestamp has minute resolution ("YYYY-MM-DD HH:MM").
log_info() {
    # LOGFILE is quoted so that a path containing whitespace still works.
    echo "$(date "+%Y-%m-%d %H:%M")" "$@" >> "${LOGFILE}"
}

# Append a time-stamped message to ${LOGFILE} and repeat the bare message
# on stderr.
#   $@ - words of the message, written separated by single spaces
log_error() {
    # LOGFILE is quoted so that a path containing whitespace still works.
    echo "$(date "+%Y-%m-%d %H:%M")" "$@" >> "${LOGFILE}"
    # printf instead of echo: a message starting with -n or -e is printed
    # verbatim instead of being taken as an echo option.
    printf '%s\n' "$*" 1>&2
}

# bashlock LOCKFILENAME
#
# Try to take a PID lock file for the running script.
#
# The PID is written to "LOCKFILENAME.<pid>", which is then hard-linked to
# LOCKFILENAME; ln fails if LOCKFILENAME already exists. An existing lock is
# treated as stale and removed when its first line is not a PID or when no
# process with that PID can be signalled; the link is then retried once.
#
# Globals:  LOCKFILE (written) - read again by the EXIT trap installed here
# Returns:  0 lock taken (an EXIT trap removes it when the script ends)
#           1 lock is held by a live process
#           2 wrong number of arguments
bashlock() {
    if [ "$#" -ne 1 ]; then
        echo 'usage: bashlock [LOCKFILENAME]' 1>&2
        return 2
    fi
    # Must stay global: the trap below expands it when the script exits.
    LOCKFILE="$1"

    local candidate="${LOCKFILE}.$$"
    local holder

    echo "$$" > "${candidate}"
    if ! ln "${candidate}" "${LOCKFILE}" 2>/dev/null; then
        holder=$(head -n 1 "${LOCKFILE}")
        # Only hand real PIDs to kill: something like "-1" would address
        # every process and make the lock look permanently held.
        if [[ ! "${holder}" =~ ^[0-9]+$ ]] || ! kill -0 "${holder}" 2>/dev/null; then
            rm -f "${LOCKFILE}"
        fi

        if ! ln "${candidate}" "${LOCKFILE}" 2>/dev/null; then
            rm -f "${candidate}"
            return 1
        fi
    fi

    rm -f "${candidate}"
    trap 'rm -f "$LOCKFILE"' EXIT

    return 0
}

# Write LAST_WORKING_SNAPSHOT_ID and UNUSED_SNAPSHOTS to ${STATE_FILE} in a
# form that can be sourced again.
#   $1 - number of the snapshot that should also receive a copy of the
#        state file, or 0 to only update the file of the running system
save_state_file() {
    {
        echo "LAST_WORKING_SNAPSHOT_ID=${LAST_WORKING_SNAPSHOT_ID}"
        echo "UNUSED_SNAPSHOTS=\"${UNUSED_SNAPSHOTS}\""
    } > "${STATE_FILE}"

    if [ "${1:-0}" -ne 0 ]; then
        # If /var/lib/misc is not a separate partition/subvolume, copy the
        # state file into the new snapshot as well. Otherwise the snapshot
        # would keep the outdated version from before it was taken.
        if ! grep -q var.lib.misc /proc/mounts; then
            cp -a "${STATE_FILE}" "/.snapshots/$1/snapshot${STATE_FILE}"
        fi
    fi
}

# Rebuild the kdump initrd inside a snapshot.
#   $1 - directory the snapshot is mounted at
# Does nothing (and returns non-zero) when kdump.service is not installed;
# does nothing when the service is not enabled. Otherwise a newer
# ${KDUMP_SYSCONFIG} of the running system is copied into the snapshot
# first and tu-rebuild-kdump-initrd is run chrooted into it.
rebuild_kdump_initrd() {
    local MOUNT_DIR=$1
    local snapshot_sysconfig="${MOUNT_DIR}/${KDUMP_SYSCONFIG}"

    test -f /usr/lib/systemd/system/kdump.service || return
    if systemctl is-enabled --quiet kdump.service; then
        # Paths are quoted: the mount point comes from mktemp, which
        # honours TMPDIR and may therefore contain whitespace.
        if [ "${KDUMP_SYSCONFIG}" -nt "${snapshot_sysconfig}" ]; then
            cp -a "${KDUMP_SYSCONFIG}" "${snapshot_sysconfig}"
        fi
        chroot "${MOUNT_DIR}" /usr/sbin/tu-rebuild-kdump-initrd
    fi
}

# Error exit path (only called in the error case): delete the snapshots
# created by this run, flag the failure in the salt grains when salt mode
# is active, log the end of the run and terminate.
#   $1 - exit status of the script
quit() {
    local failed_snapshot

    for failed_snapshot in "${SNAPSHOT_ID}" "${SECOND_SNAPSHOT_ID}"; do
        [ -z "${failed_snapshot}" ] && continue
        log_error "Removing snapshot #${failed_snapshot}..."
        snapper delete ${failed_snapshot} |& tee -a ${LOGFILE}
    done

    if [ $USE_SALT_GRAINS -eq 1 ]; then
        if [ ! -f /etc/salt/grains ]; then
            # no grains file yet: create it
            echo "tx_update_failed: true" > /etc/salt/grains
        elif grep -q tx_update_failed /etc/salt/grains; then
            # modify variable in existing salt grains
            sed -i -e 's|tx_update_failed:.*|tx_update_failed: true|g' /etc/salt/grains
        else
            # add variable to existing salt grains
            echo "tx_update_failed: true" >> /etc/salt/grains
        fi
    fi

    log_info "transactional-update finished"
    exit $1
}

# Keep the original command line as one string for the log.
ORIG_ARGS="$*"

# if no option is given, assume "up"
if [ $# -eq 0 ]; then
    ZYPPER_ARG="up"
fi

# Consume the command words one by one. Each branch shifts away what it
# used; anything unknown ends in usage(), which terminates the script.
while [ $# -gt 0 ]; do
    case "$1" in
        cleanup)
            DO_CLEANUP=1
            shift
            ;;
        dup)
            ZYPPER_ARG="dup --no-allow-vendor-change"
            shift
            ;;
        up|patch)
            ZYPPER_ARG=$1
            shift
            ;;
        ptf|pkg|package)
            shift
            if [ $# -eq 0 ]; then
                usage 1
            fi
            case "$1" in
                install|in)
                    ZYPPER_ARG="install"
                    shift
                    ;;
                remove|rm)
                    ZYPPER_ARG="remove"
                    shift
                    ;;
                *)
                    usage 1
                    ;;
            esac

            if [ $# -eq 0 ]; then
                usage 1
            fi

            # Everything that is left on the command line is a package name.
            while [ $# -gt 0 ]; do
                ZYPPER_ARG_PKGS="${ZYPPER_ARG_PKGS} $1"
                shift
            done
            # Run installing PTFs interactive
            ZYPPER_NONINTERACTIVE=""
            ;;
        grub.cfg)
            REWRITE_GRUB_CFG=1
            shift
            ;;
        initrd)
            REWRITE_INITRD=1
            REBUILD_KDUMP_INITRD=1
            shift
            ;;
        kdump)
            REBUILD_KDUMP_INITRD=1
            shift
            ;;
        reboot)
            REBOOT_AFTERWARDS=1
            shift
            ;;
        rollback)
            DO_ROLLBACK=1
            shift
            # The optional snapshot number is only taken when it is the last
            # word on the command line.
            if [ $# -eq 1 ]; then
                # The number is compared with -eq later on, so reject
                # anything that is not a plain non-negative integer.
                case "$1" in
                    ''|*[!0-9]*)
                        usage 1
                        ;;
                esac
                ROLLBACK_SNAPSHOT=$1
                shift
            fi
            ;;
        salt)
            REBOOT_AFTERWARDS=1
            USE_SALT_GRAINS=1
            shift
            ;;
        -h|--help)
            usage 0
            ;;
        --version)
            print_version
            ;;
        *)
            usage 1
            ;;
    esac
done

# Allow only one running instance at a time.
if ! bashlock "/var/run/transactional-update.pid"; then
  echo "Couldn't get lock, another instance is already running?"
  exit 1
fi

# load old state file
if [ -f ${STATE_FILE} ]; then
    source ${STATE_FILE}
fi

log_info "transactional-update 1.15 started"
log_info "Options: $ORIG_ARGS"

# The root filesystem has to be btrfs ...
if [ "$(stat -f -c %T /)" != "btrfs" ]; then
  log_error "ERROR: no btrfs as root filesystem!"
  log_info "transactional-update finished"
  exit 1
fi

# ... and needs a /.snapshots directory.
if [ ! -d /.snapshots ]; then
  log_error "ERROR: no snapshots for root filesystem configured!"
  log_info "transactional-update finished"
  exit 1
fi

# Only warn if nothing matching "var.cache" is mounted.
if ! grep -q var.cache /proc/mounts; then
  log_error "WARNING: it looks like your installation isn't recent enough."
fi

# Number of the snapshot that is currently mounted as /
ORIG_SNAPSHOT_ID=$(grep subvol=/@/.snapshots/ /proc/mounts | grep "/ btrfs" | sed -e 's|.*.snapshots/\(.*\)/snapshot.*|\1|g')
# Number of the snapshot that is the default btrfs subvolume
DEFAULT_SNAPSHOT_ID=$(btrfs subvolume get-default / | sed -e 's|.*.snapshots/\(.*\)/snapshot|\1|g')
# Value of the btrfs "ro" property of / (compared with "true"/"false" below)
RO_ROOT=$(btrfs property get / ro | sed -e 's|ro=||')

#
# Rollback part: make the requested snapshot the default again and exit.
# ROLLBACK_SNAPSHOT=0 means "the snapshot that is currently mounted as /".
#
if [ "${DO_ROLLBACK}" -eq 1 ]; then
    NEED_REBOOT_WARNING=1

    if [ "${ROLLBACK_SNAPSHOT}" -eq 0 ]; then
        ROLLBACK_SNAPSHOT=${ORIG_SNAPSHOT_ID}
        NEED_REBOOT_WARNING=0
    fi

    echo "Rollback to snapshot ${ROLLBACK_SNAPSHOT} ..."

    if [ "${RO_ROOT}" == "true" ]; then
        # Read-only root: set the btrfs default subvolume directly.
        BTRFS_ID=$(btrfs subvolume list / | grep "/.snapshots/${ROLLBACK_SNAPSHOT}/snapshot" | awk '{print $2}')
        if [ -z "$BTRFS_ID" ]; then
            log_error "ERROR: couldn't determine btrfs subvolume ID"
            exit 1
        fi
        btrfs subvolume set-default "$BTRFS_ID" /.snapshots
        if [ $? -ne 0 ]; then
            log_error "ERROR: btrfs set-default $BTRFS_ID failed!"
            exit 1
        fi
        # Remove possible cleanup algo. The rollback itself is already done
        # at this point, so a failure is reported but not fatal.
        if ! snapper modify -c '' "${ROLLBACK_SNAPSHOT}"; then
            log_error "ERROR: cannot remove cleanup algorithm from snapshot #${ROLLBACK_SNAPSHOT}"
        fi
    else
        # Read-write root: let snapper do the rollback and do not claim
        # success when it failed.
        if ! snapper rollback "${ROLLBACK_SNAPSHOT}"; then
            log_error "ERROR: snapper rollback ${ROLLBACK_SNAPSHOT} failed!"
            exit 1
        fi
        NEED_REBOOT_WARNING=1
    fi
    if [ "${NEED_REBOOT_WARNING}" -eq 1 ]; then
        echo "Please reboot to finish rollback!"
    fi
    exit 0
fi

#
# Cleanup part: make sure old root filesystem will be removed after it is no longer active.
#
if [ "${DO_CLEANUP}" -eq 1 ]; then
    # if our current snapshot is identical to the default snapshot, we did a reboot after the
    # last transactional snapshot and try to cleanup old snapshots
    if [ "${ORIG_SNAPSHOT_ID}" -eq "${DEFAULT_SNAPSHOT_ID}" ]; then
        if [ -n "${LAST_WORKING_SNAPSHOT_ID}" ]; then
            log_info "Add cleanup algorithm to snapshot #${LAST_WORKING_SNAPSHOT_ID}"
            snapper modify -c number "${LAST_WORKING_SNAPSHOT_ID}" |& tee -a "${LOGFILE}"
            if [ ${PIPESTATUS[0]} -ne 0 ]; then
                log_error "ERROR: cannot set cleanup algorithm for snapshot #${LAST_WORKING_SNAPSHOT_ID}"
            fi
            # On a read-write root a read-only clone marked "important" was
            # already created before the update, so this snapshot can be
            # deleted earlier. On a read-only root mark this one as
            # important, so that it will not get deleted too fast.
            if [ "${RO_ROOT}" == "true" ]; then
                log_info "Add \"important=yes\" to snapshot #${LAST_WORKING_SNAPSHOT_ID}"
                snapper modify -u "important=yes" "${LAST_WORKING_SNAPSHOT_ID}" |& tee -a "${LOGFILE}"
                if [ ${PIPESTATUS[0]} -ne 0 ]; then
                    log_error "ERROR: cannot set \"important=yes\" for snapshot #${LAST_WORKING_SNAPSHOT_ID}"
                fi
            fi
            LAST_WORKING_SNAPSHOT_ID=""
            save_state_file 0
        fi
        if [ -n "${UNUSED_SNAPSHOTS}" ]; then
            # UNUSED_SNAPSHOTS is a blank separated list, so it is expanded
            # unquoted on purpose.
            for snap in ${UNUSED_SNAPSHOTS}; do
                # Don't mark our current in use snapshot for deletion
                if [ "${snap}" -ne "${ORIG_SNAPSHOT_ID}" ]; then
                    log_info "Mark not used snapshot #${snap} for deletion"
                    snapper modify -c number "${snap}" |& tee -a "${LOGFILE}"
                    if [ ${PIPESTATUS[0]} -ne 0 ]; then
                        # Report the snapshot that failed; LAST_WORKING_SNAPSHOT_ID
                        # may already have been cleared above.
                        log_error "ERROR: cannot set cleanup algorithm for snapshot #${snap}"
                    fi
                fi
            done
            UNUSED_SNAPSHOTS=""
            save_state_file 0
        fi
    fi
fi

#
# Update part: create a new snapshot of the root filesystem, run the
# requested zypper command and/or regenerate initrd, kdump initrd and
# grub.cfg inside it and, if everything succeeded, make the snapshot the
# new default subvolume. On errors quit() removes the created snapshots.
#
if [ -n "${ZYPPER_ARG}" ] || [ "${REWRITE_GRUB_CFG}" -eq 1 ] \
    || [ "${REWRITE_INITRD}" -eq 1 ] || [ "${REBUILD_KDUMP_INITRD}" -eq 1 ]; then

    if [ -n "${ZYPPER_ARG}" ] && [ -n "${ZYPPER_NONINTERACTIVE}" ]; then
        # check if there are updates at all
        TMPFILE=$(mktemp /tmp/transactional-update.XXXXXXXXXX)
        # The ZYPPER_* variables may hold several words and are therefore
        # expanded unquoted on purpose.
        zypper --xml ${ZYPPER_NONINTERACTIVE} ${ZYPPER_ARG} --dry-run ${ZYPPER_ARG_PKGS} > "${TMPFILE}"
        if [ $? -ne 0 ]; then
            # TMPFILE is kept so that the zypper output can be inspected.
            log_error "ERROR: zypper failed! Logs could be found in ${TMPFILE}"
            quit 1
        fi
        PACKAGE_UPDATES=$(grep "install-summary download-size" "${TMPFILE}" | sed -e 's|.*install-summary download-size=\"\(.*\)\" space-usage-diff.*|\1|g')
        SIZE_OF_UPDATES=$(grep "install-summary.*space-usage-diff" "${TMPFILE}" | sed -e 's|.*install-summary.*space-usage-diff=\"\(.*\)\">.*|\1|g')
        rm -f "${TMPFILE}"
        if [ "${PACKAGE_UPDATES}" -eq 0 ] && [ "${SIZE_OF_UPDATES}" -eq 0 ]; then
            log_info "zypper: nothing to update"
            log_info "transactional-update finished"
            exit 0
        fi
    fi

    # If the current root filesystem is not read-only, we need to create a read-only copy for rollback.
    # The rw subvolume is not shown in grub2.
    if [ "${RO_ROOT}" == "false" ]; then
        log_info "Create read-only snapshot of current read-write root filesystem (#${ORIG_SNAPSHOT_ID})"
        SECOND_SNAPSHOT_ID=$(snapper create -p -c number -u "important=yes" -d "RO-Clone of #${ORIG_SNAPSHOT_ID}")
        if [ $? -ne 0 ]; then
            log_error "ERROR: snapper create failed!"
            exit 1
        fi
    fi

    SNAPSHOT_ID=$(snapper create -p -d "Snapshot Update")
    if [ $? -ne 0 ]; then
        log_error "ERROR: snapper create failed!"
        quit 1
    fi

    SNAPSHOT_DIR=/.snapshots/${SNAPSHOT_ID}/snapshot

    # Make the snapshot read-write:
    btrfs property set "${SNAPSHOT_DIR}" ro false
    if [ $? -ne 0 ]; then
        log_error "ERROR: changing ${SNAPSHOT_DIR} to read-write failed!"
        quit 1
    fi

    # Save all snapshots we create for update. If transactional-update is
    # run several times before a reboot, we need to cleanup the not used
    # snapshots. Else we would have a big disk space leak.
    UNUSED_SNAPSHOTS="${SNAPSHOT_ID} ${UNUSED_SNAPSHOTS}"
    save_state_file "${SNAPSHOT_ID}"

    # Check if installed with SLES12
    touch "${SNAPSHOT_DIR}/var/tmp/update_snapshot.test"
    if [ $? -ne 0 ]; then
        log_error "ERROR: System installation is too old!"
        quit 1
    fi
    rm -f "${SNAPSHOT_DIR}/var/tmp/update_snapshot.test"

    # On a read only system, make sure that /etc/zypp in the
    # snapshot is current, could come from a overlayfs which
    # means not part of the snapshot itself
    if [ "${RO_ROOT}" == "true" ]; then
        DIR_TO_MOUNT="${DIR_TO_MOUNT} etc/zypp"
    fi

    # Mount everything we need:
    mount -t proc none "${SNAPSHOT_DIR}/proc"
    if [ $? -ne 0 ]; then
        log_error "ERROR: mount of proc failed!"
        quit 1
    fi
    mount -t sysfs sys "${SNAPSHOT_DIR}/sys"
    if [ $? -ne 0 ]; then
        log_error "ERROR: mount of sys failed!"
        quit 1
    fi
    # DIR_TO_MOUNT is a blank separated list: expanded unquoted on purpose.
    for directory in $DIR_TO_MOUNT ; do
        mount -o bind "/$directory" "${SNAPSHOT_DIR}/$directory"
        if [ $? -ne 0 ]; then
            log_error "ERROR: mount of $directory failed!"
            quit 1
        fi
    done

    # Do we need to cleanup the /var/cache directory?
    if [ -d "${SNAPSHOT_DIR}/var/cache/zypp" ]; then
        VAR_CACHE_CLEANUP=0
    else
        VAR_CACHE_CLEANUP=1
    fi

    # Create bind mount, else grub2 will fail.
    # Both steps are checked: without the mount zypper would be run with
    # -R on an empty temporary directory instead of the snapshot.
    MOUNT_DIR=$(mktemp -d)
    if [ $? -ne 0 ] || [ -z "${MOUNT_DIR}" ]; then
        log_error "ERROR: creating temporary mount point failed!"
        quit 1
    fi
    mount -o rbind "${SNAPSHOT_DIR}" "${MOUNT_DIR}"
    if [ $? -ne 0 ]; then
        log_error "ERROR: bind mount of ${SNAPSHOT_DIR} on ${MOUNT_DIR} failed!"
        rmdir "${MOUNT_DIR}"
        quit 1
    fi

    if [ -n "${ZYPPER_ARG}" ]; then
        log_info "Calling zypper ${ZYPPER_ARG}"
        # The variable has to be quoted here: an unquoted empty value turns
        # the test into "[ -n ]", which is always true, and the interactive
        # branch would never be taken.
        if [ -n "${ZYPPER_NONINTERACTIVE}" ]; then
            env DISABLE_RESTART_ON_UPDATE=yes zypper ${ZYPPER_NONINTERACTIVE} -R "${MOUNT_DIR}" ${ZYPPER_ARG} ${ZYPPER_ARG_PKGS} |& tee -a "${LOGFILE}"
            RETVAL=${PIPESTATUS[0]}
        else
            # Interactive run: zypper talks to the terminal directly.
            env DISABLE_RESTART_ON_UPDATE=yes zypper -R "${MOUNT_DIR}" ${ZYPPER_ARG} ${ZYPPER_ARG_PKGS}
            RETVAL=$?
        fi
        if [ $RETVAL -ne 0 ]; then
            log_error "ERROR: zypper ${ZYPPER_ARG} on ${SNAPSHOT_DIR} failed!"
            EXITCODE=1
        else
            REBUILD_KDUMP_INITRD=1
        fi
    fi

    if [ "${REWRITE_INITRD}" -eq 1 ]; then
        log_info "Create new initrd"
        chroot "${MOUNT_DIR}" /sbin/mkinitrd
        if [ $? -ne 0 ]; then
            log_error "ERROR: mkinitrd failed!"
            EXITCODE=1
        else
            REBUILD_KDUMP_INITRD=1
        fi
    fi

    if [ "${REBUILD_KDUMP_INITRD}" -eq 1 ]; then
        log_info "Try to rebuild kdump initrd"
        rebuild_kdump_initrd "${MOUNT_DIR}"
    fi

    if [ "${REWRITE_GRUB_CFG}" -eq 1 ]; then
        log_info "Create a new grub2 config"
        chroot "${MOUNT_DIR}" /usr/sbin/grub2-mkconfig > "${MOUNT_DIR}/boot/grub2/grub.cfg"
        if [ $? -ne 0 ]; then
            log_error "ERROR: grub2-mkconfig failed!"
            EXITCODE=1
        fi
    fi

    # Unmount everything we don't need anymore:
    for directory in proc sys $DIR_TO_MOUNT ; do
        umount -R "${SNAPSHOT_DIR}/$directory"
        if [ $? -ne 0 ]; then
            log_error "ERROR: umount of $directory failed!"
            # Record who is still using the mount point
            fuser -v "${SNAPSHOT_DIR}/$directory" >> "${LOGFILE}"
            lsof "${SNAPSHOT_DIR}/$directory" >> "${LOGFILE}"
            # Try again after some time
            sleep 30
            umount "${SNAPSHOT_DIR}/$directory"
            if [ $? -ne 0 ]; then
                log_error "ERROR 2nd try: umount of $directory failed!"
                EXITCODE=1
            fi
        fi
    done
    umount "${MOUNT_DIR}"

    # Cleanup of temporary mount point
    rmdir "${MOUNT_DIR}"

    # cleanup other stuff
    # cleanup cache directory
    if [ "$VAR_CACHE_CLEANUP" -eq 1 ]; then
        rm -rf "${SNAPSHOT_DIR}"/var/cache/*
    fi
    # systemd-tmpfiles creates directories/files even if /run is no tmpfs:
    rm -rf "${SNAPSHOT_DIR}"/run/*
    # WARNING: /var/spool/ can contain changes through RPM!
    rm -rf "${SNAPSHOT_DIR}"/var/spool/*

    # Somersault: make the new snapshot the default subvolume
    if [ "$EXITCODE" -eq 0 ]; then
        BTRFS_ID=$(btrfs subvolume list / | grep "${SNAPSHOT_DIR}" | awk '{print $2}')
        if [ -z "$BTRFS_ID" ]; then
            log_error "ERROR: couldn't determine btrfs subvolume ID"
            EXITCODE=1
        else
            btrfs subvolume set-default "$BTRFS_ID" "${SNAPSHOT_DIR}"
            if [ $? -ne 0 ]; then
                log_error "ERROR: btrfs set-default $BTRFS_ID failed!"
                EXITCODE=1
            else
                # Remember the snapshot we are running from; the cleanup
                # part handles it after the next reboot.
                LAST_WORKING_SNAPSHOT_ID=${ORIG_SNAPSHOT_ID}
                save_state_file "${SNAPSHOT_ID}"
            fi
        fi
    fi

    # Make the snapshot ro flag identical to current root:
    btrfs property set "${SNAPSHOT_DIR}" ro "${RO_ROOT}"
    if [ $? -ne 0 ]; then
        log_error "ERROR: changing ${SNAPSHOT_DIR} to ro=${RO_ROOT} failed!"
        EXITCODE=1
    fi

    if [ "${EXITCODE}" -ne 0 ]; then
        quit "${EXITCODE}"
    fi
fi

#
# Final part: after a successful run either trigger the requested reboot
# (directly, via rebootmgr, or by setting a salt grain) or tell the user
# that a reboot is needed. Then log the end of the run and exit.
#
if [ ${EXITCODE} -eq 0 ]; then
    if [ $REBOOT_AFTERWARDS -eq 1 ]; then
        if [ $USE_SALT_GRAINS -eq 1 ]; then
            # Salt mode: do not reboot, only record the request in the grains.
            log_info "transactional-update finished - created salt grains"
            if [ ! -f /etc/salt/grains ]; then
                echo "tx_update_reboot_needed: true" > /etc/salt/grains
            elif grep -q tx_update_reboot_needed /etc/salt/grains; then
                # modify variable in existing salt grains
                sed -i -e 's|tx_update_reboot_needed:.*|tx_update_reboot_needed: true|g' /etc/salt/grains
            else
                # Add variable to existing salt grains
                echo "tx_update_reboot_needed: true" >> /etc/salt/grains
            fi
            # Reset tx_update_failed if exist
            sed -i -e 's|tx_update_failed:.*|tx_update_failed: false|g' /etc/salt/grains
            exit 0
        fi

        # Hand the reboot over to rebootmgr if it is installed and running
        if [ -x /usr/sbin/rebootmgrctl ] && /usr/sbin/rebootmgrctl is-active --quiet; then
            /usr/sbin/rebootmgrctl reboot
            log_info "transactional-update finished - informed rebootmgr"
            exit 0
        fi

        log_info "transactional-update finished - rebooting machine"
        sync
        systemctl reboot |& tee -a ${LOGFILE}
    elif [ $PACKAGE_UPDATES -gt 1 ]; then
        echo "Please reboot your machine to activate the changes and avoid data loss"
    fi
fi

log_info "transactional-update finished"

exit $EXITCODE
