dotfiles/.config/shell/bin/git-review

#!/bin/bash
# git-code-review - easier code reviews
# Usage - see `git-code-review --help`

# I know variables don't expand in single quotes, thank you shellcheck
# shellcheck disable=SC2016,SC1007

# Coding practice:
# - Keep main() looking simple, each line should read mostly as english,
# because this could get fairly cryptic otherwise. Make things into short
# functions to do this.
# - Try to make these ^ abstractions only one-level deep, so there isn't more
# abstraction than needed

# File layout:
# - error/logging functions (eg. error_basic)
# - git value getters (eg. get_git_remote_url)
# - value checkers (eg. exists_remote)
# - git mutating operations
#   - adding operations (eg. create_remote)
#   - removing operations (eg. remove_remote)
# - repo setup functions (eg. action_on_both_repos)
# - generic helper functions (eg. search_replace)
# - usage (help page)
# - argument parsing (eg. parse_ref)
# - main

# Uncomment to trace every command while debugging
# set -x
# Abort on a failing command (-e), on use of an unset variable (-u), and when
# any stage of a pipeline fails (pipefail)
set -euo pipefail

# global for the name of the script: $0 with everything up to the last "/"
# removed. Used in messages and in the worktree cache path built by main.
script_name="${0##*/}"

# printf for diagnostics
# $@ - exactly what printf takes (format string first); output goes to stderr
printf_err() {
   # shellcheck disable=SC2059
   1>&2 printf "$@"
}

# echo for diagnostics
# $@ - exactly what echo takes; output goes to stderr
echo_err() {
   1>&2 echo "$@"
}

# Print a user-facing message on stderr as "<script>: <type>: <message> "
# $1?- message type (eg. "Error"; default: "ERROR")
# $2?- message (eg. "Monkeys failed to climb trees"; default: "UNDEFINED ERROR")
# $@?- Additional lines; each is printed indented by two spaces, and the group
#      is followed by one blank line
error_basic() {
   local type="${1:-ERROR}"
   local msg_main="${2:-UNDEFINED ERROR}"
   # Only drop arguments that are really there: a plain `shift` returns
   # non-zero once nothing is left, which aborts the script under `set -e`
   # even though both arguments have defaults.
   if [ $# -ge 2 ]; then
      shift 2
   else
      shift "$#"
   fi
   printf_err '%s ' "$script_name:" "$type:" "$msg_main"
   printf_err '\n'
   if [ $# -gt 0 ]; then
      printf_err '  %s\n' "$@"
      printf_err '\n'
   fi
}

# Report an internal error: prints where it happened, then delegates to
# error_basic with the type fixed to 'Error'
# $1 - name of the caller, as supplied by the caller
# $2 - line number of the error (the caller's $LINENO)
# $@ - message and additional lines, passed on to error_basic
error() {
   local origin="$1"; shift
   local lineno="$1"; shift
   printf_err '%s ' "$script_name: In function $origin, line $lineno:"
   error_basic 'Error' "$@"
}

# Report an internal error and terminate the script with status 1
# $@ - forwarded unchanged to `error` (caller name, line, message, extra lines)
fatal_error() {
   error "${@}"
   exit 1
}

# Report a user-facing error and terminate the script with status 1
# $@ - forwarded unchanged to error_basic (type, message, extra lines)
fatal_error_basic() {
   error_basic "${@}"
   exit 1
}

# Check one argument; on failure print a message and remember the failure so
# that arg_assert_end can exit later. Never exits by itself.
# $1 - text put in front of $err_msg in the error message
# $@ - command that validates the argument (non-zero status = invalid)
# $err_msg - global holding the common tail of the message (may be unset)
# Sets the global arg_error to 1 when the check fails.
arg_assert() {
   local label="$1"
   shift
   # Valid argument: nothing to report
   "$@" && return 0
   echo_err 'Error: Invalid argument:' "${label}${err_msg:-}"
   arg_error=1
}

# Finish a series of arg_assert calls: exit with $arg_error if any of them
# failed, otherwise clear the flag
arg_assert_end() {
   case "${arg_error:-0}" in
      0) unset arg_error ;;
      *) exit "$arg_error" ;;
   esac
}

# Print a table of variable names and their values on stdout
# $@ - the names of the variables (eg. 'myvar', not "$myvar"). An empty
#      string prints an empty row. Anything that is not a plain variable name
#      is shown with a placeholder instead of being evaluated.
log_vars() {
   local var value
   echo "Variable dump:"
   for var; do
      if [ "$var" = '' ]; then
         echo
      elif [[ "$var" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
         # Indirect expansion instead of `eval`: a crafted "name" can no
         # longer run arbitrary code
         value="${!var}"
         printf '  %s\t%s\n' "$var" "$value"
      else
         printf '  %s\t%s\n' "$var" '<not a valid variable name>'
      fi
   done | column -tl 2 | sed 's/^/  /'
}

# Run a command in a subshell and abort the script (via fatal_error) if it fails
# $1 - name of the caller, as supplied by the caller
# $2 - line called on (the caller's $LINENO)
# $@ - the command to assert
assert() {
   local origin="$1"; shift
   local lineno="$1"; shift
   # The subshell keeps side effects of the checked command out of this shell
   ( "$@" ) && return 0
   fatal_error "$origin" "$lineno" "Assertion failure: $*"
}

#
# Git getters
#

# find the folder for a branch's worktree, if it has one
# $1 - branch to check for (must exist, otherwise the script aborts)
# stdout - path column of the matching `git worktree list` line(s)
get_worktree_folder() {
   local branch="$1"
   # In bash "$0" is the script path, not the function; FUNCNAME gives the
   # name that the "In function ..." message is meant to show
   assert "${FUNCNAME[0]}" "$LINENO" exists_branch "$branch"
   # Lines look like "<path>  <sha> [<branch>]": reverse each line, drop the
   # first two space-separated fields (branch, sha), trim the padding, and
   # reverse back so only the path remains
   git worktree list | grep -F "[$branch]" | rev | cut -d ' ' -f 3- | sed -E 's/^ +//g' | rev
}

# Print the name of the remote that the current branch tracks
# stdout - remote name (eg. "origin"), or nothing when there is no upstream
# Always returns 0.
get_current_remote() {
   local upstream
   # Capture instead of printing directly, so nothing that git writes to
   # stdout on failure reaches the caller
   if upstream="$(git rev-parse --abbrev-ref --symbolic-full-name '@{u}' 2> /dev/null)"; then
      # '@{u}' resolves to "<remote>/<branch>"; `git remote get-url` needs
      # only the remote part. Assumes remote names contain no "/".
      printf '%s\n' "${upstream%%/*}"
   fi
}

# Print the URL of the first usable remote among: upstream, origin, and the
# remote reported by get_current_remote
# stdout - the URL, or the literal text NONE when no candidate has one
# Returns 0 when a URL was found, 1 otherwise. Never exits.
get_git_remote_url_nofail() {
   # Test a few potential remotes
   # WARN: this is a bit hard-coded based on standard practices, as well as my own practices
   local candidate url current
   current="$(get_current_remote)"
   local -a candidates=(upstream origin "$current")
   for candidate in "${candidates[@]}"; do
      # get_current_remote prints nothing when there is no upstream
      [ -n "$candidate" ] || continue
      # A missing candidate is expected here, so git's complaint is not shown
      if url="$(git remote get-url "$candidate" 2> /dev/null)"; then
         printf '%s' "$url"
         return
      fi
   done
   printf 'NONE'
   return 1
}

# Get the url for a git repository's remote, choosing from a few options.
# Accepts no arguments.
# stdout - whatever get_git_remote_url_nofail prints
# Exits with a fatal error when no remote could be found.
get_git_remote_url() {
   if ! get_git_remote_url_nofail; then
      # If no remotes were found, we get here.
      # "$0" is the script path in bash; FUNCNAME holds the function name
      # that the "In function ..." message is meant to show.
      fatal_error "${FUNCNAME[0]}" "$LINENO" \
         "Couldn't find a valid remote. Please specify a remote url with the --remote-url option." \
         "See $script_name --help for more information."
   fi
}

# Print what HEAD currently resolves to, as reported by
# `git rev-parse --abbrev-ref --symbolic-full-name`
get_git_branch_current() {
   local -a rev_parse_flags=(--abbrev-ref --symbolic-full-name)
   git rev-parse "${rev_parse_flags[@]}" HEAD
}

#
# Checker functions
#

# Check whether stdin contains a given entry
# $1 - regex that extracts the candidate entries from each input line
#      (passed to grep -Po, so only the matched part of a line is compared)
# $2 - the entry to look for (must equal a whole extracted entry, literally)
# pipe - the text to search
# Returns 0 when the entry is present, non-zero otherwise.
check_exists() {
   local regex="$1"; shift
   local term="$1"; shift
   # The comparison stage reads ALL of its input. A `grep -q` here would exit
   # at the first hit, the upstream grep could then die of SIGPIPE, and
   # `pipefail` would turn a successful search into a failure.
   # Appending "" forces awk to compare as strings, never as numbers.
   grep -Po -- "$regex" |
      term="$term" awk '
         ($0 "") == (ENVIRON["term"] "") { found = 1 }
         END { exit !found }
      '
}

# check if a remote exists
# $1 - remote name
exists_remote() {
   local wanted="$1"
   # `git remote` lists one name per line; every non-empty line is a candidate
   git remote |
      check_exists '^.+$' "$wanted"
}

# check if a local branch exists
# $1 - branch
exists_branch() {
   local wanted="$1"
   # Each `git branch` line starts with a two-character marker column;
   # '..\K' skips it so only the name is compared
   git branch |
      check_exists '..\K.+$' "$wanted"
}

# check if a worktree exists with a specific branch attached to it
# $1 - branch
exists_branch_as_worktree() {
   local wanted="$1"
   # Only the trailing "[branch]" part of each `git worktree list` line is
   # compared
   git worktree list |
      check_exists '\[[^\]]+\]$' "[$wanted]"
}

#
# Git mutating operation functions
#

# Fetch a remote (the command is echoed through shell_cmd)
# $1 - the remote to fetch
fetch_remote() {
   local target="$1"
   shell_cmd git fetch "$target"
}

# Add a remote, or point an existing remote of that name at the URL, then
# fetch it
# $1 - Name of the remote
# $2 - URL to the remote
create_remote() {
   local name="$1"; shift
   local url="$1"; shift
   # Check if we have the remote already
   if ! exists_remote "$name"; then
      shell_cmd git remote add "$name" "$url"
   else
      shell_cmd git remote set-url "$name" "$url"
   fi
   # Fetch the remote this function was asked about, not whatever a caller
   # happens to have stored in a global
   fetch_remote "$name"
}

# remove a remote (the command is echoed through shell_cmd)
# $1 - remote to remove
remove_remote() {
   local target="$1"
   shell_cmd git remote remove "$target"
}

# Create a local branch starting at <remote>/<remote branch>, unless a branch
# with that name already exists (an existing branch is left untouched)
# $1 - branch name
# $2 - remote name
# $3 - remote branch name
create_branch() {
   local new_branch="$1"; shift
   local from_remote="$1"; shift
   local from_branch="$1"; shift
   # Nothing to do when the branch is already there
   if exists_branch "$new_branch"; then
      return 0
   fi
   shell_cmd git branch "$new_branch" "$from_remote/$from_branch"
}

# remove a branch with `git branch -d` (the command is echoed through shell_cmd)
# $1 - branch to remove
remove_branch() {
   local target="$1"; shift
   shell_cmd git branch -d "$target"
}

# Create a worktree for a branch, unless some worktree already has that
# branch attached
# $1 - where to put the worktree
# $2 - branch the worktree should point at (MUST ALREADY EXIST)
create_worktree() {
   local dir="$1"; shift
   local branch="$1"; shift

   # Ensure the branch exists. It should be created before this function is called.
   # ("$0" is the script path in bash; FUNCNAME is the name the error
   # message is meant to show.)
   assert "${FUNCNAME[0]}" "$LINENO" exists_branch "$branch"

   # See if a worktree already exists with the branch attached
   if ! exists_branch_as_worktree "$branch"; then
      shell_cmd git worktree add --checkout "$dir" "$branch"
   fi
}

# Remove a worktree with `git worktree remove` (echoed through shell_cmd)
# $1 - worktree dir to remove
remove_worktree() {
   local target_dir="$1"; shift
   shell_cmd git worktree remove "$target_dir"
}

#
# Repo setup functions
#

# Run a command once for every line read from stdin
# $@ - the command, joined and run through `eval`; write a literal '$f'
#      (single-quoted by the caller) wherever the current line should go
# pipe - one entry per line (eg. relative paths); each line is put in $f
# Returns the status of the last command run (0 when there was no input).
# NOTE: because of `eval`, everything in $@ is re-parsed by the shell.
action_on_both_repos() {
   # $f is only referenced from inside the eval'd text
   # shellcheck disable=SC2034
   local f
   while IFS=$'\n' read -r f
   do
      # shellcheck disable=SC2294
      eval "$@"
   done
}

# List what `git clean -X` would delete (the git-ignored files), one path per
# line, without actually deleting anything
get_ignored_files() {
   # -n: dry run, -d: include directories, -X: only ignored files
   git clean -ndX |
      # keep just the path from "Would remove <path>"
      grep -Po 'Would remove \K.*$' |
      # directories are listed with a trailing slash; drop it
      sed -E 's-/$--'
}

# List the paths of submodules from `git submodule` output, one per line.
# Only lines ending in a " (...)" annotation produce output.
get_submodules() {
   git submodule |
      # drop the leading status character, the hash, and the space after it
      grep -Po '.[^ ]+ \K.*$' |
      # work from the end of the line: cut off the trailing " (...)" part
      rev |
      grep -Po '[^( ]+\( \K.*$' |
      rev
}

# Hardlink-copy (cp -lr) every git-ignored file from one tree into another
# $1 - source directory (main passes the main repository)
# $2 - destination directory (main passes the review worktree)
# Takes no pipe input; the file list comes from get_ignored_files.
symlink_ignored() {
   local src="$1" dst="$2" f
   # Plain quoted expansions instead of an eval'd string, so paths that
   # contain spaces or glob characters stay one argument
   get_ignored_files | while IFS= read -r f; do
      cp -lr "$src/$f" "$dst/$f"
   done
}

# Make the submodules of one tree available in another tree
# $1 - source directory (main passes the main repository)
# $2 - destination directory (main passes the review worktree)
# For every path from get_submodules: replace the (empty) directory in the
# destination with a hardlink copy of the source, then rewrite its ".git"
# file to point at <source>/.git/modules/<path>.
symlink_subs() {
   local src="$1" dst="$2" sub
   # Hardlink files
   # (quoted expansions instead of an eval'd string keep paths with spaces
   # or glob characters intact)
   get_submodules | while IFS= read -r sub; do
      rmdir "$dst/$sub" && cp -lr "$src/$sub" "$dst/$sub"
   done
   # Update submodule ".git" files
   # The copied .git is a hardlink to the source's file, so it is removed
   # first and a fresh file is written in its place.
   get_submodules | while IFS= read -r sub; do
      rm "$dst/$sub/.git" && echo "gitdir: $src/.git/modules/$sub" > "$dst/$sub/.git"
   done
}

# Delete, from the destination tree, every path that get_ignored_files lists
# $1 - source directory (unused; kept so all link helpers take the same args)
# $2 - destination directory to delete from (must not be empty)
remove_symlinked_ignores() {
   # Refuse an empty destination: "$dst/$f" would otherwise start at "/"
   local dst="${2:?remove_symlinked_ignores: destination directory is required}"
   local f
   # Quoted expansions instead of an eval'd string: a name with spaces must
   # never be split into several `rm -r` targets
   get_ignored_files | while IFS= read -r f; do
      rm -r "$dst/$f"
   done
}

# Undo symlink_subs in the destination tree: delete every submodule path and
# then `git restore` it there
# $1 - source directory (unused; kept so all link helpers take the same args)
# $2 - destination directory (must not be empty)
remove_symlinked_subs() {
   # Refuse an empty destination: "$dst/$sub" would otherwise start at "/"
   local dst="${2:?remove_symlinked_subs: destination directory is required}"
   local sub
   # error_basic Info Removing submodules
   # Quoted expansions instead of an eval'd string: a name with spaces must
   # never be split into several `rm -r` targets
   get_submodules | while IFS= read -r sub; do
      rm -r "$dst/$sub"
   done
   get_submodules | while IFS= read -r sub; do
      git -C "$dst" restore "$dst/$sub"
   done
}

# Populate a worktree with the ignored files and submodules of the main repo.
# Best effort: a failing step is ignored and the function always returns 0.
# $@ - passed unchanged to symlink_ignored and symlink_subs
create_links() {
   local step
   for step in symlink_ignored symlink_subs; do
      "$step" "$@" || true
   done
}

# Remove what create_links put into a worktree.
# Best effort: a failing step is ignored and the function always returns 0.
# $@ - passed unchanged to remove_symlinked_ignores and remove_symlinked_subs
remove_links() {
   local step
   for step in remove_symlinked_ignores remove_symlinked_subs; do
      "$step" "$@" || true
   done
}

#
# Generic helper functions
#

# run a shell command and print the command that was run
# $@ - the command and its arguments
# The command line is echoed in green on stderr as " $ <command>"; the
# command's own output is left alone and its exit status is returned.
shell_cmd() {
   # Escape sequences written with $'\e' so they do not depend on raw ESC
   # bytes surviving in this file
   local green=$'\e[32m'
   local nc=$'\e(B\e[m' # No Color (what `tput sgr0` emits on xterm-like terminals)
   printf_err '%s' "$green"
   printf_err '%s' " \$ $*$nc"
   printf_err '\n'
   "$@"
}

# Run a command in a directory without changing the current directory
# $1 - What directory to run it in
# $@ - Command to run
# Both the `cd` and the command are echoed through shell_cmd. Returns the
# status of the subshell, ie. of the command (or 1 if the `cd` failed).
shell_cmd_dir() {
   local dir="$1"; shift
   (
      # Cd (inside the subshell, so the caller's directory is untouched)
      # "$0" is the script path in bash; FUNCNAME is the name the error
      # message is meant to show.
      shell_cmd cd "$dir" || fatal_error "${FUNCNAME[0]}" "$LINENO" "Couldn't \`cd\` into directory:" "  $dir"
      # Run
      shell_cmd "$@"
   )
}

# Extract the field that is N fields from the end of each input line.
# $1? - how many fields from the end to select (default: 1, the last field)
# $2? - field separator; only its first character is used (default: /)
# pipe - input lines
get_from_end() {
   # Read the optional arguments by position instead of shifting: `shift`
   # returns non-zero when an argument is missing, which aborts under `set -e`
   local dist_from_end="${1:-1}"
   local sep="${2:-/}"
   # Reverse each line so "Nth from the end" becomes "Nth from the start",
   # cut, then reverse the selected field back
   rev | cut -d "${sep:0:1}" -f "$dist_from_end" | rev
}

# Search and replace a string *literal* (WITHOUT evaluating a regular expression or code of any kind)
# Only the first occurrence on each line is replaced.
# $1 - search term (an empty term leaves the input unchanged)
# $2 - replace with
# pipe - input
search_replace() {
   # The strings travel through the environment, not `awk -v`, because -v
   # processes backslash escapes. index()/substr() are used instead of sub(),
   # which would read the search term as a regex and "&" in the replacement
   # as "the matched text".
   search="$1" replace="$2" awk '
      BEGIN {
         needle = ENVIRON["search"]
         with = ENVIRON["replace"]
         len = length(needle)
      }
      {
         at = (len > 0) ? index($0, needle) : 0
         if (at > 0)
            $0 = substr($0, 1, at - 1) with substr($0, at + len)
         print
      }
   '
}

#
# Usage information (--help)
#

# FUTURE INTERFACE FOR `opt`  (NOT YET IMPLEMENTED)
# # makes an option for getopts
# # $1 - short option, single character. (eg. 'f')
# # $2 - long option (eg. 'file')
# # $3 - option category, controls what array the options' descriptions are put in (eg. 'info' puts it in opts_info)
# # $4?- argument for the option, for `getopt` (eg. ':' or '::')
# # $4?- true/false to indicate whether the option is a boolean option. Requires
# #      a long opt to be passed. Generates a corresponding version of the long
# #      option with "no-" prepended to it. (eg. 'true', which generates
# #      `--cool-opt` and `--no-cool-opt`)

# Register one command-line option by appending it to the global option
# strings getopts_short and getopts_long
# $1 - short option; only the first character is used, '' for none (eg. 'f')
# $2 - long option, '' for none (eg. 'file')
# $3?- one of:
#      ':' or '::'  argument marker in `getopt` notation, appended to both
#                   the short and the long entry
#      'true'       boolean option: a second long option with "no-" prepended
#                   is registered as well (eg. `--cool-opt` and
#                   `--no-cool-opt`)
#      'false'      plain flag without argument (the default)
# Also leaves the globals short, long and arg set.
# TODO: make this create data to go in the usage information
opt() {
   # help_category="${1:-info}"; shift
   # Arguments are read by position: shifting past the last argument returns
   # non-zero, which would abort the script under `set -e` for two-argument
   # calls such as `opt h help`.
   short="${1:0:1}"
   long="${2:-}"
   arg="${3:-false}"
   # arg_type="${1:-}"; shift

   # # take data from pipe
   # if [ -p /dev/stdin ];then
   #    desc="$(cat)"
   # else
   #    fatal_error "$0" "$LINENO" "Received no description for command $long (category $help_category)"
   # fi

   case "$arg" in
      # two assignments: clear arg, and register the "no-" variant
      true) arg= getopts_long+="${getopts_long:+,}no-$long" ;; #longbool_help="[no-]"
      false) arg=;;
      # :) arg_type="";;
      :|::);;
   esac

   case "$short" in
      '') ;;#shorthelp="   "
      *)  getopts_short+="$short$arg";; #shorthelp="-$short,"
   esac

   case "$long" in
      '') ;; #longhelp=
      # long options are comma-separated
      *) getopts_long+="${getopts_long:+,}$long$arg";; #longhelp="--$longbool_help$long"
   esac

   # # help stuff
   # eval opts_"$help_category"+='("$shorthelp$longhelp" "$desc")'
}

# Succeeds when $1 contains a match for the regular expression $2
# $1 - string to test
# $2 - regular expression (used unquoted on the right of `=~`)
opt_is_supported() {
   local subject="$1" pattern="$2"
   [[ "$subject" =~ $pattern ]]
}

# Succeeds when $1 followed by ":" contains a match for the regular
# expression $2
# $1 - string to test
# $2 - regular expression (used unquoted on the right of `=~`)
opt_accepts_arg() {
   local subject="$1:" pattern="$2"
   [[ "$subject" =~ $pattern ]]
}

# Succeeds when $1 followed by "::" does NOT contain a match for the regular
# expression $2
# $1 - string to test
# $2 - regular expression (used unquoted on the right of `=~`)
opt_requires_arg() {
   local subject="$1::" pattern="$2"
   if [[ "$subject" =~ $pattern ]]; then
      return 1
   else
      return 0
   fi
}

# Print the help page on stdout.
# The text is an unquoted heredoc, so $script_name and the two command
# substitutions in it are expanded each time the page is printed:
# - the opts_info array is formatted into a table (NOTE(review): nothing in
#   the code shown here assigns opts_info - confirm where it should come from)
# - get_git_remote_url_nofail supplies the "currently ..." default remote URL
usage() {
   # ideas for new options
   # $script_name review [(--no-cleanup | )] <REF> [CMD...]  Open a shell to review code from REF, then clean up
   # $script_name clean [REF]            Clean the results of REF
   # --no-cleanup  Don't remove the remote and cloned code by default

   cat << EOF
$script_name - review changes on a remote branch

Usage:
   $script_name [opts] <branch_ref> [cmd...]
   $script_name review [opts] <branch_ref> [cmd...]
   $script_name clean [opts] [branch_ref]

Subcommands:  (NOT IMPLEMENTED)
   (none)  If no subcommand is specified, the review subcommand will be assumed.
   review  Review branch_ref and execute the cmd
   clean   Remove remotes, branches, and worktrees associated with a
              specific branch_ref. If no branch_ref is provided, all remotes,
              branches, and worktrees containing the "review" prefix will be
              pruned.

Options for all subcommands:  (NOT IMPLEMENTED)
   $(printf "%s\n" "${opts_info[@]}" | paste - - | column -tc 80 -s $'\t' -N opt,desc -W desc -d)

Options for 'review' subcommand:  (NOT IMPLEMENTED)
   --[no-]clean-on-success  Clean the worktree only if the command exits with
                            error code 0.

Options for 'clean' subcommand:  (NOT IMPLEMENTED)
   -f, --force              Force clean worktree (any changes will be lost).

Definitions:
   branch_ref - Where the remote code and branch is. One of the following:
      BRANCH                branch located on the default remote
      USERNAME:BRANCH       username and branch to use as the source for a remote.
                            Repository name is be assumed to be the same as the
                            default remote. (currently $(get_git_remote_url_nofail))
      USERNAME/REPO         username and repo to use as the source for the
                            remote (current branch is assumed to be the name of
                            the remote branch)
      USERNAME/REPO:BRANCH  repository, username, and branch name to use as the

   cmd - A command and arguments to run on the remote. Default: \`bash\`

Example:
   $ pwd
   ~/git/gimp
   $ git remote get-url origin
   https://gitlab.gnome.org/GNOME/gimp
   $ $script_name joe/cool-feature
    # git remote add, git fetch, git worktree add
   $ pwd
   ~/.cache/$script_name/gimp/joe-cool-feature
   $ exit
    # git worktree remove, rm -r, git remote remove
   $ pwd
   ~/git/gimp
   $ ls ~/.cache/$script_name/gimp/joe-cool-feature
   No such file or directory.

The URL for the remote git repository is needed to be able to make new remotes.
Since many code forges follow the same format for their URL's, we are often
able to extrapolate the information we need to make a new remote based on just
the contents of the URL on the defult remote.
The assumed format for URL's on remotes is as follows:
   https://git.example.net/joe-mama/code
         git@forge.site.io:joe-mama/code
                           ^~~~~~~^ ^~~^
                           USERNAME REPO
The USERNAME and REPO parts of the URL will be replaced with their
corresponding parts from the given branch_ref to create a new URL. If the remote
on your URL doesn't follow this format, specify the URL with the --remote-url
option. In this case, the specified branch_ref will still be used to get
information used for creating folders.
EOF
}

# Parse a branch_ref and normalize it to USERNAME/REPO:BRANCH form
# $1 - a branch_ref (BRANCH, USERNAME:BRANCH, USERNAME/REPO or
#      USERNAME/REPO:BRANCH)
# Prints nothing. Results are returned in globals:
#   ref                       the normalized ref
#   ref_type                  which of the four forms $1 had
#   username, repo, branch    the three parts of the normalized ref
#   remote_url_fromlocalrepo  URL from get_git_remote_url, trailing "/" removed
#   repo_fromremote           repository part of that URL, without ".git"
#   username_fromremote       username part of that URL
# Parts that $1 does not specify are filled in from the remote URL, or, for
# the branch of a USERNAME/REPO ref, from the current branch.
normalize_ref() {
   ref="$1"

   # assume U/R:B form, and take stuff from it
   # take the end of ref
   local branch_name_fromref="${ref##*:}"
   # take the beginning of ref
   local username_fromref="${ref%%/*}"
   # isolate the middle part of the ref
   local repo_fromref="${ref#*/}"
   repo_fromref="${repo_fromref%:*}"

   # TODO: process --remote-url here
   remote_url_fromlocalrepo="$(get_git_remote_url)"
   # remove all trailing slashes from the url (a single `%/` strips only one)
   while [[ "$remote_url_fromlocalrepo" == */ ]]; do
      remote_url_fromlocalrepo="${remote_url_fromlocalrepo%/}"
   done

   # Default value for the repo
   ## assign to the repo name part of the url - "foo[:/]username/(repo).git"
   repo_fromremote="${remote_url_fromlocalrepo##*/}" # everything after the last slash
   repo_fromremote="${repo_fromremote%.git}" # remove ".git" (if it's there)
   ## assign to the username part of the url - "foo[:/](username)/repo.git"
   # Cut the whole last path component (including any ".git") off first, then
   # keep what follows the last "/" or ":". Removing just "/<repo>" would
   # leave the ".git" suffix glued to the username.
   local url_without_repo="${remote_url_fromlocalrepo%/*}"
   username_fromremote="${url_without_repo##*[/:]}"

   # Decide how to proceed based on the format of ref
   case "$ref" in
      */*:*)
         ref_type="USERNAME/REPO:BRANCH"
         username="$username_fromref"
         repo="$repo_fromref"
         branch="$branch_name_fromref"
         ;;
      *:*)
         ref_type="USERNAME:BRANCH"
         username="${ref%:*}"
         repo="$repo_fromremote"
         branch="$branch_name_fromref"
         ;;
      */*)
         ref_type="USERNAME/REPO"
         username="$username_fromref"
         repo="$repo_fromref"
         branch="$(get_git_branch_current)"
         ;;
      *)
         ref_type="BRANCH"
         username="$username_fromremote"
         repo="$repo_fromremote"
         branch="$ref"
         ;;
   esac

   ref="$username/$repo:$branch"
}

# Act on a single, already normalized option
# $1 - the option (eg. '-h' or '--help'); other arguments are ignored
# -h/--help prints the usage and exits; every other option is a no-op.
process_opt() {
   if [[ "$1" == -h || "$1" == --help ]]; then
      usage
      exit
   fi
}

# Set the username, repo, and branch_name_remote variables from a ref
# $1 - a branch ref to pass to normalize_ref
# normalize_ref leaves the normalized USERNAME/REPO:BRANCH text in $ref,
# which is then split into its three parts.
setup_from_ref() {
   ref="$1"

   normalize_ref "$1"

   # BRANCH: everything after the last ":"
   branch_name_remote="${ref##*:}"
   # USERNAME: everything before the first "/"
   username="${ref%%/*}"
   # REPO: what sits between the first "/" and the last ":"
   repo="${ref#*/}"
   repo="${repo%:*}"
}

# Parse arguments into variables that the rest of the script needs
# $@ - the command-line arguments
# Registers the known options with `opt` (which fills getopts_short and
# getopts_long), then walks over "$@": long options are normalized and handed
# to process_opt; everything after "--" becomes the cmd array.
# NOTE(review): this function looks unfinished - main, as shown in this file,
# never calls it, and short options and positional arguments have empty
# handlers. Points that need confirming are marked NOTE(review) below.
process_opts() {
   opt h help
   opt v verbose
   opt r remote-url :
   opt w worktree-dir :
   opt f force
   opt c clean-on-success true

   # Earlier approach based on the external `getopt`, kept for reference:
   # opts_long=('help,verbose,remote-url:,worktree-dir:,force,clean-on-success,no-clean-on-success')
   # args=$(getopt \
   #    -o "$getopts_short" \
   #    --long "$getopts_long" \
   #    -n "$script_name" \
   #    -- "$@")
   #
   # getopt_exitcode=$?
   # if [ $getopt_exitcode != 0 ]; then
   #    exit $getopt_exitcode
   # fi

   # track errors
   # NOTE(review): the loop below tests "$error", which this function never
   # initializes (only error_encountered is) - confirm which name is intended
   error_encountered=false
   # throw errors when passed --opt=arg when opt takes no argument
   error_on_longopts_withEqualSigns_thatTakeNoArgs=true

   # NOTE(review): the loop only ends at "--"; nothing checks whether any
   # arguments are left before "$1" is read - confirm
   while true; do
      case "$1" in
         (--) # Process command
            shift;
            # now, ${@:-bash} is the command to run
            # (the code falls back to $SHELL when no command is given)
            cmd=("${@:-${SHELL}}")
            break;
            ;;
         (--*) # longopts

            # --opt=arg -> --opt
            opt="${1%%=*}"

            # --opt=arg -> =arg
            # --opt     -> --opt
            # NOTE(review): "*=" matches up to the LAST "=", so --opt=a=b
            # gives "=b" - confirm that is intended
            argeq="${1/#*=/=}"

            shift

            if opt_is_supported "$opt" "$getopts_long"; then
               # Handle arguments set with equal signs
               case "$argeq" in
                  =*) # --opt=arg
                     if opt_accepts_arg "$opt" "$getopts_long"; then
                        arg="${argeq#=}"
                     elif [ "$error_on_longopts_withEqualSigns_thatTakeNoArgs" = true ]; then
                        # NOTE(review): $arg is not assigned on this path; the
                        # rejected value is in $argeq - confirm
                        error="Option $opt recieved an argument ('$arg'), but does not accept arguments"
                     fi
                     ;;
                  *) # --opt arg OR --opt --otheropt
                     if opt_requires_arg "$opt" "$getopts_long"; then
                        if [ $# = 0 ]; then
                           error="Option $opt requires an argument, but none was given"
                        else
                           # NOTE(review): unlike the optional-argument branch
                           # below, the consumed argument is not shifted away
                           arg="$1"
                        fi
                     elif opt_accepts_arg "$opt" "$getopts_long"; then
                        # If we get here, the option doesn't require an argument, but it may optionally accept one
                        if [ $# != 0 ]; then
                           case "$1" in
                              -*) ;; # option will get no argument since next argument is an option (whether the option is a valid option or not)
                              *) arg="$1"; shift;;
                           esac
                        fi
                     fi
                     ;;
               esac

               # Now our option is normalized and $@ has been shifted to not
               # include it or any of the arguments it takes

               # if arg is set (if we have an argument), pass it along
               # NOTE(review): arg is not cleared between iterations, so a
               # value from an earlier option may still be set here - confirm
               if [ "${arg:+true}" = true ]; then
                  process_opt "$opt" "$arg" "$@"
               else
                  process_opt "$opt" "$@"
               fi
            else
               error="No such option: $opt"
            fi
            ;;
         # NOTE(review): short options and positional arguments are matched
         # but neither handled nor shifted
         (-*);;
         (*);;
      esac
      # Report an error recorded during this iteration
      # NOTE(review): error is reset to "false" BEFORE it is printed, so the
      # message passed to error_basic is "false" - confirm the intended order
      if [ "$error" != false ]; then
         error=false
         # error_encountered=true
         error_basic 'Error' "$error"
         shift
         continue
      fi
   done

}

# Review a remote branch in a temporary worktree
# $1 - branch_ref (see "Definitions" in usage)
# $@ - command to run inside the worktree (default: $SHELL)
# Steps: derive the remote URL and names from the ref, create the remote,
# branch and worktree, link ignored files and submodules into the worktree,
# run the command there, then remove everything again.
main() {
   if [ $# = 0 ]; then
      error_basic 'Error' "Please pass at least one argument."
      printf_err '\n'
      usage
      exit 1
   fi

   # ensure running inside a git repository
   if ! git_repo="$(git rev-parse --show-toplevel)"; then
      # "$0" is the script path in bash; FUNCNAME is the name the
      # "In function ..." message is meant to show
      fatal_error "${FUNCNAME[0]}" "$LINENO" "Please run from a git repository"
   fi

   ref="$1"
   # sets username, repo, branch_name_remote and, through normalize_ref,
   # ref_type and the *_fromremote / *_fromlocalrepo variables used below
   setup_from_ref "$ref"
   shift

   cmd=("${@:-${SHELL}}")

   # TODO: process --remote-url here
   # Build the URL of the reviewed repository by swapping the username and
   # repo parts of the local remote's URL
   remote_url="$(echo "$remote_url_fromlocalrepo" | search_replace "$username_fromremote" "$username" | search_replace "$repo_fromremote" "$repo")"

   # arg_assert prepends its first argument to this text
   err_msg=" in $ref_type-style branch_ref cannot be empty"
   arg_assert url      [ "$remote_url"         != '' ]
   arg_assert repo     [ "$repo"               != '' ]
   arg_assert branch   [ "$branch_name_remote" != '' ]
   arg_assert username [ "$username"           != '' ]
   arg_assert_end

   # Constants
   k_git_ref_prefix=review

   # derive:
   #  - REMOTE (review-USER)
   #  - BRANCH_LOCAL (review-USER-BRANCH)
   #  - WORKTREE_FOLDER (<cache dir>/SCRIPT_NAME/REPO/USER-BRANCH)

   # Derived variables
   remote_name="$k_git_ref_prefix-$username"
   branch_name_local="$k_git_ref_prefix-$username-$branch_name_remote"
   worktree_folder="${XDG_CACHE_HOME:-"$HOME"/.cache}/$script_name/$repo/$username-$branch_name_remote"

   clean_worktree=true

   #
   # Main actions
   #

   # make remote
   create_remote "$remote_name" "$remote_url"
   # make branch
   create_branch "$branch_name_local" "$remote_name" "$branch_name_remote"
   # make worktree if it doesn't exist
   if ! exists_branch_as_worktree "$branch_name_local"; then
      # make worktree
      create_worktree "$worktree_folder" "$branch_name_local"
      create_links "$git_repo" "$worktree_folder"
   else
      # worktree already exists, use it
      worktree_folder="$(get_worktree_folder "$branch_name_local")"
      # Since we didn't create the worktree, don't remove it
      # clean_worktree=false
   fi

   # do the command in the worktree folder
   shell_cmd_dir "$worktree_folder" "${cmd[@]}"

   if [ "$clean_worktree" = true ]; then
      # Each step runs only if the previous one succeeded
      # clean symlinked files
      remove_links "$git_repo" "$worktree_folder" &&
         # clean up worktree
         remove_worktree "$worktree_folder" &&
         # clean up branch
         remove_branch "$branch_name_local" &&
         # clean up remote
         remove_remote "$remote_name"
   fi
}

# Entry point: hand all command-line arguments to main
main "$@"