#!/bin/bash
#
# project_stats
#
# stats on project/release from git database
#

###
### settings
###

set -e               # exit on any uncaught error
set +o histexpand    # don't expand history expressions
shopt -s nocasematch # case-insensitive regular expressions

###
### configurable global variables
###

# Path lists, all relative to the project root.  They are appended after
# "--" on the git command lines below; cask_paths is also searched by find.
declare -a cask_paths=(
    Casks
)
declare -a code_paths=(
    bin
    developer
    lib
    spec
    test
    brew-cask.rb
    Rakefile
    Gemfile
    Gemfile.lock
    .travis.yml
    .gitignore
)
declare -a doc_paths=(
    doc
    LICENSE
    '*.md'    # quoted: stored as the literal pattern, not expanded by the shell
)
# upper bound of every revision range / diff computed by this script
end_object='HEAD'

###
### global variables
###

# number of Cask contributors; written by print_contributor_stats and
# read again by print_cask_stats
cask_authors=''

# prefer GNU xargs (gxargs) when it is on PATH, else fall back to the
# system xargs.  "command -v" is a shell builtin: unlike an external
# "which" it needs no extra binary and prints nothing when the lookup fails.
xargs="$(command -v gxargs || printf '/usr/bin/xargs')"

###
### functions
###

# warn: print a message on stderr.
#
# Arguments: any number of words; they are joined into one message.
# The two-character sequences "\t" and "\n" inside the message are turned
# into a real tab / newline, trailing whitespace is stripped, and exactly
# one newline is appended.  Nothing is written to stdout.
warn () {
    # "$*" joins all arguments into a single string (first char of IFS as
    # separator); assigning "$@" to a scalar is an array-to-string misuse.
    local message="$*"
    message="${message//\\t/$'\011'}"
    message="${message//\\n/$'\012'}"
    # inner expansion yields the trailing run of whitespace; outer one removes it
    message="${message%"${message##*[![:space:]]}"}"
    printf '%s\n' "$message" 1>&2
}

# die: emit a message through warn (stderr), then terminate the shell
# with exit status 1.  All arguments are forwarded to warn untouched.
die () {
    warn "$@"; exit 1
}

# cd_to_project_root: change the working directory to the top level of
# the git work tree that contains this script.
#
# First enters the directory of "$0", then asks git for the work-tree
# root and enters that.  Calls die (exit status 1) when git reports no
# root or when either directory cannot be entered.
cd_to_project_root () {
    # declared separately from the assignments so that "local" does not
    # hide the exit status of the command substitutions
    local script_dir git_root

    script_dir="$(/usr/bin/dirname "$0")"
    # CDPATH is blanked so a relative script_dir is resolved against the
    # current directory only; "--" protects against a leading dash.
    CDPATH='' cd -- "$script_dir" \
        || die "ERROR: Could not enter script directory '$script_dir'"

    # a failing git is folded into the empty-result case so the message
    # below is printed instead of a bare errexit abort
    git_root="$(git rev-parse --show-toplevel)" || git_root=''
    if [[ -z "$git_root" ]]; then
        die "ERROR: Could not find git project root"
    fi

    CDPATH='' cd -- "$git_root" \
        || die "ERROR: Could not enter git project root '$git_root'"
}

# warn_if_off_branch: print a warning (via warn) when the branch that is
# currently checked out differs from the expected one.
#
# Arguments: $1 - expected branch name; 'master' when missing or empty
# Never aborts; the comparison is done with [[ ]].
warn_if_off_branch () {
    local expected="${1:-master}"
    local actual="$(git rev-parse --abbrev-ref HEAD)"

    if [[ "$actual" != "$expected" ]]; then
        warn "\nWARNING: you are running from branch '$actual', not '$expected'\n\n"
    fi
}

# verify_git_object: make sure a name resolves to an object in the
# repository.
#
# Arguments: $1 - object name to check (tag, hash, ref ...)
# Returns quietly when "git rev-parse --verify" accepts the name;
# otherwise calls die with an error message.  git's own output is discarded.
verify_git_object () {
    local candidate="$1"

    if git rev-parse --verify "$candidate" -- >/dev/null 2>&1; then
        return 0
    fi
    die "\nERROR: No such commit object: '$candidate'\n\n"
}

# print_contributor_stats: report the number of distinct author e-mail
# addresses on non-merge commits in the range start_object..end_object,
# broken down by Casks / code / docs / any path.
#
# Arguments: $1 - start object (lower bound of the range)
#            $2 - initial commit of the repository
# Globals:   end_object, cask_paths, code_paths, doc_paths (read)
#            cask_authors (written: raw "wc -l" output for the Casks paths)
# When $1 differs from $2, also prints the all-time contributor count and
# the number of contributors that are new since $1.
print_contributor_stats () {
    local start_object="$1"
    local initial_commit="$2"
    # declared up front: keeps new_contribs out of the global scope and
    # keeps "local" from masking command-substitution exit codes
    local alltime_contribs prior_contribs new_contribs

    printf "====================\n"
    printf "Contributors\n"
    printf "====================\n"

    # the single quotes in "--format='%ae'" are literal and therefore show
    # up around every address; harmless here because lines are only counted
    local -a git_log_cmd=("git" "log" "--no-merges" "--format='%ae'" "${start_object}..${end_object}")
    printf "Unique contributors"
    if ! [[ "$start_object" = "$initial_commit" ]]; then
        printf " since %s" "${start_object#v}"
    fi
    printf "\n"
    cask_authors="$("${git_log_cmd[@]}" -- "${cask_paths[@]}" | /usr/bin/sort | /usr/bin/uniq | /usr/bin/wc -l)"
    printf "  Casks\t%s\n" "$cask_authors"
    printf "  code\t"
    "${git_log_cmd[@]}" -- "${code_paths[@]}" | /usr/bin/sort | /usr/bin/uniq | /usr/bin/wc -l
    printf "  docs\t"
    "${git_log_cmd[@]}" -- "${doc_paths[@]}"  | /usr/bin/sort | /usr/bin/uniq | /usr/bin/wc -l
    printf "  any\t"
    "${git_log_cmd[@]}" -- .                  | /usr/bin/sort | /usr/bin/uniq | /usr/bin/wc -l
    if ! [[ "$start_object" = "$initial_commit" ]]; then
        alltime_contribs="$(git log --no-merges --format='%ae' "${initial_commit}".."${end_object}" -- . | /usr/bin/sort | /usr/bin/uniq | /usr/bin/wc -l)"
        prior_contribs="$(git log --no-merges --format='%ae' "${initial_commit}".."${start_object}" -- . | /usr/bin/sort | /usr/bin/uniq | /usr/bin/wc -l)"
        # arithmetic removes whitespace.  $(( )) assignments are used on
        # purpose: a (( expr )) command returns status 1 whenever the result
        # is 0, which aborts the script under "set -e" (e.g. no new
        # contributors since the start object).
        alltime_contribs=$((alltime_contribs + 0))
        new_contribs=$((alltime_contribs - prior_contribs))
        printf "\nAll-time contributors\t%s\n" "$alltime_contribs"
        printf "New contributors since %s\t%s\n" "${start_object#v}" "$new_contribs"
    fi
    printf "\n"
}

# print_commit_stats: report how many non-merge commits in the range
# start_object..end_object touch the Casks / code / docs / any paths.
#
# Arguments: $1 - start object (lower bound of the range)
#            $2 - initial commit of the repository
# Globals:   end_object, cask_paths, code_paths, doc_paths (read)
# When $1 differs from $2, the heading names $1 (a leading "v" removed)
# and an all-time commit count from $2 is appended.
print_commit_stats () {
    local start_object="$1"
    local initial_commit="$2"
    local heading_suffix=''
    local rev_range="${start_object}..${end_object}"
    # one output line per commit; only the line count is used
    local -a log_lines=(git log --no-merges "--format='%ae'" "$rev_range")

    printf '%s\n' '====================' 'Commits' '===================='

    if [[ "$start_object" != "$initial_commit" ]]; then
        heading_suffix=" since ${start_object#v}"
    fi
    printf 'Commit count%s\n' "$heading_suffix"

    printf '  Casks\t'
    "${log_lines[@]}" -- "${cask_paths[@]}" | /usr/bin/wc -l

    printf '  code\t'
    "${log_lines[@]}" -- "${code_paths[@]}" | /usr/bin/wc -l

    printf '  docs\t'
    "${log_lines[@]}" -- "${doc_paths[@]}" | /usr/bin/wc -l

    printf '  any\t'
    "${log_lines[@]}" -- . | /usr/bin/wc -l

    if [[ "$start_object" != "$initial_commit" ]]; then
        printf '\nAll-time commits\t'
        git log --no-merges --format='%ae' "${initial_commit}".."${end_object}" -- . | /usr/bin/wc -l
    fi
    printf '\n'
}

print_doc_stats () {
    local start_object="$1"
    local initial_commit="$2"

    printf "====================\n"
    printf "Docs\n"
    printf "====================\n"

    local -a git_log_cmd=("git" "log" "--no-merges" "--format='%ae'" "${start_object}..${end_object}")
    printf "Doc contributors"
    if ! [[ "$start_object" = "$initial_commit" ]]; then
        printf " since %s" "${start_object#v}"
    fi
    printf "\n  "
    "${git_log_cmd[@]}" -- "${doc_paths[@]}" | /usr/bin/sort | /usr/bin/uniq | \
      /usr/bin/egrep -v $'^\'(paul\\.t\\.hinze@gmail\\.com|fanquake@users\\.noreply\\.github\\.com|fanquake@gmail\\.com|info@vitorgalvao\\.com|calebcenter@live\\.com|hagins\\.josh@gmail\\.com|dragon\\.vctr@gmail\\.com|mail@sebastianroeder\\.de|github@adityadalal\\.com|adityadalal924@users\\.noreply\\.github\\.com)\'$' | \
      "$xargs" | /usr/bin/perl -pe 's{ }{, }g'           # '
    printf "\n"
}

print_cask_stats () {
    local start_object="$1"
    local initial_commit="$2"

    printf "====================\n"
    printf "Casks\n"
    printf "====================\n"

    if ! [[ "$start_object" = "$initial_commit" ]]; then
        local     new_casks="$(git diff --name-status "$start_object" "$end_object" -- "${cask_paths[@]}" | /usr/bin/grep '^A.*\.rb' | cut -f2 | /usr/bin/sort | /usr/bin/uniq | /usr/bin/wc -l)"
        local deleted_casks="$(git diff --name-status "$start_object" "$end_object" -- "${cask_paths[@]}" | /usr/bin/grep '^D.*\.rb' | cut -f2 | /usr/bin/sort | /usr/bin/uniq | /usr/bin/wc -l)"
        local updated_casks="$(git diff --name-status "$start_object" "$end_object" -- "${cask_paths[@]}" | /usr/bin/grep '^M.*\.rb' | cut -f2 | /usr/bin/sort | /usr/bin/uniq | /usr/bin/wc -l)"
        # arithmetic removes whitespace
        ((cask_authors  += 0))
        ((deleted_casks += 0))
        ((new_casks     -= deleted_casks))
        ((updated_casks += 0))
        printf "%s Casks added (%s updated) by %s contributors since %s\n" "$new_casks" "$updated_casks" "$cask_authors" "${start_object#v}"
    fi

    printf "Total current Casks in HEAD\t"
    /usr/bin/find "${cask_paths[@]}" -name '*.rb' | /usr/bin/wc -l
    printf "\n"
}

###
### main
###

# _project_stats: entry point; prints all statistics sections.
#
# Arguments: $1 - optional start object.  Empty/missing means "since the
#                 initial commit"; the word 'release' means the tag printed
#                 by ./developer/bin/get_release_tag; anything else is used
#                 as the start object itself.
# The initial commit is taken to be the last line of "git log" output.
_project_stats () {
    local requested="$1"

    cd_to_project_root
    warn_if_off_branch 'master'

    local initial_commit="$(git log --pretty=format:%H -- | /usr/bin/tail -n 1)"
    verify_git_object "$initial_commit"

    # default: statistics over the whole history
    local start_object="$initial_commit"
    case "$requested" in
        release)
            start_object="$(./developer/bin/get_release_tag)"
            ;;
        '')
            ;;
        *)
            start_object="$requested"
            ;;
    esac
    verify_git_object "$start_object"

    local section
    for section in contributor commit doc cask; do
        "print_${section}_stats" "$start_object" "$initial_commit"
    done
}

# process args: a first argument of one or more dashes followed by "h" or
# "help" prints the usage text and exits without running any statistics
if [[ "$1" =~ ^-+h(elp)?$ ]]; then
    printf '%s\n' \
        'project_stats [ <commit-object> ]' \
        '' \
        'With optional single argument, (eg a tag or commit-hash)' \
        'show statistics since that commit object.' \
        '' \
        "Use the special argument 'release' to calculate since the" \
        'most recent tag (usually the same as the last release).' \
        '' \
        'Without argument, show statistics since first commit.' \
        ''
    exit
fi

# dispatch main, handing over every command-line argument unchanged
_project_stats "$@"
