#!/usr/bin/env ruby
#
# the_long_tail
#
# A histogram view on contributor stats
#
# notes
#
# Since this script does not track file-renames in the git history, the
# dependence of Casks upon occasional contributors/non-maintainers can
# only be expressed as a range or lower bound.
#

###
### dependencies
###

require 'open3'
require 'set'

###
### configurable constants
###

# Bin labels for the commit-count histogram, in ascending order: every value
# from 1 through 10, then 100 and 1000.  Both the histogram computation and
# the printed table iterate over this list.
BINS = [*1..10, 100, 1000]

# Authors with at most this many commits are treated as "occasional"
# contributors (the footer reports them as "with <= N commits").
OCCASIONAL_CUTOFF = 5

# Directory holding the Cask files.  It is passed unchanged to the git and
# find commands, so it is resolved against the current working directory.
CASK_PATH = 'Casks'

# All maintainers, past and present, identified by commit author email.
# Entries are compared verbatim against the `%ae` values reported by
# `git log`, so a maintainer who has committed under several addresses
# needs one entry per address.
MAINTAINERS = %w[
                  paul.t.hinze@gmail.com
                  fanquake@users.noreply.github.com
                  fanquake@gmail.com
                  kevin@suttle.io
                  leoj3n@gmail.com
                  nano@fdp.io
                  nanoid.xd@gmail.com
                  me@passcod.name
                  walker@pobox.com
                  info@vitorgalvao.com
                  calebcenter@live.com
                  ndr@qef.io
                  josh@joshbutts.com
                  goxberry@gmail.com
                  radek.simko@gmail.com
                  federicobond@gmail.com
                  claui@users.noreply.github.com
                  amorymeltzer@gmail.com
                  hagins.josh@gmail.com
                  dragon.vctr@gmail.com
                  mail@sebastianroeder.de
                  github@adityadalal.com
                  adityadalal924@users.noreply.github.com
                 ]

###
### git methods
###

# Change the working directory to the top level of the git repository that
# contains this script, and return that path.
#
# The script's own directory is entered first so that `git rev-parse` is
# asked about the repository this file lives in, regardless of where the
# script was launched from.  The git and find commands elsewhere in this file
# use the relative CASK_PATH, which is resolved against the directory chosen
# here.
#
# The repository root is looked up once and memoized in @git_root.
#
# Raises RuntimeError, including git's own error output, when the root
# cannot be determined (for example when the script is not inside a git
# working tree).
def cd_to_project_root
  Dir.chdir File.dirname(File.expand_path(__FILE__))
  @git_root ||= begin
    out, err, status = Open3.capture3('git', 'rev-parse', '--show-toplevel')
    root = out.chomp
    # Fail loudly here instead of letting a nil root reach Dir.chdir.
    unless status.success? && !root.empty?
      raise "could not determine git project root: #{err.strip}"
    end
    root
  end
  Dir.chdir @git_root
  @git_root
end

# Number of non-merge commits per author email.
#
# Runs `git log --no-merges --format=%ae --` and counts how many times each
# output line (one author email per commit) occurs.
#
# Returns a Hash of email String => Integer commit count, in order of first
# appearance in the log.  The result is memoized.
def authors
  @authors ||= Open3.popen3('git', 'log', '--no-merges', '--format=%ae', '--') do |_stdin, log, _stderr|
    log.each_line.each_with_object({}) do |raw_line, commit_counts|
      email = raw_line.chomp
      commit_counts[email] = commit_counts.fetch(email, 0) + 1
    end
  end
end

# Map each author email to the Set of Cask paths touched by that author's
# non-merge commits.
#
# With `--format=%ae --name-only`, git prints for every commit the author
# email, a blank line, and then one touched path per line; the next commit's
# email follows the last path without a separating blank line.  The output is
# therefore walked line by line, remembering the most recent email, so that
# every path is credited to the commit it was listed under.
#
# A line is taken to be a path when it contains "<CASK_PATH>/"; any other
# non-blank line is taken to be an author email.
#
# Returns a Hash of email String => Set of path Strings.  Authors for whom no
# path was listed have no entry.  The result is memoized.
def casks_by_author
  @casks_by_author ||= Open3.popen3(*%w[
                                        git log --no-merges --name-only --format=%ae --
                                       ],
                                    CASK_PATH) do |_stdin, stdout, _stderr|
    path_marker = "#{CASK_PATH}/"
    current_email = nil
    casks = {}
    stdout.each_line do |raw_line|
      line = raw_line.chomp
      next if line.empty?
      if line.include?(path_marker)
        # A path seen before any email has no owner and is ignored.
        (casks[current_email] ||= Set.new) << line unless current_email.nil?
      else
        current_email = line
      end
    end
    casks
  end
end

###
### filesystem methods
###

# List every Cask file currently on disk.
#
# Runs `/usr/bin/find CASK_PATH -type f -name *.rb`.  The arguments are
# passed as separate words, so the `*.rb` pattern reaches find unexpanded.
#
# Returns an Array of path Strings, one per line of find output.  The result
# is memoized.
def all_casks
  @all_casks ||= begin
    find_command = ['/usr/bin/find', CASK_PATH, '-type', 'f', '-name', '*.rb']
    Open3.popen3(*find_command) do |_stdin, listing, _stderr|
      listing.each_line.collect { |path| path.chomp }
    end
  end
end

###
### analysis and report methods
###

# Count authors per commit-count bin.
#
# Each author is placed in the largest BINS entry that does not exceed the
# author's commit count.  An entry therefore covers the counts from its own
# value up to, but not including, the next entry, and the last entry is
# open-ended so that the most prolific authors are counted too.  An author
# whose count is below the smallest bin is not counted.
#
# Returns a Hash of bin Integer => number of authors, with one key per BINS
# entry.  The result is memoized, and is only stored once fully built.
def histogram
  @histogram ||= begin
    counts = Hash[BINS.map { |bin| [bin, 0] }]
    authors.each_value do |num_commits|
      bin = BINS.select { |lower_bound| lower_bound <= num_commits }.max
      counts[bin] += 1 if bin
    end
    counts
  end
end

# Every Cask path ever touched by an occasional contributor, i.e. an author
# with at most OCCASIONAL_CUTOFF commits.
#
# Paths come from the git history, so the set may include files that no
# longer exist under that name.
#
# Returns a Set of path Strings.  The result is memoized.
def historic_occasional_cask_set
  @historic_occasional_cask_set ||= authors.each_with_object(Set.new) do |(email, num_commits), casks|
    next if num_commits > OCCASIONAL_CUTOFF
    # Authors who never touched a Cask have no entry; treat that as "none".
    casks.merge(casks_by_author.fetch(email, []))
  end
end

# Number of Casks still on disk that were touched by an occasional
# contributor.
#
# Restricting the historic set to files that exist today keeps a renamed
# Cask from being counted once per name it has had.
def extant_occasional_cask_count
  still_present = historic_occasional_cask_set & all_casks
  still_present.size
end

# Every Cask path ever touched by an author whose email is not listed in
# MAINTAINERS.
#
# Paths come from the git history, so the set may include files that no
# longer exist under that name.
#
# Returns a Set of path Strings.  The result is memoized.
def historic_nonmaintainer_cask_set
  @historic_nonmaintainer_cask_set ||= authors.keys.each_with_object(Set.new) do |email, casks|
    next if MAINTAINERS.include?(email)
    # Authors who never touched a Cask have no entry; treat that as "none".
    casks.merge(casks_by_author.fetch(email, []))
  end
end

# Number of Casks still on disk that were touched by a non-maintainer.
#
# Restricting the historic set to files that exist today keeps a renamed
# Cask from being counted once per name it has had.
def extant_nonmaintainer_cask_count
  still_present = historic_nonmaintainer_cask_set & all_casks
  still_present.size
end

# Whole-number percentage of the Casks on disk that were touched by an
# occasional contributor (rounded down).  Memoized.
def extant_occasional_cask_percentage
  @extant_occasional_cask_percentage ||= begin
    touched = extant_occasional_cask_count
    total = all_casks.count
    (100 * touched).div(total)
  end
end

# Whole-number ratio (rounded down), expressed as a percentage, of the number
# of Cask paths ever touched by an occasional contributor to the number of
# Casks on disk.  Memoized.
def historic_occasional_cask_percentage
  @historic_occasional_cask_percentage ||= begin
    touched = historic_occasional_cask_set.count
    total = all_casks.count
    (100 * touched).div(total)
  end
end

# Whole-number percentage of the Casks on disk that were touched by a
# non-maintainer (rounded down).  Memoized.
def extant_nonmaintainer_cask_percentage
  @extant_nonmaintainer_cask_percentage ||= begin
    touched = extant_nonmaintainer_cask_count
    total = all_casks.count
    (100 * touched).div(total)
  end
end

# Whole-number ratio (rounded down), expressed as a percentage, of the number
# of Cask paths ever touched by a non-maintainer to the number of Casks on
# disk.  The historic set can outnumber the files on disk, so the value is
# capped at 100.  Memoized.
def historic_nonmaintainer_cask_percentage
  @historic_nonmaintainer_cask_percentage ||= begin
    uncapped = (100 * historic_nonmaintainer_cask_set.count).div(all_casks.count)
    uncapped > 100 ? 100 : uncapped
  end
end

# Whole-number percentage of authors who fall in the histogram's bin 1
# (rounded down).  Memoized.
def onetime_author_percentage
  @onetime_author_percentage ||= begin
    single_commit_authors = histogram[1]
    (100 * single_commit_authors).div(authors.length)
  end
end

# Whole-number percentage of authors who are "occasional" (rounded down):
# the histogram bins 1 through OCCASIONAL_CUTOFF are added up and divided by
# the total number of authors.  Memoized.
def occasional_author_percentage
  @occasional_author_percentage ||= begin
    # values_at pulls out the wanted slice of the histogram in one call
    occasional_bins = histogram.values_at(*(1..OCCASIONAL_CUTOFF))
    occasional_authors = occasional_bins.inject { |sum, bin_count| sum + bin_count }
    (100 * occasional_authors).div(authors.length)
  end
end

# Number of character columns available for the histogram bars.
#
# The terminal width is taken from the last field of `stty size`; when that
# yields nothing usable, 80 is assumed.  Widths above 20 are then reduced by
# 20.  The result is memoized.
def graph_width
  return @graph_width unless @graph_width.nil?

  columns = `/bin/stty size 2>/dev/null`.chomp.split(" ").last.to_i
  columns = 80 if columns <= 0
  columns -= 20 if columns > 20
  @graph_width = columns
end

# The largest histogram bin count, as a Float.  Bar lengths are scaled
# relative to this value.  Memoized.
def graph_normalization
  return @graph_normalization if @graph_normalization

  @graph_normalization = histogram.each_value.max.to_f
end

# Print the two-line table header: the column titles and a dashed rule.
def print_header
  puts "Commits\tContributors",
       "---------------------"
end

# Print one tab-separated row per bin: the bin label, the number of authors
# in it, and a bar of dots.
#
# Bins divisible by 10 are labelled with a trailing "'s" (e.g. "100's").  The
# bar length is the bin's share of the largest bin, scaled to graph_width.
def print_table
  BINS.each do |bin|
    label = (bin % 10).zero? ? "#{bin}'s" : bin.to_s
    author_count = histogram[bin]
    bar = '.' * ((author_count / graph_normalization) * graph_width)
    puts([label, author_count, bar].join("\t"))
  end
end

# Print the summary sentences below the table, each preceded by a blank
# line, followed by one trailing blank line.
def print_footer
  occasional = occasional_author_percentage
  puts %Q[\n#{occasional}% of contributors are "occasional" (with <= #{OCCASIONAL_CUTOFF} commits)]

  onetime = onetime_author_percentage
  puts "\n#{onetime}% of contributors commit only once"

  # Each Cask figure is a range: files on disk today vs. all historic paths.
  low, high = extant_occasional_cask_percentage, historic_occasional_cask_percentage
  puts "\n#{low}% - #{high}% of Casks depend on an occasional contributor"

  low, high = extant_nonmaintainer_cask_percentage, historic_nonmaintainer_cask_percentage
  puts "\n#{low}% - #{high}% of Casks depend on a contributor who is not a maintainer"

  puts "\n"
end

# Print the complete report to standard output: the table header, one
# histogram row per bin, and the summary footer, in that order.
def generate_report
  print_header
  print_table
  print_footer
end

###
### main
###

# The git and find commands use the relative CASK_PATH, so move to the
# repository root before any statistics are gathered.
cd_to_project_root
generate_report
