diff --git a/developer/bin/the_long_tail b/developer/bin/the_long_tail new file mode 100755 index 0000000000000..5553c31ad0b28 --- /dev/null +++ b/developer/bin/the_long_tail @@ -0,0 +1,241 @@ +#!/usr/bin/env ruby +# +# the_long_tail +# +# A histogram view on contributor stats +# +# notes +# +# Since this script does not track file-renames in the git history, the +# dependence of Casks upon occasional contributors/non-maintainers can +# only be expressed as a range or lower bound. +# + +### +### dependencies +### + +require 'open3' +require 'set' + +### +### configurable constants +### + +BINS = [ + (1..10).to_a, + 100, + 1000, + ].flatten + +OCCASIONAL_CUTOFF = 5 + +CASK_PATH = 'Casks' + +MAINTAINERS = %w[ + paul.t.hinze@gmail.com + fanquake@users.noreply.github.com + fanquake@gmail.com + kevin@suttle.io + leoj3n@gmail.com + nano@fdp.io + nanoid.xd@gmail.com + me@passcod.name + walker@pobox.com + info@vitorgalvao.com + ] + +### +### git methods +### + +def cd_to_project_root + Dir.chdir File.dirname(File.expand_path(__FILE__)) + @git_root ||= Open3.popen3(*%w[ + git rev-parse --show-toplevel + ]) do |stdin, stdout, stderr| + begin + stdout.gets.chomp + rescue + end + end + Dir.chdir @git_root + @git_root +end + +def authors + @authors ||= Open3.popen3(*%w[ + git log --no-merges --format=%ae -- + ]) do |stdin, stdout, stderr| + h = {} + stdout.each_line do |line| + line.chomp! + h[line] ||= 0 + h[line] += 1 + end + h + end +end + +def casks_by_author + @casks_by_author ||= Open3.popen3(*%w[ + git log --no-merges --name-only --format=%ae -- + ], + CASK_PATH) do |stdin, stdout, stderr| + email = nil + h = {} + stdout.each_line.to_a.join('').split("\n\n").each do |paragraph| + if paragraph.include?('Casks/') + lines=paragraph.split("\n") + email = lines.pop + h[email] ||= Set.new + h[email].merge(lines.compact) + else + email = paragraph.chomp + end + end + h + end +end + +### +### filesystem methods +### + +def all_casks + @all_casks ||= Open3.popen3('/usr/bin/find', + CASK_PATH, + *%w[-type f -name *.rb] + ) do |stdin, stdout, stderr| + stdout.each_line.map(&:chomp) + end +end + +### +### analysis and report methods +### + +def histogram + if @histogram.nil? + @histogram = Hash[*BINS.map{ |elt| [elt, 0] }.flatten] + authors.each do |name, num_commits| + bottom = 0 + BINS.each do |top| + if num_commits >= bottom and num_commits < top + @histogram[bottom] += 1 + end + bottom = top + end + end + end + @histogram +end + +def historic_occasional_cask_set + @historic_occasional_cask_set = authors.each.collect do |name, num_commits| + if num_commits > OCCASIONAL_CUTOFF + nil + elsif ! casks_by_author.key?(name) + nil + else + casks_by_author[name].to_a + end + end.flatten.compact.to_set +end + +def extant_occasional_cask_count + # avoid double-counting renames by intersecting with extant Casks + historic_occasional_cask_set.intersection(all_casks).count +end + +def historic_nonmaintainer_cask_set + @historic_nonmaintainer_cask_set = authors.each.collect do |name, num_commits| + if MAINTAINERS.include?(name) + nil + else + casks_by_author[name].to_a + end + end.flatten.compact.to_set +end + +def extant_nonmaintainer_cask_count + # avoid double-counting renames by intersecting with extant Casks + historic_nonmaintainer_cask_set.intersection(all_casks).count +end + +def extant_occasional_cask_percentage + @extant_occasional_cask_percentage ||= (100 * extant_occasional_cask_count / all_casks.count).to_i +end + +def historic_occasional_cask_percentage + @historic_occasional_cask_percentage ||= (100 * historic_occasional_cask_set.count / all_casks.count).to_i +end + +def extant_nonmaintainer_cask_percentage + @extant_nonmaintainer_cask_percentage ||= (100 * extant_nonmaintainer_cask_count / all_casks.count).to_i +end + +def historic_nonmaintainer_cask_percentage + # this is so large, it might cross 100% + @historic_nonmaintainer_cask_percentage ||= [100, (100 * historic_nonmaintainer_cask_set.count / all_casks.count).to_i].min +end + +def onetime_author_percentage + @onetime_author_percentage ||= (100 * + histogram[1] / + authors.length).to_i +end + +def occasional_author_percentage + # why is it so hard to slice a hash? + @occasional_author_percentage ||= (100 * + (1 .. OCCASIONAL_CUTOFF).to_a.collect{ |bin| histogram[bin] }.reduce(:+) / + authors.length).to_i +end + +def graph_width + if @graph_width.nil? + @graph_width = `/bin/stty size 2>/dev/null`.chomp.split(" ").last.to_i + @graph_width = 80 if @graph_width <= 0 + @graph_width -= 20 if @graph_width > 20 + end + @graph_width +end + +def graph_normalization + @graph_normalization ||= histogram.values.max.to_f +end + +def print_header + puts "Commits\tContributors" + puts "---------------------" +end + +def print_table + BINS.each do |bin| + plural = (bin % 10) == 0 ? "'s" : '' + graph = '.' * ((histogram[bin]/graph_normalization) * graph_width) + puts "#{bin}#{plural}\t#{histogram[bin]}\t#{graph}" + end +end + +def print_footer + puts %Q[\n#{occasional_author_percentage}% of contributors are "occasional" (with <= #{OCCASIONAL_CUTOFF} commits)] + puts "\n#{onetime_author_percentage}% of contributors commit only once" + puts "\n#{extant_occasional_cask_percentage}% - #{historic_occasional_cask_percentage}% of Casks depend on an occasional contributor" + puts "\n#{extant_nonmaintainer_cask_percentage}% - #{historic_nonmaintainer_cask_percentage}% of Casks depend on a contributor who is not a maintainer" + puts "\n" +end + +def generate_report + print_header + print_table + print_footer +end + +### +### main +### + +cd_to_project_root +generate_report