Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A histogram view on contributor statistics #2925

Merged
merged 1 commit into from
Feb 15, 2014
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
241 changes: 241 additions & 0 deletions developer/bin/the_long_tail
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
#!/usr/bin/env ruby
#
# the_long_tail
#
# A histogram view on contributor stats
#
# notes
#
# Since this script does not track file-renames in the git history, the
# dependence of Casks upon occasional contributors/non-maintainers can
# only be expressed as a range or lower bound.
#

###
### dependencies
###

require 'open3'
require 'set'

###
### configurable constants
###

# Lower bounds of the histogram bins: one bin for each commit count from
# 1 through 10, followed by 100 and 1000.  The same values label the rows
# of the printed table.
BINS = [
(1..10).to_a,
100,
1000,
].flatten

# Authors with more than this many commits are not treated as "occasional".
OCCASIONAL_CUTOFF = 5

# Directory searched for Cask files and used as the path filter for
# `git log`; resolved relative to the project root.
CASK_PATH = 'Casks'

# Author email addresses that are compared against commit authors to
# decide who counts as a maintainer.
# NOTE(review): every entry reads "[email protected]" in this copy, which
# looks like address obfuscation rather than real data -- confirm the
# actual addresses against the repository.
MAINTAINERS = %w[
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
]

###
### git methods
###

# Change the working directory to the top level of the git repository that
# contains this script.
#
# The top-level path is obtained once from `git rev-parse --show-toplevel`
# and cached in @git_root.
#
# Returns the repository root path as a String.
# Raises RuntimeError when git prints no top-level path (for example when
# the script is not located inside a git work tree).
def cd_to_project_root
  # Start from the script's own directory so git inspects the repository
  # the script lives in, regardless of where it was invoked from.
  Dir.chdir File.dirname(File.expand_path(__FILE__))
  @git_root ||= Open3.popen3('git', 'rev-parse', '--show-toplevel') do |_stdin, stdout, stderr|
    toplevel = stdout.gets.to_s.chomp
    if toplevel.empty?
      # Fail here with git's own diagnostic rather than passing nil on to
      # Dir.chdir below, which would only raise an unrelated TypeError.
      raise "cannot locate git project root: #{stderr.read.to_s.strip}"
    end
    toplevel
  end
  Dir.chdir @git_root
  @git_root
end

# Tally non-merge commits per author email, as reported by `git log`.
#
# Returns a Hash of author email (String) => commit count (Integer).
# The Hash is built on the first call and cached in @authors.
def authors
  return @authors if @authors

  git_log = %w[git log --no-merges --format=%ae --]
  @authors = Open3.popen3(*git_log) do |_stdin, stdout, _stderr|
    tally = {}
    # One output line per commit, holding only the author's email.
    stdout.each_line do |raw|
      email = raw.chomp
      tally[email] = tally.fetch(email, 0) + 1
    end
    tally
  end
end

# Map each author email to the set of Cask files touched by that author's
# non-merge commits.
#
# `git log --name-only --format=%ae` prints, for every commit, the author
# email, a blank line, and then the paths changed by that commit; the next
# commit's email follows directly after the last path.  Each path therefore
# belongs to the most recent email printed above it.
#
# Returns a Hash of email (String) => Set of paths (String).  An author
# gets an entry only when at least one path under CASK_PATH is listed for
# one of their commits.  Built once and cached in @casks_by_author.
def casks_by_author
  @casks_by_author ||= begin
    command = %w[git log --no-merges --name-only --format=%ae --] + [CASK_PATH]
    cask_prefix = "#{CASK_PATH}/"
    Open3.popen3(*command) do |_stdin, stdout, _stderr|
      email = nil
      casks = {}
      stdout.each_line do |raw|
        line = raw.chomp
        next if line.empty?
        if line.start_with?(cask_prefix)
          # A path line: credit it to the commit whose email precedes it,
          # not to the email of the commit that follows.
          (casks[email] ||= Set.new) << line unless email.nil?
        else
          email = line
        end
      end
      casks
    end
  end
end

###
### filesystem methods
###

# List the Ruby files that currently exist under CASK_PATH, using find(1).
#
# Returns an Array with one String per line of find's output (newline
# removed).  Built once and cached in @all_casks.
def all_casks
  return @all_casks if @all_casks

  # No shell is involved, so the *.rb pattern reaches find unexpanded.
  find_command = ['/usr/bin/find', CASK_PATH, '-type', 'f', '-name', '*.rb']
  @all_casks = Open3.popen3(*find_command) do |_stdin, stdout, _stderr|
    stdout.readlines.map { |path| path.chomp }
  end
end

###
### analysis and report methods
###

# Count contributors per commit-count bin.
#
# Every element of BINS is the lower bound of a bin that extends up to (but
# not including) the next larger element.  The largest element is
# open-ended, so authors with at least that many commits are counted in the
# last bin instead of being left out of the histogram.
#
# Returns a Hash of bin lower bound (Integer) => number of authors.  Built
# once and cached in @histogram.
def histogram
  if @histogram.nil?
    @histogram = Hash[BINS.map { |bin| [bin, 0] }]
    authors.each do |_email, num_commits|
      # The author's bin is the largest lower bound not exceeding the count.
      bin = BINS.select { |lower_bound| lower_bound <= num_commits }.max
      # A count below every lower bound belongs to no bin.
      @histogram[bin] += 1 unless bin.nil?
    end
  end
  @histogram
end

# Collect every Cask path ever touched by an occasional contributor, i.e.
# an author with at most OCCASIONAL_CUTOFF commits.
#
# Paths come straight from the git history, so a renamed Cask can appear
# under more than one name.
#
# Returns a Set of paths (String).  Built once and cached in
# @historic_occasional_cask_set.
def historic_occasional_cask_set
  @historic_occasional_cask_set ||= authors.each_with_object(Set.new) do |(email, num_commits), casks|
    next if num_commits > OCCASIONAL_CUTOFF

    # Authors without any recorded Cask paths contribute nothing.
    touched = casks_by_author[email]
    casks.merge(touched) unless touched.nil?
  end
end

# Count the Casks touched by occasional contributors that still exist.
#
# Returns an Integer.
def extant_occasional_cask_count
  # Intersecting with the files on disk drops paths that no longer exist,
  # so a renamed Cask is not counted under both its old and new name.
  surviving = historic_occasional_cask_set & all_casks
  surviving.size
end

# Collect every Cask path ever touched by an author whose email is not
# listed in MAINTAINERS.
#
# Paths come straight from the git history, so a renamed Cask can appear
# under more than one name.
#
# Returns a Set of paths (String).  Built once and cached in
# @historic_nonmaintainer_cask_set.
def historic_nonmaintainer_cask_set
  @historic_nonmaintainer_cask_set ||= authors.each_with_object(Set.new) do |(email, _num_commits), casks|
    next if MAINTAINERS.include?(email)

    # Authors without any recorded Cask paths contribute nothing.
    touched = casks_by_author[email]
    casks.merge(touched) unless touched.nil?
  end
end

# Count the Casks touched by non-maintainers that still exist.
#
# Returns an Integer.
def extant_nonmaintainer_cask_count
  # Intersecting with the files on disk drops paths that no longer exist,
  # so a renamed Cask is not counted under both its old and new name.
  surviving = historic_nonmaintainer_cask_set & all_casks
  surviving.size
end

# Share of existing Casks that were touched by an occasional contributor.
#
# Returns an Integer percentage (integer division); cached after the
# first call.
def extant_occasional_cask_percentage
  return @extant_occasional_cask_percentage unless @extant_occasional_cask_percentage.nil?

  scaled = 100 * extant_occasional_cask_count
  @extant_occasional_cask_percentage = (scaled / all_casks.count).to_i
end

# Size of the historic occasional-contributor Cask set, expressed as a
# percentage of the Casks that exist now.
#
# Returns an Integer percentage (integer division); cached after the
# first call.
def historic_occasional_cask_percentage
  return @historic_occasional_cask_percentage unless @historic_occasional_cask_percentage.nil?

  scaled = 100 * historic_occasional_cask_set.count
  @historic_occasional_cask_percentage = (scaled / all_casks.count).to_i
end

# Share of existing Casks that were touched by a non-maintainer.
#
# Returns an Integer percentage (integer division); cached after the
# first call.
def extant_nonmaintainer_cask_percentage
  return @extant_nonmaintainer_cask_percentage unless @extant_nonmaintainer_cask_percentage.nil?

  scaled = 100 * extant_nonmaintainer_cask_count
  @extant_nonmaintainer_cask_percentage = (scaled / all_casks.count).to_i
end

# Size of the historic non-maintainer Cask set, expressed as a percentage
# of the Casks that exist now and capped at 100.
#
# Returns an Integer percentage (integer division); cached after the
# first call.
def historic_nonmaintainer_cask_percentage
  return @historic_nonmaintainer_cask_percentage unless @historic_nonmaintainer_cask_percentage.nil?

  raw = (100 * historic_nonmaintainer_cask_set.count / all_casks.count).to_i
  # The historic set may hold more paths than exist today, so clamp it.
  @historic_nonmaintainer_cask_percentage = raw > 100 ? 100 : raw
end

# Share of authors that have exactly one commit (histogram bin 1).
#
# Returns an Integer percentage (integer division); cached after the
# first call.
def onetime_author_percentage
  return @onetime_author_percentage unless @onetime_author_percentage.nil?

  single_commit_authors = histogram[1]
  @onetime_author_percentage = (100 * single_commit_authors / authors.length).to_i
end

# Share of authors whose commit count falls in histogram bins
# 1 through OCCASIONAL_CUTOFF.
#
# Returns an Integer percentage (integer division); cached after the
# first call.
def occasional_author_percentage
  return @occasional_author_percentage unless @occasional_author_percentage.nil?

  # Pull the counts of the occasional bins out of the histogram in one go.
  occasional_bins = (1..OCCASIONAL_CUTOFF).to_a
  occasional_authors = histogram.values_at(*occasional_bins).reduce(:+)
  @occasional_author_percentage = (100 * occasional_authors / authors.length).to_i
end

# Number of columns available for the histogram bars.
#
# Takes the last field printed by `stty size`, falls back to 80 when that
# yields nothing positive, and subtracts 20 from any value above 20.
#
# Returns an Integer; cached in @graph_width after the first call.
def graph_width
  return @graph_width unless @graph_width.nil?

  columns = `/bin/stty size 2>/dev/null`.chomp.split(" ").last.to_i
  columns = 80 if columns <= 0
  columns -= 20 if columns > 20
  @graph_width = columns
end

# Largest bin count in the histogram, as a Float, used to scale the bars.
#
# Cached in @graph_normalization after the first call.
def graph_normalization
  return @graph_normalization if @graph_normalization

  tallest = histogram.values.max
  @graph_normalization = tallest.to_f
end

# Print the column titles of the table followed by a dashed rule.
def print_header
  puts "Commits\tContributors", "---------------------"
end

# Print one tab-separated row per bin: label, author count, and a bar of
# dots scaled so that the fullest bin spans graph_width columns.
#
# Bins that are multiples of ten get an "'s" suffix on their label.
def print_table
  BINS.each do |bin|
    count = histogram[bin]
    label = (bin % 10).zero? ? "#{bin}'s" : bin.to_s
    bar_length = (count / graph_normalization) * graph_width
    puts [label, count, '.' * bar_length].join("\t")
  end
end

# Print the summary sentences below the table, each preceded by a blank
# line, and finish with one more blank line.
def print_footer
  occasional = occasional_author_percentage
  puts %Q[\n#{occasional}% of contributors are "occasional" (with <= #{OCCASIONAL_CUTOFF} commits)]

  onetime = onetime_author_percentage
  puts "\n#{onetime}% of contributors commit only once"

  # Cask dependence is reported as a range from the extant figure to the
  # historic one.
  low, high = extant_occasional_cask_percentage, historic_occasional_cask_percentage
  puts "\n#{low}% - #{high}% of Casks depend on an occasional contributor"

  low, high = extant_nonmaintainer_cask_percentage, historic_nonmaintainer_cask_percentage
  puts "\n#{low}% - #{high}% of Casks depend on a contributor who is not a maintainer"

  puts "\n"
end

# Print the full report: header, histogram table, then summary footer.
def generate_report
  [:print_header, :print_table, :print_footer].each { |section| send(section) }
end

###
### main
###

# Move to the repository root first: the git and find commands behind the
# report refer to CASK_PATH as a relative path.
cd_to_project_root
generate_report