Skip to content

Commit

Permalink
better & updated visualizations
Browse files Browse the repository at this point in the history
  • Loading branch information
ronikaufman committed Nov 27, 2024
1 parent 78eab80 commit b48ad6c
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 34 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
76 changes: 42 additions & 34 deletions code/myriad/loam_paper/visualization/script.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,47 @@
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

repos_info = pd.read_json("../dataset/repos_info.json")
all_loggedin_contributors = pd.read_json("../dataset/all_loggedin_contributors.json")

with open("../dataset/categories_info.json", "r") as f:
categories_info = json.load(f)
category_names = [c["category"] for c in categories_info]
category_names.sort()

def avg_contributors_per_category_barplot():
# average number of anon/loggedin contributors per category
data = []
for cat_info in categories_info:
category = cat_info["category"]
repos = repos_info.loc[repos_info["category"] == category]
avg_anonymous = repos.loc[:, "anonymous_contributors"].mean()
avg_loggedin = repos.loc[:, "loggedin_contributors"].mean()
if avg_anonymous == avg_anonymous: data.append(
{
avg_anonymous = repos.loc[:, "anonymous_contributors"].mean()
if avg_anonymous is not None:
data.append({
"category": category,
"logged-in": avg_loggedin,
"anonymous": avg_anonymous
}
)
})
data = pd.DataFrame(data)
data.set_index("category").plot(kind="bar", stacked=False, color=["steelblue", "red"])
plt.yscale("log")
plt.xlabel("category")
plt.ylabel("average number of contributors")
plt.show()

def contributions_years_scatterplot():
# relationship between date created and number of contributions
g = sns.relplot(
data=repos_info,
x="created_at", y="total_contributions",
hue="category", size=10
)
g.set(yscale="log")
plt.xticks(rotation=90)
plt.xlabel("year of creation/apparition on GitHub")
plt.ylabel("total number of contributions")
fig = plt.figure()
ax = fig.add_subplot(111)
ax2 = ax.twinx()

data.set_index("category").plot(kind="bar", color=["red", "orange"], stacked=False, ax=ax, width=0.5, position=0.5)
x = [cat["category"] for cat in categories_info]
y = [len(cat["repos"]) for cat in categories_info]
second_color = "#377eb8"
ax2.plot(x, y, ".", markersize=15)

ax.set_ylabel("average number of contributors")
ax.set_yscale("log")
ax.legend(title="type of contributor", loc="upper left")
ax2.set_ylabel("number of repos", color=second_color)
ax2.tick_params(axis="y", colors=second_color)
ax2.spines["right"].set(color=second_color, linewidth=2)

plt.show()

def repos_per_year_barplot():
Expand Down Expand Up @@ -72,16 +70,27 @@ def multi_contributors_barplot():
idx = 0
while all_loggedin_contributors.iloc[idx]["type"] == "Bot":
idx+=1
data = [{"number of repos contributed to": i+1, "number of accounts": 0} for i in range(len(all_loggedin_contributors.iloc[idx]["contributions"]))]
data = [{"number of repos contributed to": i+1, "one category": 0, "multiple categories": 0} for i in range(len(all_loggedin_contributors.iloc[idx]["contributions"]))]
for index, row in all_loggedin_contributors.iterrows():
if (row["type"] == "Bot"): continue
nb_repos_contributed_to = len(row["contributions"])-1
data[nb_repos_contributed_to]["number of accounts"] += 1
data = pd.DataFrame(data)
g = sns.barplot(data, x="number of repos contributed to", y="number of accounts", color="orange")
g.bar_label(g.containers[0])
plt.yscale("log")
contributions = row["contributions"]
categories = []
for contrib in contributions:
for cat in categories_info:
if contrib["repo_name"] in cat["repos"]:
categories.append(cat["category"])
break
categories = list(set(categories))
nb_repos_contributed_to = len(contributions)-1
if len(categories) == 1:
data[nb_repos_contributed_to]["one category"] += 1
else:
data[nb_repos_contributed_to]["multiple categories"] += 1
data = pd.DataFrame(data[1:])
data.set_index("number of repos contributed to").plot(kind="bar", stacked=False)
plt.ylabel("user count")
plt.yscale("log")
plt.legend(title="type of contributions")
plt.show()

def exclusive_repos_per_artwork_barplot():
Expand All @@ -91,7 +100,7 @@ def exclusive_repos_per_artwork_barplot():
artworks.remove("loam")
artworks.sort()
data = []
for a in ["A ... past", "Apocryph", "Dear Ai", "Glommen", "... Sand Sorter", "Megatouch", "Pain", "RELAX...", "Why ... this?"]:
for a in ["ANSDP", "A", "DA", "G", "ISS", "M", "PC", "R", "WAIST"]:
o = {"exclusivity": a}
for cat in category_names:
o[cat] = 0
Expand All @@ -103,14 +112,13 @@ def exclusive_repos_per_artwork_barplot():
category = row["category"]
data[idx][category] += 1
data = pd.DataFrame(data)
data.set_index("exclusivity").plot(kind="bar", stacked=True)
data.set_index("exclusivity").plot(kind="bar", stacked=True, cmap="Set1")
plt.xlabel("artworks")
plt.ylabel("number of exclusive repositories")
plt.legend(title="category")
plt.show()

avg_contributors_per_category_barplot()
#contributions_years_scatterplot()
repos_per_year_barplot()
#repos_per_year_barplot()
multi_contributors_barplot()
exclusive_repos_per_artwork_barplot()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit b48ad6c

Please sign in to comment.