From 8b788f04879e065a890aa7a12d97967e704d7726 Mon Sep 17 00:00:00 2001
From: Isabel Drost-Fromm
Date: Tue, 13 Feb 2024 10:15:42 +0100
Subject: [PATCH] First stab at figuring out the number of very active mentors in a project.

---
 most_active_mentors.py      | 178 ++++++++++++++++++++++++++++++++++++
 test_most_active_mentors.py |  68 ++++++++++++++
 2 files changed, 246 insertions(+)
 create mode 100644 most_active_mentors.py
 create mode 100644 test_most_active_mentors.py

diff --git a/most_active_mentors.py b/most_active_mentors.py
new file mode 100644
index 0000000..8af2142
--- /dev/null
+++ b/most_active_mentors.py
@@ -0,0 +1,178 @@
+"""A module for measuring the number of very active mentors
+
+This module provides functions for measuring the number of active mentors on a project.
+
+This is measured by number of PR comments. We are working under the assumption that PR
+comments are left in good faith to move contributors further instead of nitpicking and
+discouraging them.
+
+Open questions:
+ - should there be an option to limit this to certain users, e.g. core maintainers?
+ - should there be a limit to how many comments per PR we consider to avoid having
+   the statistic dominated by contested PRs?
+ - should this metric count consecutive comments coming from the same user as only
+   one to avoid people unnecessarily splitting their comments to game the metric?
+ - instead of PR comments should we count PRs on which a username was seen as commenter?
+
+Functions:
+    count_comments_per_user(
+        issue: Union[github3.issues.Issue, None],
+        discussion: Union[dict, None],
+        pull_request: Union[github3.pulls.PullRequest, None],
+        ready_for_review_at: Union[datetime, None],
+        ignore_users: Union[List[str], None],
+        max_comments_to_eval: int,
+    ) -> dict
+        Count the comments per username for a single item. Take only the first n
+        comments (max_comments_to_eval) into consideration.
+    ignore_comment(
+        issue_user: Union[github3.users.User, None],
+        comment_user: github3.users.User,
+        ignore_users: List[str],
+        comment_created_at: datetime,
+        ready_for_review_at: Union[datetime, None],
+    ) -> bool
+        Check if a comment should be left out of the count.
+    get_mentor_count(
+        mentor_activity: dict,
+        cutoff: int,
+    ) -> int
+        Count the number of mentors active at least cutoff times.
+
+"""
+from datetime import datetime, timedelta
+from itertools import islice
+from typing import List, Union
+
+import github3
+import numpy
+
+from classes import IssueWithMetrics
+
+
+def count_comments_per_user(
+    issue: Union[github3.issues.Issue, None],  # type: ignore
+    discussion: Union[dict, None] = None,
+    pull_request: Union[github3.pulls.PullRequest, None] = None,
+    ready_for_review_at: Union[datetime, None] = None,
+    ignore_users: Union[List[str], None] = None,
+    max_comments_to_eval: int = 20,
+) -> dict:
+    """Count the number of times a user was seen commenting on a single item.
+
+    Args:
+        issue (Union[github3.issues.Issue, None]): A GitHub issue.
+        discussion (Union[dict, None]): A GitHub discussion. Currently not evaluated.
+        pull_request (Union[github3.pulls.PullRequest, None]): A GitHub pull request.
+        ready_for_review_at (Union[datetime, None]): Comments created before this
+            point in time are not counted.
+        ignore_users (Union[List[str], None]): A list of GitHub usernames to ignore.
+        max_comments_to_eval (int): Maximum number of comments, and separately of
+            reviews, per item to look at. A negative value means no limit.
+
+    Returns:
+        dict: A dictionary of usernames seen and number of comments they left.
+
+    """
+    if ignore_users is None:
+        ignore_users = []
+    mentor_count: dict = {}
+
+    # The API is asked for at most max_comments_to_eval entries, but the cap is
+    # also enforced locally so that it holds no matter how many entries come back.
+    limit = max_comments_to_eval if max_comments_to_eval >= 0 else None
+
+    # The author's own comments do not count as mentoring. Fall back to the pull
+    # request author so that an item without an issue wrapper does not crash.
+    if issue:
+        issue_user = issue.issue.user
+    else:
+        issue_user = getattr(pull_request, "user", None)
+
+    # Get the first comments
+    if issue:
+        comments = issue.issue.comments(
+            number=max_comments_to_eval, sort="created", direction="asc"
+        )  # type: ignore
+        for comment in islice(comments, limit):
+            if ignore_comment(
+                issue_user,
+                comment.user,
+                ignore_users,
+                comment.created_at,
+                ready_for_review_at,
+            ):
+                continue
+            # increase the number of comments left by current user by 1
+            login = comment.user.login
+            mentor_count[login] = mentor_count.get(login, 0) + 1
+
+    # Check if the issue is actually a pull request
+    # so we may also count the reviews left on it
+    if pull_request:
+        reviews = pull_request.reviews(number=max_comments_to_eval)  # type: ignore
+        for review in islice(reviews, limit):
+            if ignore_comment(
+                issue_user,
+                review.user,
+                ignore_users,
+                review.submitted_at,
+                ready_for_review_at,
+            ):
+                continue
+            # increase the number of comments left by current user by 1
+            login = review.user.login
+            mentor_count[login] = mentor_count.get(login, 0) + 1
+
+    return mentor_count
+
+
+def ignore_comment(
+    issue_user: Union[github3.users.User, None],
+    comment_user: github3.users.User,
+    ignore_users: List[str],
+    comment_created_at: datetime,
+    ready_for_review_at: Union[datetime, None],
+) -> bool:
+    """Check if a comment should be ignored.
+
+    Args:
+        issue_user (Union[github3.users.User, None]): The author of the item, if known.
+        comment_user (github3.users.User): The author of the comment.
+        ignore_users (List[str]): A list of GitHub usernames to ignore.
+        comment_created_at (datetime): The time the comment was created.
+        ready_for_review_at (Union[datetime, None]): The time the item became ready
+            for review, if any.
+
+    Returns:
+        bool: True if the comment should not be counted.
+
+    """
+    return (
+        # ignore comments by IGNORE_USERS
+        comment_user.login in ignore_users
+        # ignore comments by bots
+        or comment_user.type == "Bot"
+        # ignore comments by the issue creator
+        or (issue_user is not None and comment_user.login == issue_user.login)
+        # ignore comments created before the issue was ready for review
+        or (ready_for_review_at is not None and comment_created_at < ready_for_review_at)
+    )
+
+
+def get_mentor_count(
+    mentor_activity: dict,
+    cutoff: int
+) -> int:
+    """Calculate the number of active mentors on the project.
+
+    Args:
+        mentor_activity (dict): A dictionary with usernames to count of comments left.
+        cutoff (int): The minimum number of comments a user has to leave to count as
+            active mentor.
+
+    Returns:
+        int: Number of active mentors
+
+    """
+    return sum(1 for count in mentor_activity.values() if count >= cutoff)
diff --git a/test_most_active_mentors.py b/test_most_active_mentors.py
new file mode 100644
index 0000000..76501e9
--- /dev/null
+++ b/test_most_active_mentors.py
@@ -0,0 +1,68 @@
+"""A module containing unit tests for the most_active_mentors module.
+
+This module contains unit tests for the count_comments_per_user and
+get_mentor_count functions in the most_active_mentors module.
+The tests use mock GitHub issues and comments to test the functions' behavior.
+
+Classes:
+    TestCountCommentsPerUser: A class to test the count_comments_per_user function.
+    TestGetMentorCount: A class to test the
+        get_mentor_count function.
+
+"""
+import unittest
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock
+
+from classes import IssueWithMetrics
+from most_active_mentors import (
+    count_comments_per_user,
+    get_mentor_count,
+)
+
+
+class TestCountCommentsPerUser(unittest.TestCase):
+    """Test the count_comments_per_user function."""
+
+    def test_count_comments_per_user(self):
+        """Test that count_comments_per_user correctly counts user comments.
+
+        This test mocks the GitHub connection and issue comments, and checks that
+        count_comments_per_user correctly considers user comments for counting.
+
+        """
+        # Set up the mock GitHub issues
+        mock_issue1 = MagicMock()
+        mock_issue1.comments = 2
+        mock_issue1.issue.user.login = "issue_owner"
+        mock_issue1.created_at = "2023-01-01T00:00:00Z"
+
+        # Set up 22 mock GitHub issue comments - only 20 should be counted
+        mock_issue1.issue.comments.return_value = []
+        for i in range(22):
+            mock_comment1 = MagicMock()
+            mock_comment1.user.login = "very_active_user"
+            # An explicit offset instead of "Z" also parses on Python < 3.11
+            mock_comment1.created_at = datetime.fromisoformat(
+                f"2023-01-02T{i:02d}:00:00+00:00"
+            )
+            mock_issue1.issue.comments.return_value.append(mock_comment1)
+
+        # Call the function
+        result = count_comments_per_user(mock_issue1)
+        expected_result = {"very_active_user": 20}
+
+        # Check the results
+        self.assertEqual(result, expected_result)
+
+
+class TestGetMentorCount(unittest.TestCase):
+    """Test the get_mentor_count function."""
+
+    def test_get_mentor_count(self):
+        """Test that get_mentor_count only counts users at or above the cutoff."""
+        mentor_activity = {"sue": 15, "bob": 10, "frank": 2}
+
+        self.assertEqual(get_mentor_count(mentor_activity, 10), 2)
+        self.assertEqual(get_mentor_count(mentor_activity, 16), 0)
+        self.assertEqual(get_mentor_count({}, 1), 0)