From 3de4ec2f7a4c91224f6727e8ae17c60e488d7192 Mon Sep 17 00:00:00 2001 From: Ksenia Berezina Date: Thu, 27 May 2021 18:11:55 -0400 Subject: [PATCH] Issue #3574: Incorporate bugbug ml labelling into moderation process --- config/environment.py | 3 ++ .../webhooks/private_issue_opened.json | 3 +- .../webhooks/private_milestone_accepted.json | 1 + .../private_milestone_ml_autoclosed.json | 42 ++++++++++++++++ tests/unit/test_issues.py | 14 ++++-- tests/unit/test_webhook.py | 23 +++++++++ tests/unit/test_webhook_model.py | 43 +++++++++++++--- webcompat/issues.py | 49 ++++++++++++++----- webcompat/webhooks/helpers.py | 28 +++++++++-- webcompat/webhooks/ml.py | 18 +++++-- webcompat/webhooks/model.py | 42 +++++++++++++--- 11 files changed, 224 insertions(+), 42 deletions(-) create mode 100644 tests/fixtures/webhooks/private_milestone_ml_autoclosed.json diff --git a/config/environment.py b/config/environment.py index 387f9ad76..fb7057f3d 100644 --- a/config/environment.py +++ b/config/environment.py @@ -41,6 +41,7 @@ ) BUGBUG_HTTP_SERVER = "https://bugbug.herokuapp.com" CLASSIFIER_PATH = "needsdiagnosis/predict/github/webcompat/web-bugs-private" # noqa + AUTOCLOSED_MILESTONE_ID = 15 if STAGING: GITHUB_CLIENT_ID = os.environ.get('STAGING_GITHUB_CLIENT_ID') @@ -59,6 +60,7 @@ ) BUGBUG_HTTP_SERVER = "https://bugbug.herokuapp.com" CLASSIFIER_PATH = "needsdiagnosis/predict/github/webcompat/webcompat-tests-private" # noqa + AUTOCLOSED_MILESTONE_ID = 5 if LOCALHOST: # for now we are using .env only on localhost @@ -82,6 +84,7 @@ ) BUGBUG_HTTP_SERVER = "http://0.0.0.0:8000" CLASSIFIER_PATH = "needsdiagnosis/predict/github/webcompat/webcompat-tests-private" # noqa + AUTOCLOSED_MILESTONE_ID = 5 # BUG STATUS # The id will be initialized when the app is started. diff --git a/tests/fixtures/webhooks/private_issue_opened.json b/tests/fixtures/webhooks/private_issue_opened.json index d680bb970..796a8a853 100644 --- a/tests/fixtures/webhooks/private_issue_opened.json +++ b/tests/fixtures/webhooks/private_issue_opened.json @@ -16,6 +16,7 @@ "description": "issue in the process of being moderated" } ], - "state": "open" + "state": "open", + "html_url": "https://github.com/webcompat/webcompat-tests-private/issues/600" } } diff --git a/tests/fixtures/webhooks/private_milestone_accepted.json b/tests/fixtures/webhooks/private_milestone_accepted.json index 6bd5707c2..c38621182 100644 --- a/tests/fixtures/webhooks/private_milestone_accepted.json +++ b/tests/fixtures/webhooks/private_milestone_accepted.json @@ -16,6 +16,7 @@ "description": "issue in the process of being moderated" } ], + "html_url": "https://github.com/webcompat/webcompat-tests-private/issues/600", "state": "open", "milestone": { "url": "https://api.github.com/repos/webcompat/webcompat-tests-private/milestones/2", diff --git a/tests/fixtures/webhooks/private_milestone_ml_autoclosed.json b/tests/fixtures/webhooks/private_milestone_ml_autoclosed.json new file mode 100644 index 000000000..8e5a7e232 --- /dev/null +++ b/tests/fixtures/webhooks/private_milestone_ml_autoclosed.json @@ -0,0 +1,42 @@ +{ + "action": "milestoned", + "issue": { + "title": "www.netflix.com - test private issue autoclosed", + "repository_url": "https://api.github.com/repos/webcompat/webcompat-tests-private", + "number": 600, + "html_url": "https://github.com/webcompat/webcompat-tests-private/issues/600", + "body": "\n\n\n\n\n**URL**: https://www.netflix.com/", + "labels": [ + { + "id": 1788251357, + "node_id": "MDU6TGFiZWwxNzg4MjUxMzU3", + "url": "https://api.github.com/repos/webcompat/webcompat-tests/labels/action-needsmoderation", + "name": "action-needsmoderation", + "color": "d36200", + "default": false, + "description": "issue in the process of being moderated" + } + ], + "state": "open", + "milestone": { + "url": "https://api.github.com/repos/webcompat/webcompat-tests-private/milestones/5", + "html_url": "https://github.com/webcompat/webcompat-tests-private/milestone/5", + "labels_url": "https://api.github.com/repos/webcompat/webcompat-tests-private/milestones/5/labels", + "id": 6795520, + "node_id": "MDk6TWlsZXN0b25lNjc5NTUyMA==", + "number": 5, + "title": "ml-autoclosed", + "description": "Issues closed automatically based on ml prediction", + "open_issues": 1, + "closed_issues": 3, + "state": "open", + "created_at": "2021-05-26T20:24:14Z", + "updated_at": "2021-05-27T16:56:08Z", + "due_on": null, + "closed_at": null + } + }, + "milestone": { + "title": "ml-autoclosed" + } + } diff --git a/tests/unit/test_issues.py b/tests/unit/test_issues.py index dd1ba67b0..ffc886ed1 100644 --- a/tests/unit/test_issues.py +++ b/tests/unit/test_issues.py @@ -132,6 +132,13 @@ def test_moderation_template_rejected(setup): assert 'Its original content has been deleted' in actual['body'] +def test_moderation_template_autoclosed(setup): + """Check the return values are for the rejected case.""" + actual = moderation_template('autoclosed') + assert actual['title'] == 'Issue closed.' + assert 'We have closed this issue\nautomatically as we suspect it is invalid' in actual['body'] # noqa + + def test_moderation_template_ongoing(setup): """Check the return values are for the needsmoderation case.""" # test the default @@ -142,7 +149,6 @@ def test_moderation_template_ongoing(setup): actual = moderation_template('ongoing') assert actual['title'] == 'In the moderation queue.' assert 'put in the moderation queue.' in actual['body'] - # bad keyword, we go back to the default. - actual = moderation_template('punkcat') - assert actual['title'] == 'In the moderation queue.' - assert 'put in the moderation queue.' in actual['body'] + # bad keyword causes error. + with pytest.raises(ValueError): + moderation_template('punkcat') diff --git a/tests/unit/test_webhook.py b/tests/unit/test_webhook.py index afab60b65..edf220f12 100644 --- a/tests/unit/test_webhook.py +++ b/tests/unit/test_webhook.py @@ -454,6 +454,21 @@ def test_prepare_rejected_issue(self): self.assertEqual(type(actual), dict) self.assertEqual(actual, expected) + def test_prepare_rejected_issue_autoclosed(self): + """Test payload for the rejected issue that we want to auto close.""" + expected = { + 'body': '

Thanks for the report. We have closed this issue\n' + 'automatically as we suspect it is invalid. If we made ' + 'a mistake, please\nfile a new issue and try to provide ' + 'more context.

', + 'labels': ['bugbug-probability-high'], + 'milestone': 8, + 'state': 'closed', + 'title': 'Issue closed.'} + actual = helpers.prepare_rejected_issue("autoclosed") + self.assertEqual(type(actual), dict) + self.assertEqual(actual, expected) + @patch('webcompat.webhooks.ml.make_classification_request') def test_get_issue_classification(self, mock_class): """Make only one request if it returns 200 status code right away. @@ -479,6 +494,14 @@ def test_get_issue_classification_exception(self, mock_class, mock_time): assert mock_class.call_count == 4 + def test_prepare_private_url(self): + """Test private issue url is in the right format.""" + expected = '\n\n' # noqa + actual = helpers.prepare_private_url( + 'https://github.com/webcompat/webcompat-tests-private/issues/600' + ) + self.assertEqual(actual, expected) + if __name__ == '__main__': unittest.main() diff --git a/tests/unit/test_webhook_model.py b/tests/unit/test_webhook_model.py index 98494544e..64b8f2007 100644 --- a/tests/unit/test_webhook_model.py +++ b/tests/unit/test_webhook_model.py @@ -18,6 +18,8 @@ from tests.unit.test_webhook import event_data from webcompat.webhooks.model import WebHookIssue +AUTOCLOSED_MILESTONE_ID = webcompat.app.config['AUTOCLOSED_MILESTONE_ID'] + # Some expected responses as tuples accepted = ('Moderated issue accepted', 200, {'Content-Type': 'text/plain'}) rejected = ('Moderated issue rejected', 200, {'Content-Type': 'text/plain'}) @@ -25,6 +27,8 @@ 200, {'Content-Type': 'text/plain'}) invalid = ('Moderated issue closed as invalid', 200, {'Content-Type': 'text/plain'}) +autoclosed = ('Issue closed as invalid by ml bot', + 200, {'Content-Type': 'text/plain'}) boring = ('Not an interesting hook', 403, {'Content-Type': 'text/plain'}) gracias = ('gracias, amigo.', 200, {'Content-Type': 'text/plain'}) wrong_repo = ('Wrong repository', 403, {'Content-Type': 'text/plain'}) @@ -40,7 +44,9 @@ 'public_url': 'https://github.com/webcompat/webcompat-tests/issues/1', 'repository_url': 'https://api.github.com/repos/webcompat/webcompat-tests-private', # noqa 'title': 'www.netflix.com - test valid event', - 'host_reported_from': ''} + 'host_reported_from': '', + 'html_url': 'https://github.com/webcompat/webcompat-tests-private/issues/600' # noqa +} issue_info2 = { 'action': 'milestoned', 'state': 'open', 'milestoned_with': 'accepted', @@ -50,7 +56,9 @@ 'public_url': 'https://github.com/webcompat/webcompat-tests/issues/1', 'repository_url': 'https://api.github.com/repos/webcompat/webcompat-tests-private', # noqa 'title': 'www.netflix.com - test private issue accepted', - 'host_reported_from': ''} + 'host_reported_from': '', + 'html_url': 'https://github.com/webcompat/webcompat-tests-private/issues/600' # noqa +} def test_model_instance(): @@ -164,6 +172,21 @@ def test_closing_public_issues(mock_mr): assert issue.get_public_issue_number() in uri +@patch('webcompat.webhooks.model.make_request') +def test_autoclose_public_issue(mock_mr): + """Test API request that is sent to GitHub for auto closed issue.""" + json_event, signature = event_data('private_milestone_ml_autoclosed.json') + payload = json.loads(json_event) + issue = WebHookIssue.from_dict(payload) + issue.close_public_issue(reason='autoclosed') + method, uri, data = mock_mr.call_args[0] + # make sure we sent a patch with the right data to GitHub + assert method == 'patch' + assert type(data) == dict + assert issue.get_public_issue_number() in uri + assert issue.html_url in data['body'] + + def test_prepare_public_comment(): """Test we prepare the right comment body.""" expected_payload = '{"body": "[Original issue 1](https://github.com/webcompat/webcompat-tests/issues/1)"}' # noqa @@ -306,9 +329,9 @@ def test_process_issue_action_scenarios(mock_mr, mock_classification): ('private_milestone_closed_unmoderated.json', rejected), ('private_milestone_accepted_incomplete.json', incomplete), ('private_milestone_accepted_invalid.json', invalid), - ('private_milestone_accepted_closed.json', boring), ('private_issue_opened.json', comment_added), - ('public_milestone_needscontact.json', outreach_comment_added) + ('public_milestone_needscontact.json', outreach_comment_added), + ('private_milestone_ml_autoclosed.json', autoclosed), ] mock_classification.return_value = ( {'prob': [0.03385603427886963, 0.9661439657211304], 'class': 1} @@ -349,6 +372,9 @@ def test_process_issue_action_github_api_exception(mock_mr, caplog): 'close_public_issue'), ('private_milestone_accepted_incomplete.json', 'private:closing public issue as incomplete failed', + 'close_public_issue'), + ('private_milestone_ml_autoclosed.json', + 'private:closing public issue as invalid by ml-bot failed', 'close_public_issue') ] for scenario in scenarios: @@ -376,6 +402,7 @@ def test_process_issue_action_close_scenarios(mock_close, mock_mr): ('private_milestone_closed_unmoderated.json', 'rejected'), ('private_milestone_accepted_incomplete.json', 'incomplete'), ('private_milestone_accepted_invalid.json', 'invalid'), + ('private_milestone_ml_autoclosed.json', 'autoclosed'), ] for scenario in called: issue_payload, arg = scenario @@ -426,11 +453,11 @@ def test_classify_issue_probability_high(mock_mr, mock_classification): issue.classify() method, uri, data = mock_mr.call_args[0] - # make sure we set a bugbug-probability-high label and - # send a post request to Github - assert method == 'post' + # make sure we move the issue to ml-autoclosed milestone and + # send a patch request to Github + assert method == 'patch' assert type(data) == dict - assert data.get('labels') == ['bugbug-probability-high'] + assert data.get('milestone') == AUTOCLOSED_MILESTONE_ID @patch('webcompat.webhooks.model.get_issue_classification') diff --git a/webcompat/issues.py b/webcompat/issues.py index 4fce1672b..a6ada0176 100644 --- a/webcompat/issues.py +++ b/webcompat/issues.py @@ -43,6 +43,33 @@ REJECTED_BODY = '''

The content of this issue doesn't meet our acceptable use guidelines. Its original content has been deleted.

''' +AUTOCLOSE_TITLE = 'Issue closed.' +AUTOCLOSE_BODY = '''

Thanks for the report. We have closed this issue +automatically as we suspect it is invalid. If we made a mistake, please +file a new issue and try to provide more context.

''' + +TEMPLATE_CONFIG = { + 'rejected': { + 'title': REJECTED_TITLE, + 'body': REJECTED_BODY + }, + 'invalid': { + 'title': '', + 'body': INVALID_BODY + }, + 'incomplete': { + 'title': '', + 'body': INCOMPLETE_BODY + }, + 'autoclosed': { + 'title': AUTOCLOSE_TITLE, + 'body': AUTOCLOSE_BODY + }, + 'ongoing': { + 'title': ONGOING_TITLE, + 'body': ONGOING_BODY + } +} def moderation_template(choice='ongoing'): @@ -53,22 +80,20 @@ def moderation_template(choice='ongoing'): - rejected: the issue has been rejected. - incomplete: the issue is incomplete (but not rejected) - invalid: the issue is invalid (but not rejected) + - autoclosed: the issue is auto closed by ml bot The default is 'ongoing' even with unknown keywords. """ - if choice == 'rejected': - title = REJECTED_TITLE - body = REJECTED_BODY - elif choice == 'invalid': - title = '' - body = INVALID_BODY - elif choice == 'incomplete': - title = '' - body = INCOMPLETE_BODY + if choice in TEMPLATE_CONFIG: + return { + 'title': TEMPLATE_CONFIG[choice].get('title'), + 'body': TEMPLATE_CONFIG[choice].get('body') + } else: - title = ONGOING_TITLE - body = ONGOING_BODY - return {'title': title, 'body': body} + temp = (", ".join(TEMPLATE_CONFIG.keys())) + raise ValueError( + f'Template choice must be one of the following: {temp}' + ) def report_private_issue(form, public_url): diff --git a/webcompat/webhooks/helpers.py b/webcompat/webhooks/helpers.py index 22340e497..065cfd45c 100644 --- a/webcompat/webhooks/helpers.py +++ b/webcompat/webhooks/helpers.py @@ -19,6 +19,7 @@ from webcompat.helpers import proxy_request from webcompat.helpers import to_bytes from webcompat.issues import moderation_template +from webcompat.form import wrap_metadata BROWSERS = ['blackberry', 'brave', 'chrome', 'edge', 'firefox', 'iceweasel', 'ie', 'lynx', 'myie', 'opera', 'puffin', 'qq', 'safari', 'samsung', 'seamonkey', 'uc', 'vivaldi'] # noqa GECKO_BROWSERS = ['browser-android-components', @@ -32,6 +33,16 @@ IOS_BROWSERS = ['browser-firefox-ios', ] PUBLIC_REPO = app.config['ISSUES_REPO_URI'] PRIVATE_REPO = app.config['PRIVATE_REPO_URI'] +REJECTED_CONFIG = { + 'rejected': { + 'template': 'rejected', + 'labels': ['status-notacceptable'] + }, + 'autoclosed': { + 'template': 'autoclosed', + 'labels': ['bugbug-probability-high'] + } +} def extract_metadata(body): @@ -211,11 +222,12 @@ def msg_log(msg, issue_number): log.info(msg) -def prepare_rejected_issue(): +def prepare_rejected_issue(reason='rejected'): """Create the payload for the rejected moderated issue. - When the issue has been moderated as rejected, - we need to change a couple of things in the public space + When the issue has been moderated as rejected or labelled + invalid by the bot we need to change a couple of things + in the public space - change Title - change body @@ -225,8 +237,14 @@ def prepare_rejected_issue(): """ # Extract the relevant information invalid_id = app.config['STATUSES']['invalid']['id'] - payload_request = moderation_template('rejected') - payload_request['labels'] = ['status-notacceptable'] + config = REJECTED_CONFIG[reason] + payload_request = moderation_template(config['template']) + payload_request['labels'] = config['labels'] payload_request['state'] = 'closed' payload_request['milestone'] = invalid_id return payload_request + + +def prepare_private_url(private_url): + """Create metadata with private url""" + return "\n" + wrap_metadata(('private_url', private_url)) diff --git a/webcompat/webhooks/ml.py b/webcompat/webhooks/ml.py index 2e37ebf84..2ea7c6842 100644 --- a/webcompat/webhooks/ml.py +++ b/webcompat/webhooks/ml.py @@ -8,6 +8,7 @@ import requests import time +import logging from requests.exceptions import ConnectionError @@ -26,7 +27,7 @@ def make_classification_request(issue_number): return response -def get_issue_classification(issue_number, retry_count=4, retry_sleep=3): +def get_issue_classification(issue_number, max_retry_count=4, retry_sleep=7): """Get issue classification from bugbug. As classification happens in the background we need to make a second @@ -35,16 +36,23 @@ def get_issue_classification(issue_number, retry_count=4, retry_sleep=3): The service returns 202 status if request is still in process and 200 status if the issue is classified """ - for _ in range(retry_count): + log = app.logger + log.setLevel(logging.INFO) + start = time.monotonic() + retry_count = 0 + for _ in range(max_retry_count): response = make_classification_request(issue_number) if response.status_code == 202: time.sleep(retry_sleep) else: + retry_count = _ break else: - total_sleep = retry_count * retry_sleep - msg = f"Couldn't classify issue {issue_number} in {total_sleep} seconds, aborting" # noqa + total = round(time.monotonic() - start, 2) + msg = f"ML FAIL: issue {issue_number} in {total} seconds with {max_retry_count} retries" # noqa raise ConnectionError(msg) - + total = round(time.monotonic() - start, 2) + msg = f"ML OK: issue {issue_number} in {total} seconds with {retry_count} retries" # noqa + log.info(msg) return response.json() diff --git a/webcompat/webhooks/model.py b/webcompat/webhooks/model.py index 8576a6206..5782b9fd2 100644 --- a/webcompat/webhooks/model.py +++ b/webcompat/webhooks/model.py @@ -20,11 +20,13 @@ from webcompat.webhooks.helpers import oops from webcompat.webhooks.helpers import prepare_rejected_issue from webcompat.webhooks.helpers import repo_scope +from webcompat.webhooks.helpers import prepare_private_url from webcompat.webhooks.ml import get_issue_classification from webcompat.issues import moderation_template PUBLIC_REPO = app.config['ISSUES_REPO_URI'] PRIVATE_REPO = app.config['PRIVATE_REPO_URI'] +AUTOCLOSED_MILESTONE_ID = app.config['AUTOCLOSED_MILESTONE_ID'] THRESHOLD = 0.97 @@ -43,6 +45,7 @@ class WebHookIssue: milestone: str milestoned_with: str host_reported_from: str + html_url: str @classmethod def from_dict(cls, payload, host=None): @@ -55,6 +58,7 @@ def from_dict(cls, payload, host=None): domain = full_title.partition(' ')[0] public_url = extract_metadata(issue_body).get('public_url', '').strip() original_labels = [label['name'] for label in labels] + html_url = issue.get('html_url') # webhook with a milestone already set milestone = '' if issue.get('milestone'): @@ -74,7 +78,7 @@ def from_dict(cls, payload, host=None): state=issue.get('state'), title=full_title, original_labels=original_labels, milestone=milestone, milestoned_with=milestoned_with, - host_reported_from=host_reported_from) + host_reported_from=host_reported_from, html_url=html_url) def close_private_issue(self): """Mark the private issue as closed.""" @@ -185,12 +189,19 @@ def close_public_issue(self, reason='rejected'): Right now the accepted reasons are: 'incomplete' 'invalid' + 'autoclosed' 'rejected' (default) """ if reason == 'incomplete': payload_request = self.prepare_accepted_issue('incomplete') elif reason == 'invalid': payload_request = self.prepare_accepted_issue('invalid') + elif reason == 'autoclosed': + payload_request = prepare_rejected_issue(reason) + # We add the private issue link to the body of the public issue, + # and it will be used to make a request during ml data fetching + # as public issue will no longer have relevant issue data + payload_request['body'] += prepare_private_url(self.html_url) else: payload_request = prepare_rejected_issue() public_number = self.get_public_issue_number() @@ -234,19 +245,19 @@ def get_public_issue_number(self): return url def classify(self): - """Make a request to bugbug and label the issue. + """Make a request to bugbug and add a milestone to the issue. - Gets issue classification from bugbug and labels - the issue if probability is high + Gets issue classification from bugbug and adds a milestone + to the issue if probability is high """ data = get_issue_classification(self.number) needsdiagnosis_false = data.get('class') proba = data.get('prob') if needsdiagnosis_false and proba and proba[1] > THRESHOLD: - payload = {'labels': ['bugbug-probability-high']} - path = f'repos/{PRIVATE_REPO}/{self.number}/labels' - make_request('post', path, payload) + path = f'repos/{PRIVATE_REPO}/{self.number}' + payload_request = {'milestone': AUTOCLOSED_MILESTONE_ID} + make_request('patch', path, payload_request) def process_issue_action(self): """Route the actions and provide different responses. @@ -354,6 +365,23 @@ def process_issue_action(self): self.comment_closed_reason(reason='invalid') self.close_private_issue() return make_response('Moderated issue closed as invalid', 200) + elif (self.action == 'milestoned' and scope == 'private' and + self.milestoned_with == 'ml-autoclosed'): + # The private issue has been automatically moved to ml-autoclosed + # milestone. This will close the public and private issues, and + # will replace the content of the public issue with placeholder. + try: + self.close_public_issue(reason='autoclosed') + except HTTPError as e: + msg_log( + 'private:closing public issue as invalid by ml-bot failed', + self.number) + return oops() + else: + # we didn't get exceptions, so it's safe to + # close the private issue. + self.close_private_issue() + return make_response('Issue closed as invalid by ml bot', 200) elif (scope == 'private' and self.action == 'closed' and self.milestone == 'unmoderated'): # The private issue has been closed. It is rejected and the