Skip to content

Commit

Permalink
Merge pull request #3573 from webcompat/issue/3571/1
Browse files Browse the repository at this point in the history
Issue #3571: Use bugbug to classify issues
  • Loading branch information
Karl Dubost authored May 19, 2021
2 parents 3e4a21b + 91cc36c commit fbe2cb9
Show file tree
Hide file tree
Showing 5 changed files with 198 additions and 22 deletions.
6 changes: 6 additions & 0 deletions config/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
ANONYMOUS_REPORTING_ENABLED = strtobool(
os.environ.get("PROD_ANON_REPORTING", "off")
)
BUGBUG_HTTP_SERVER = "https://bugbug.herokuapp.com"
CLASSIFIER_PATH = "needsdiagnosis/predict/github/webcompat/web-bugs-private" # noqa

if STAGING:
GITHUB_CLIENT_ID = os.environ.get('STAGING_GITHUB_CLIENT_ID')
Expand All @@ -55,6 +57,8 @@
ANONYMOUS_REPORTING_ENABLED = strtobool(
os.environ.get("STAGING_ANON_REPORTING", "off")
)
BUGBUG_HTTP_SERVER = "https://bugbug.herokuapp.com"
CLASSIFIER_PATH = "needsdiagnosis/predict/github/webcompat/webcompat-tests-private" # noqa

if LOCALHOST:
# for now we are using .env only on localhost
Expand All @@ -76,6 +80,8 @@
AB_EXPERIMENTS = strtobool(
os.environ.get("AB_EXPERIMENT", "off")
)
BUGBUG_HTTP_SERVER = "http://0.0.0.0:8000"
CLASSIFIER_PATH = "needsdiagnosis/predict/github/webcompat/webcompat-tests-private" # noqa

# BUG STATUS
# The id will be initialized when the app is started.
Expand Down
32 changes: 28 additions & 4 deletions tests/unit/test_webhook.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,13 @@

import flask
import pytest
from requests.exceptions import HTTPError
from requests.models import Response
from requests.exceptions import ConnectionError

import webcompat

from webcompat.db import Site
from webcompat.helpers import to_bytes
from webcompat.webhooks import helpers
from webcompat.webhooks.model import WebHookIssue
from webcompat.webhooks import helpers, ml


# The key is being used for testing and computing the signature.
Expand Down Expand Up @@ -455,6 +454,31 @@ def test_prepare_rejected_issue(self):
self.assertEqual(type(actual), dict)
self.assertEqual(actual, expected)

@patch('webcompat.webhooks.ml.make_classification_request')
def test_get_issue_classification(self, mock_class):
    """Make only one request if it returns 200 status code right away.

    If make_classification_request answers with a 200 status code,
    get_issue_classification must not poll again: the helper is called
    exactly once, with the issue number that was passed in, and the
    decoded JSON body of that response is returned.
    """
    mock_class.return_value.status_code = 200
    actual = ml.get_issue_classification(12345)
    # Check the argument as well as the call count, so a request made
    # for the wrong issue cannot go unnoticed.
    mock_class.assert_called_once_with(12345)
    assert actual is mock_class.return_value.json.return_value

@patch('time.sleep', return_value=None)
@patch('webcompat.webhooks.ml.make_classification_request')
def test_get_issue_classification_exception(self, mock_class, mock_time):
    """Give up with ConnectionError when bugbug never finishes.

    Every mocked response carries a 202 status code, so
    get_issue_classification keeps polling until its retry budget is
    spent and then raises. time.sleep is patched out so the test does
    not actually wait between attempts.
    """
    pending_response = mock_class.return_value
    pending_response.status_code = 202

    with pytest.raises(ConnectionError):
        ml.get_issue_classification(12345)

    # 4 is the default retry_count of get_issue_classification.
    self.assertEqual(mock_class.call_count, 4)


if __name__ == '__main__':
unittest.main()
95 changes: 84 additions & 11 deletions tests/unit/test_webhook_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import pytest
from requests.exceptions import HTTPError
from requests.models import Response

import webcompat
from tests.unit.test_webhook import event_data
Expand All @@ -30,7 +29,7 @@
gracias = ('gracias, amigo.', 200, {'Content-Type': 'text/plain'})
wrong_repo = ('Wrong repository', 403, {'Content-Type': 'text/plain'})
oops = ('oops', 400, {'Content-Type': 'text/plain'})
comment_added = ('public url added', 200, {'Content-Type': 'text/plain'})
comment_added = ('public url added and issue classified', 200, {'Content-Type': 'text/plain'}) # noqa
outreach_comment_added = ('outreach generator url added', 200, {'Content-Type': 'text/plain'}) # noqa

issue_info1 = {
Expand Down Expand Up @@ -76,7 +75,6 @@ def test_model_instance():
@patch('webcompat.webhooks.model.make_request')
def test_close_private_issue(mock_mr):
"""Test issue state and API request that is sent to GitHub."""
mock_mr.return_value.status_code == 200
json_event, signature = event_data('private_issue_opened.json')
payload = json.loads(json_event)
issue = WebHookIssue.from_dict(payload)
Expand Down Expand Up @@ -105,7 +103,6 @@ def test_close_private_issue_fails(mock_mr):
@patch('webcompat.webhooks.model.make_request')
def test_comment_public_uri(mock_mr):
"""Test issue state and API request that is sent to GitHub."""
mock_mr.return_value.status_code == 200
json_event, signature = event_data('private_issue_opened.json')
payload = json.loads(json_event)
issue = WebHookIssue.from_dict(payload)
Expand All @@ -120,7 +117,6 @@ def test_comment_public_uri(mock_mr):
@patch('webcompat.webhooks.model.make_request')
def test_comment_closed_reason(mock_mr):
"""Test comment API request that is sent to GitHub."""
mock_mr.return_value.status_code == 200
json_event, signature = event_data('private_issue_opened.json')
payload = json.loads(json_event)
issue = WebHookIssue.from_dict(payload)
Expand All @@ -139,7 +135,6 @@ def test_comment_closed_reason(mock_mr):
@patch('webcompat.webhooks.model.make_request')
def test_moderate_public_issue(mock_mr):
"""Test issue state and API request that is sent to GitHub."""
mock_mr.return_value.status_code == 200
json_event, signature = event_data('private_issue_opened.json')
payload = json.loads(json_event)
issue = WebHookIssue.from_dict(payload)
Expand All @@ -156,7 +151,6 @@ def test_moderate_public_issue(mock_mr):
@patch('webcompat.webhooks.model.make_request')
def test_closing_public_issues(mock_mr):
"""Test issue state and API request that is sent to GitHub."""
mock_mr.return_value.status_code == 200
json_event, signature = event_data('private_issue_opened.json')
payload = json.loads(json_event)
issue = WebHookIssue.from_dict(payload)
Expand Down Expand Up @@ -211,7 +205,6 @@ def test_get_public_issue_number():
@patch('webcompat.webhooks.model.make_request')
def test_tag_as_public(mock_mr):
"""Test tagging an issue as public."""
mock_mr.return_value.status_code == 200
json_event, signature = event_data('new_event_valid.json')
payload = json.loads(json_event)
issue = WebHookIssue.from_dict(payload)
Expand Down Expand Up @@ -301,8 +294,9 @@ def test_prepare_accepted_issue(mock_priority):
assert expected == actual


@patch('webcompat.webhooks.model.get_issue_classification')
@patch('webcompat.webhooks.model.make_request')
def test_process_issue_action_scenarios(mock_mr):
def test_process_issue_action_scenarios(mock_mr, mock_classification):
"""Test we are getting the right response for each scenario."""
test_data = [
('new_event_valid.json', gracias),
Expand All @@ -316,7 +310,10 @@ def test_process_issue_action_scenarios(mock_mr):
('private_issue_opened.json', comment_added),
('public_milestone_needscontact.json', outreach_comment_added)
]
mock_mr.return_value.status_code == 200
mock_classification.return_value = (
{'prob': [0.03385603427886963, 0.9661439657211304], 'class': 1}
)

for issue_event, expected_rv in test_data:
json_event, signature = event_data(issue_event)
payload = json.loads(json_event)
Expand Down Expand Up @@ -390,20 +387,96 @@ def test_process_issue_action_close_scenarios(mock_close, mock_mr):
mock_close.assert_called_with(reason=arg)


@patch('webcompat.webhooks.model.get_issue_classification')
@patch('webcompat.webhooks.model.make_request')
@patch('webcompat.webhooks.model.WebHookIssue.close_public_issue')
def test_process_issue_action_not_closed_scenarios(mock_close, mock_mr):
def test_process_issue_action_not_closed_scenarios(mock_close, mock_mr, mock_classification): # noqa
"""Test scenarios where close_public_issue is never called."""
not_called = [
'private_milestone_closed_invalid.json',
'new_event_valid.json',
'private_milestone_accepted_wrong_repo.json',
'private_issue_opened.json'
]

mock_classification.return_value = (
{'prob': [0.03385603427886963, 0.9661439657211304], 'class': 1}
)

for scenario in not_called:
json_event, signature = event_data(scenario)
payload = json.loads(json_event)
issue = WebHookIssue.from_dict(payload)
with webcompat.app.test_request_context():
issue.process_issue_action()
mock_close.assert_not_called()


@patch('webcompat.webhooks.model.get_issue_classification')
@patch('webcompat.webhooks.model.make_request')
def test_classify_issue_probability_high(mock_mr, mock_classification):
    """Test classifying an issue and adding a label.

    The mocked classification predicts class 1 with a probability above
    the threshold used by classify(), so exactly one request adding the
    bugbug-probability-high label is expected.
    """
    mock_classification.return_value = (
        {'prob': [0.03385603427886963, 0.9761439657211304], 'class': 1}
    )

    json_event, signature = event_data('private_issue_opened.json')
    payload = json.loads(json_event)
    issue = WebHookIssue.from_dict(payload)
    issue.classify()

    # Fail with a clear message if no request was made, rather than with
    # a TypeError when unpacking an empty call_args below.
    mock_mr.assert_called_once()
    method, uri, data = mock_mr.call_args[0]

    # make sure we set a bugbug-probability-high label and
    # send a post request to the labels endpoint on Github
    assert method == 'post'
    assert uri.endswith('/labels')
    assert isinstance(data, dict)
    assert data.get('labels') == ['bugbug-probability-high']


@patch('webcompat.webhooks.model.get_issue_classification')
@patch('webcompat.webhooks.model.make_request')
def test_classify_issue_probability_low(mock_mr, mock_classification):
    """Leave the issue unlabelled when bugbug is not confident enough.

    The mocked classification predicts class 1, but with a probability
    that does not exceed the threshold used by classify(), so no
    request may be sent to GitHub.
    """
    mock_classification.return_value = {
        'prob': [0.03385603427886963, 0.8261439657211304],
        'class': 1,
    }
    json_event, _ = event_data('private_issue_opened.json')
    issue = WebHookIssue.from_dict(json.loads(json_event))

    issue.classify()

    mock_mr.assert_not_called()


@patch('webcompat.webhooks.model.get_issue_classification')
@patch('webcompat.webhooks.model.make_request')
def test_classify_issue_needsdiagnosis_true(mock_mr, mock_classification):
    """Skip labelling when bugbug predicts class 0 (needsdiagnosis=True).

    classify() only labels issues whose predicted class is truthy, so a
    class 0 prediction must not trigger any request to GitHub.
    """
    mock_classification.return_value = {
        'prob': [0.8261439657211304, 0.03385603427886963],
        'class': 0,
    }
    json_event, _ = event_data('private_issue_opened.json')
    issue = WebHookIssue.from_dict(json.loads(json_event))

    issue.classify()

    mock_mr.assert_not_called()


@patch('webcompat.webhooks.ml.make_classification_request')
@patch('webcompat.webhooks.model.make_request')
def test_classify_issue_service_exception(mock_mr, mock_classification, caplog): # noqa
    """Check that an error from the ml server is handled gracefully.

    The request to bugbug is mocked to raise HTTPError; processing a
    newly opened private issue must then return the "oops" response and
    log that the classification failed.
    """
    # Capture INFO and above so the failure message ends up in caplog.
    caplog.set_level(logging.INFO)
    mock_classification.side_effect = HTTPError()

    json_event, _ = event_data('private_issue_opened.json')
    issue = WebHookIssue.from_dict(json.loads(json_event))

    with webcompat.app.test_request_context():
        response = issue.process_issue_action()
        assert response == oops
        assert 'classification failed' in caplog.text
50 changes: 50 additions & 0 deletions webcompat/webhooks/ml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Helper methods for machine learning classification."""

import requests
import time

from requests.exceptions import ConnectionError

from webcompat import app

# Base URL of the bugbug HTTP service, read from the app configuration.
BUGBUG_HTTP_SERVER = app.config['BUGBUG_HTTP_SERVER']
# Path of the classifier endpoint on that service; the issue number is
# appended to it when the request URL is built.
CLASSIFIER_PATH = app.config['CLASSIFIER_PATH']


def make_classification_request(issue_number, timeout=10):
    """Make a request to bugbug http service.

    Args:
        issue_number: number of the issue to classify; it is appended
            to the classifier path to build the request URL.
        timeout: seconds to wait for bugbug before giving up. Without
            a timeout, requests waits indefinitely on a server that
            does not answer.

    Returns:
        The response object; callers inspect its status code.

    Raises:
        HTTPError: bugbug answered with a 4xx/5xx status code.
        ConnectionError: bugbug could not be reached, or did not answer
            within ``timeout`` seconds.
    """
    url = f"{BUGBUG_HTTP_SERVER}/{CLASSIFIER_PATH}/{issue_number}"
    headers = {"X-Api-Key": "webcompat"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.Timeout as e:
        # Report timeouts as ConnectionError, the error type this module
        # already raises when a classification cannot be obtained, so
        # callers do not have to handle a new exception type.
        msg = f"bugbug did not answer within {timeout} seconds"
        raise ConnectionError(msg) from e
    response.raise_for_status()
    return response


def get_issue_classification(issue_number, retry_count=4, retry_sleep=3):
    """Get issue classification from bugbug.

    As classification happens in the background we need to make a second
    request to get results, so we're polling the endpoint.
    The service returns 202 status if request is still in process
    and 200 status if the issue is classified.

    Args:
        issue_number: number of the issue to classify.
        retry_count: maximum number of requests sent to bugbug.
        retry_sleep: seconds to wait between two consecutive requests.

    Returns:
        The decoded JSON body of the first response whose status code
        is not 202.

    Raises:
        ConnectionError: every attempt came back with a 202 status
            code (or retry_count allows no attempt at all).
    """
    for attempt in range(1, retry_count + 1):
        response = make_classification_request(issue_number)

        if response.status_code != 202:
            return response.json()

        # Only wait when another attempt follows: sleeping after the
        # last one would just delay the failure below.
        if attempt < retry_count:
            time.sleep(retry_sleep)

    # Time actually spent waiting between the attempts.
    total_sleep = max(retry_count - 1, 0) * retry_sleep
    msg = f"Couldn't classify issue {issue_number} in {total_sleep} seconds, aborting"  # noqa
    raise ConnectionError(msg)
37 changes: 30 additions & 7 deletions webcompat/webhooks/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,9 @@
"""WebCompat Issue Model for webhooks."""

from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import Dict
from typing import List

from requests.exceptions import HTTPError
from requests.exceptions import HTTPError, ConnectionError

from webcompat import app
from webcompat.webhooks.helpers import extract_metadata
Expand All @@ -23,10 +20,12 @@
from webcompat.webhooks.helpers import oops
from webcompat.webhooks.helpers import prepare_rejected_issue
from webcompat.webhooks.helpers import repo_scope
from webcompat.webhooks.ml import get_issue_classification
from webcompat.issues import moderation_template

# Repository URIs read from the app configuration.
PUBLIC_REPO = app.config['ISSUES_REPO_URI']
PRIVATE_REPO = app.config['PRIVATE_REPO_URI']
# Value that the second entry of a bugbug 'prob' list must exceed before
# an issue is labelled 'bugbug-probability-high'.
THRESHOLD = 0.97


@dataclass
Expand Down Expand Up @@ -234,6 +233,21 @@ def get_public_issue_number(self):
url = self.public_url.strip().rsplit('/', 1)[1]
return url

def classify(self):
    """Ask bugbug for a classification and label confident results.

    Fetches the classification of this issue and, when the predicted
    class is truthy and the second probability exceeds THRESHOLD, posts
    the bugbug-probability-high label for the issue in the private
    repository. Nothing is sent otherwise.
    """
    classification = get_issue_classification(self.number)
    predicted_class = classification.get('class')
    probabilities = classification.get('prob')

    # No positive prediction, or no probabilities to judge it by.
    if not predicted_class or not probabilities:
        return

    if probabilities[1] > THRESHOLD:
        make_request(
            'post',
            f'repos/{PRIVATE_REPO}/{self.number}/labels',
            {'labels': ['bugbug-probability-high']},
        )

def process_issue_action(self):
"""Route the actions and provide different responses.
Expand Down Expand Up @@ -266,6 +280,8 @@ def process_issue_action(self):
return make_response('gracias, amigo.', 200)
elif (self.action == 'milestoned' and scope == 'public' and
self.milestoned_with == 'needscontact'):
# add a comment with a link to outreach template generator
# when issue is moved to needscontact
try:
self.comment_outreach_generator_uri()
except HTTPError as e:
Expand All @@ -274,14 +290,21 @@ def process_issue_action(self):
else:
return make_response('outreach generator url added', 200)
elif self.action == 'opened' and scope == 'private':
# webcompat-bot needs to comment on this issue with the URL
# webcompat-bot needs to comment public URL of the issue
# and we try to classify the issue using bugbug
try:
self.comment_public_uri()
except HTTPError as e:
msg_log(f'comment failed ({e})', self.number)
return oops()
else:
return make_response('public url added', 200)

try:
self.classify()
except (HTTPError, ConnectionError) as e:
msg_log(f'classification failed ({e})', self.number)
return oops()

return make_response('public url added and issue classified', 200)
elif (self.action == 'milestoned' and scope == 'private' and
self.milestoned_with == 'accepted'):
# private issue have been moderated and we will make it public
Expand Down

0 comments on commit fbe2cb9

Please sign in to comment.