Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature implement linked commits #1226

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion docs/source/contributing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,10 @@ Improver
^^^^^^^^^

All the Improvers must inherit from ``Improver`` superclass and implement the
``interesting_advisories`` property and the ``get_inferences`` method.
``interesting_advisories`` property and the ``get_inferences`` method,
unless they do not improve advisory data. In that case they should override the
``is_custom_improver`` property to return ``True`` and implement the ``run`` method.


Writing an improver
---------------------
Expand Down
13 changes: 11 additions & 2 deletions vulnerabilities/improve_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,23 @@ class ImproveRunner:
def __init__(self, improver_class):
    """
    Keep a reference to ``improver_class``.

    The class itself is stored, not an instance; it is instantiated later
    when the runner executes.
    """
    self.improver_class = improver_class

def run(self) -> None:
def __run_advisory_improver(self) -> None:
    """
    Run an advisory-based improver.

    Every advisory returned by the improver's ``interesting_advisories`` is
    converted to advisory data, passed to ``get_inferences``, and the
    resulting inferences are handed to ``process_inferences``.
    """
    improver = self.improver_class()
    improver_name = improver.qualified_name
    logger.info(f"Running improver: {improver_name}")
    for advisory in improver.interesting_advisories:
        advisory_data = advisory.to_advisory_data()
        process_inferences(
            inferences=improver.get_inferences(advisory_data=advisory_data),
            advisory=advisory,
            improver_name=improver_name,
        )

def __run_custom_improver(self) -> None:
    """
    Run a custom improver by instantiating it and calling its own ``run``.
    """
    custom_improver = self.improver_class()
    custom_improver.run()

def run(self) -> None:
    """
    Run the wrapped improver, logging when it starts and when it finishes.

    An improver whose ``is_custom_improver`` is true is dispatched to its own
    ``run`` method; any other improver goes through the advisory pipeline.
    """
    # Build a single instance for both the log line and the dispatch check
    # rather than one throwaway instance per attribute read. The flag is read
    # from an instance because a subclass may declare it as a plain
    # ``property``, which only resolves on instances.
    improver = self.improver_class()
    logger.info("Running improver: %s", improver.qualified_name)
    if improver.is_custom_improver:
        self.__run_custom_improver()
    else:
        self.__run_advisory_improver()
    logger.info("Finished improving using %s.", self.improver_class.qualified_name)


Expand Down
16 changes: 16 additions & 0 deletions vulnerabilities/improver.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,16 @@ class Improver:
required to override the ``interesting_advisories`` property method to return a QuerySet of
``Advisory`` objects. These advisories are then passed to ``get_inferences`` method which is
responsible for returning an iterable of ``Inferences`` for that particular ``Advisory``
Some improvers work on already imported data that is not directly related to advisories.
Such improvers must override ``is_custom_improver`` to return True and implement the ``run`` method.
"""

@classproperty
def is_custom_improver(cls):
    """
    Tell whether this improver provides its own ``run`` method instead of
    producing inferences from advisories. False unless overridden.
    """
    return False

@classproperty
def qualified_name(cls):
"""
Expand All @@ -135,3 +143,11 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
Subclasses must implement.
"""
raise NotImplementedError

def run(self) -> None:
    """
    Entry point for a custom improver, i.e. one that performs its own
    action instead of improving advisory data.

    Subclasses must implement; this base version always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
2 changes: 2 additions & 0 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

from vulnerabilities.improvers import add_commit_ref
from vulnerabilities.improvers import default
from vulnerabilities.improvers import valid_versions

Expand All @@ -24,6 +25,7 @@
valid_versions.IstioImprover,
valid_versions.DebianOvalImprover,
valid_versions.UbuntuOvalImprover,
add_commit_ref.CommitRelationImprover,
]

IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY}
78 changes: 78 additions & 0 deletions vulnerabilities/improvers/add_commit_ref.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
import re

from django.db import transaction
from django.db.models.query import QuerySet

from vulnerabilities.improver import Improver
from vulnerabilities.models import Commit
from vulnerabilities.models import VulnerabilityReference

logger = logging.getLogger(__name__)

"""
Improver that looks for commits related to a vulnerability
"""


class CommitRelationImprover(Improver):
    """
    Record the commits that vulnerability reference URLs point to.

    This is a custom improver: it does not produce inferences from advisories.
    ``run`` scans the ``VulnerabilityReference`` rows whose URL matches a
    Bitbucket, GitHub or GitLab commit URL and stores a ``Commit`` row holding
    the commit hash for each of them.
    """

    # A plain class attribute reads as True from both the class and its
    # instances; an instance-level ``property`` would return the property
    # object itself when read from the class.
    is_custom_improver = True

    def __init__(self):
        # Commits are buffered and inserted in chunks of ``insert_chunk_size``
        # so that the whole result set is never held in ``commit_instances``.
        self.insert_chunk_size = 500
        self.commit_instances = []

    @property
    def interesting_references(self) -> QuerySet:
        """
        Return the references whose URL looks like a commit URL.
        """
        # Regex base coming from: https://github.com/secureIT-project/CVEfixes/
        # Below regex is the compatible form for Postgresql
        # For now, we are only interested in Bitbucket, Github and Gitlab sources
        # TODO: Add other sources such as Apache related sources, Linux kernel, etc.
        git_url = r"((https|http)://(bitbucket|github|gitlab)\.(org|com)/([^/]+)/([^/]*))/(commit|commits)/(\w+)#?"
        return VulnerabilityReference.objects.filter(
            url__iregex=git_url,
        )

    def __generate_instance(self):
        """
        Yield an unsaved ``Commit`` for every interesting reference.
        """
        # The database filter (``url__iregex``) is case-insensitive, so the
        # Python-side pattern must be too; otherwise a URL such as
        # ``HTTPS://GitHub.com/...`` is selected by the query but not matched here.
        commit_pattern = re.compile(
            r"(((?P<repo>(https|http):\/\/(bitbucket|github|gitlab)\.(org|com)\/(?P<owner>[^\/]+)\/(?P<project>[^\/]*))\/(commit|commits)\/(?P<hash>\w+)#?)+)",
            re.IGNORECASE,
        )
        for ref in self.interesting_references:
            commit_groups = commit_pattern.search(ref.url)
            if commit_groups is None:
                # The two regex dialects may still disagree on some input;
                # skip the reference rather than crash the whole run.
                logger.warning("No commit hash found in reference URL: %r", ref.url)
                continue
            yield Commit(
                reference=ref,
                hash=commit_groups.group("hash"),
            )

    def __insert_bulk(self) -> None:
        """
        Insert the buffered commits that are not stored yet, then empty the buffer.
        """
        if not self.commit_instances:
            return

        with transaction.atomic():
            # The Commit model declares no unique constraint, so
            # ``ignore_conflicts`` alone would not stop a second run from
            # inserting the same (reference, hash) pairs again. Filter out the
            # pairs that already exist before inserting.
            reference_ids = {commit.reference_id for commit in self.commit_instances}
            already_stored = set(
                Commit.objects.filter(reference_id__in=reference_ids).values_list(
                    "reference_id", "hash"
                )
            )
            new_commits = [
                commit
                for commit in self.commit_instances
                if (commit.reference_id, commit.hash) not in already_stored
            ]
            if new_commits:
                Commit.objects.bulk_create(new_commits, ignore_conflicts=True)

        # Empty the buffer for further inserts
        self.commit_instances.clear()

    def run(self) -> None:
        """
        Create ``Commit`` rows for all interesting references, chunk by chunk.
        """
        for commit in self.__generate_instance():
            self.commit_instances.append(commit)
            if len(self.commit_instances) >= self.insert_chunk_size:
                self.__insert_bulk()
        # Add remaining commits
        self.__insert_bulk()
45 changes: 45 additions & 0 deletions vulnerabilities/migrations/0040_commit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Generated by Django 4.1.7 on 2023-07-05 17:38

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Creates the ``Commit`` model: a commit hash plus the list of URLs used to
    # reach it, attached to a ``VulnerabilityReference``.

    dependencies = [
        ("vulnerabilities", "0039_alter_vulnerabilityseverity_scoring_system"),
    ]

    operations = [
        migrations.CreateModel(
            name="Commit",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                (
                    "hash",
                    models.CharField(blank=True, help_text="Hash of the commit", max_length=1024),
                ),
                (
                    "chain_urls",
                    models.JSONField(
                        blank=True, default=list, help_text="List of URLS used to reach the commit"
                    ),
                ),
                (
                    # Deleting a reference also deletes its commits (CASCADE).
                    "reference",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="vulnerabilities.vulnerabilityreference",
                    ),
                ),
            ],
            options={
                "ordering": ["reference_id"],
            },
        ),
    ]
30 changes: 30 additions & 0 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -886,3 +886,33 @@ class ApiUser(UserModel):

class Meta:
proxy = True


class Commit(models.Model):
    """
    A commit that fixed a vulnerability, linked to the vulnerability
    reference it belongs to.
    """

    # Deleting the reference also deletes its commits.
    reference = models.ForeignKey(
        VulnerabilityReference,
        on_delete=models.CASCADE,
    )

    hash = models.CharField(
        max_length=1024,
        help_text="Hash of the commit",
        blank=True,
    )

    chain_urls = models.JSONField(
        default=list,
        help_text="List of URLS used to reach the commit",
        blank=True,
    )

    class Meta:
        ordering = ["reference_id"]

    def __str__(self):
        # This model has no ``url`` field, so the commit is identified by its
        # hash, followed by the reference id when one is set.
        reference_id = f" {self.reference_id}" if self.reference_id else ""
        return f"{self.hash}{reference_id}"