Merge pull request #2869 from chaoss/augur_0.76.1
Augur 0.76.1 Release
sgoggins authored Jul 23, 2024
2 parents 6bf35a5 + 0210296 commit 5483169
Showing 21 changed files with 438 additions and 306 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -9,6 +9,7 @@ augur_export_env.sh
!docker.config.json
config.yml
reports.yml
*.pid

node_modules/
.idea/
36 changes: 3 additions & 33 deletions .pylintrc
@@ -3,16 +3,6 @@
# go here to check pylint codes if not explained
#https://vald-phoenix.github.io/pylint-errors/

#doc string checkers
#enable=C0112,C0114,C0115,C0116

# checks for black listed names being used
#enable=C0102

#refactoring checker
#enable=R

disable=E0611,E1101,W1203,R0801,W0614,W0611,C0411,C0103,C0301,C0303,C0304,C0305,W0311,E0401,C0116


# Analyse import fallback blocks. This can be used to support both Python 2 and
@@ -150,29 +140,9 @@ confidence=HIGH,
INFERENCE_FAILURE,
UNDEFINED

# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then re-enable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=raw-checker-failed,
bad-inline-option,
locally-disabled,
file-ignored,
suppressed-message,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple time (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
enable=c-extension-no-member
# Only enable specific messages
disable=all
enable=unused-import,redefined-outer-name,E1206,E1205,E0704,E0107,E4702,E1101,E0211,E0213,E0103,E1133,E1120,E3102,E0602,E1123,E0001,W0702,W1404,W0706,W0101,W0120,W0718,R1737,R1705,R1720,R1724,R1723,R0401,R1701,C1802,C0200,C0501,C0201,W1001,E1102,R0923


[LOGGING]
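The trimmed `.pylintrc` flips the linting policy from opt-out to opt-in: `disable=all` turns everything off, and only the listed correctness-oriented message IDs are re-enabled. A minimal sketch of invoking pylint with this config, assuming pylint is installed and run from the repository root (the `augur/` target path is illustrative):

```python
# Sketch: run pylint with the repository's opt-in rcfile.
# Assumes pylint is installed; the target package path is illustrative.
import subprocess

result = subprocess.run(
    ["pylint", "--rcfile=.pylintrc", "augur/"],
    capture_output=True,
    text=True,
)
# Only the explicitly enabled message IDs should appear in the report.
print(result.stdout)
```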
4 changes: 2 additions & 2 deletions README.md
@@ -1,4 +1,4 @@
# Augur NEW Release v0.76.0
# Augur NEW Release v0.76.1

Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data. Less data carpentry for everyone else!
The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot) ... A public instance of 8Knot is available at https://metrix.chaoss.io ... That is tied to a public instance of Augur at https://ai.chaoss.io
@@ -10,7 +10,7 @@ The primary way of looking at Augur data is through [8Knot](https://github.com/o
## NEW RELEASE ALERT!
### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md)

Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.76.0
Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.76.1

- The `main` branch is a stable version of our new architecture, which features:
- Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks.
9 changes: 8 additions & 1 deletion augur/api/routes/util.py
@@ -5,13 +5,20 @@
import sqlalchemy as s
import pandas as pd
import json
from flask import Response, current_app
from flask import Response, current_app, jsonify

from augur.application.db.lib import get_value
from augur.application.logs import AugurLogger

logger = AugurLogger("augur").get_logger()

@app.route("/api")
def get_api_version():
return jsonify({
"status": "up",
"route": AUGUR_API_VERSION
})

@app.route('/{}/repo-groups'.format(AUGUR_API_VERSION))
def get_all_repo_groups(): #TODO: make this name automatic - wrapper?
repoGroupsSQL = s.sql.text("""
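The new unversioned `/api` route doubles as a liveness probe; the `start` command below polls it while waiting for Gunicorn. A hedged sketch of calling it with `requests`; the host and port are assumptions that depend on how the instance was configured:

```python
# Sketch: probe the new /api liveness endpoint.
# The base URL is an assumption; substitute your Augur host and port.
import requests

response = requests.get("http://localhost:5000/api")
response.raise_for_status()
payload = response.json()
print(payload["status"])  # "up" once the server is serving requests
print(payload["route"])   # the configured AUGUR_API_VERSION prefix
```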
29 changes: 24 additions & 5 deletions augur/application/cli/backend.py
@@ -10,9 +10,10 @@
import logging
import psutil
import signal
from redis.exceptions import ConnectionError as RedisConnectionError
import uuid
import traceback
import requests
from redis.exceptions import ConnectionError as RedisConnectionError
from urllib.parse import urlparse

from augur.tasks.start_tasks import augur_collection_monitor, create_collection_status_records
@@ -38,14 +39,17 @@ def cli(ctx):
@cli.command("start")
@click.option("--disable-collection", is_flag=True, default=False, help="Turns off data collection workers")
@click.option("--development", is_flag=True, default=False, help="Enable development mode, implies --disable-collection")
@click.option("--pidfile", default="main.pid", help="File to store the controlling process ID in")
@click.option('--port')
@test_connection
@test_db_connection
@with_database
@click.pass_context
def start(ctx, disable_collection, development, port):
def start(ctx, disable_collection, development, pidfile, port):
"""Start Augur's backend server."""

with open(pidfile, "w") as pidfile_handle:
    pidfile_handle.write(str(os.getpid()))

try:
if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1":
raise_open_file_limit(100000)
@@ -75,9 +79,25 @@ def start(ctx, disable_collection, development, port):
gunicorn_command = f"gunicorn -c {gunicorn_location} -b {host}:{port} augur.api.server:app --log-file gunicorn.log"
server = subprocess.Popen(gunicorn_command.split(" "))

time.sleep(3)
logger.info("awaiting Gunicorn start")
while not server.poll():
try:
api_response = requests.get(f"http://{host}:{port}/api")
except requests.exceptions.ConnectionError as e:
time.sleep(0.5)
continue

if not api_response.ok:
logger.critical("Gunicorn failed to start or was not reachable. Exiting")
exit(247)
break
else:
logger.critical("Gunicorn was shut down abnormally. Exiting")
exit(247)

logger.info('Gunicorn webserver started...')
logger.info(f'Augur is running at: {"http" if development else "https"}://{host}:{port}')
logger.info(f"The API is available at '{api_response.json()['route']}'")

processes = start_celery_worker_processes(float(worker_vmem_cap), disable_collection)

@@ -91,7 +111,6 @@ def start(ctx, disable_collection, development, port):
celery_beat_process = subprocess.Popen(celery_command.split(" "))

if not disable_collection:

with DatabaseSession(logger, engine=ctx.obj.engine) as session:

clean_collection_status(session)
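With `start` now writing the controlling process ID to `--pidfile` (default `main.pid`, which the `.gitignore` change above covers via `*.pid`), an operator or wrapper script can signal Augur without searching the process table. A minimal sketch under that assumption:

```python
# Sketch: read the pidfile written by `augur backend start` and signal it.
# Assumes the default pidfile name; adjust if --pidfile was overridden.
import os
import signal

with open("main.pid") as f:
    pid = int(f.read().strip())

os.kill(pid, signal.SIGTERM)  # request a shutdown of the controlling process
```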
4 changes: 2 additions & 2 deletions augur/application/db/lib.py
@@ -249,15 +249,15 @@ def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys
data = [data]

else:
logger.info("Data must be a list or a dict")
logger.error("Data must be a list or a dict")
return None

if len(data) == 0:
# self.logger.info("Gave no data to insert, returning...")
return None

if isinstance(data[0], dict) is False:
logger.info("Must be list of dicts")
logger.error("Must be list of dicts")
return None

# remove any duplicate data
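Switching these guard messages from `logger.info` to `logger.error` matches their severity: both branches reject the input and return `None` rather than inserting anything. A hedged sketch of that contract, mirroring the scorecard call site elsewhere in this diff (the model import path is an assumption):

```python
# Sketch: the input guards of bulk_insert_dicts, as seen by a caller.
# The RepoDepsScorecard import path is an assumption for illustration.
from augur.application.db.lib import bulk_insert_dicts
from augur.application.db.models import RepoDepsScorecard
from augur.application.logs import AugurLogger

logger = AugurLogger("augur").get_logger()

# A string is neither a list nor a dict: logs an error, returns None.
assert bulk_insert_dicts(logger, "bad-input", RepoDepsScorecard, ["repo_id", "name"]) is None

# A list whose first element is not a dict is rejected the same way.
assert bulk_insert_dicts(logger, ["bad-input"], RepoDepsScorecard, ["repo_id", "name"]) is None
```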
72 changes: 35 additions & 37 deletions augur/tasks/git/dependency_tasks/core.py
@@ -7,6 +7,7 @@
from augur.tasks.util.worker_util import parse_json_from_subprocess_call
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path
from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth
from augur.tasks.util.metadata_exception import MetadataException


def generate_deps_data(logger, repo_git):
@@ -94,50 +95,47 @@ def generate_scorecard(logger, repo_git):

try:
required_output = parse_json_from_subprocess_call(logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard)
except Exception as e:
logger.error(f"Could not parse required output! Error: {e}")
raise e

# end

logger.info('adding to database...')
logger.debug(f"output: {required_output}")
logger.info('adding to database...')
logger.debug(f"output: {required_output}")

if not required_output['checks']:
logger.info('No scorecard checks found!')
return

#Store the overall score first
to_insert = []
overall_deps_scorecard = {
'repo_id': repo_id,
'name': 'OSSF_SCORECARD_AGGREGATE_SCORE',
'scorecard_check_details': required_output['repo'],
'score': required_output['score'],
'tool_source': 'scorecard_model',
'tool_version': '0.43.9',
'data_source': 'Git',
'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
}
to_insert.append(overall_deps_scorecard)
# bulk_insert_dicts(overall_deps_scorecard, RepoDepsScorecard, ["repo_id","name"])

#Store misc data from scorecard in json field.
for check in required_output['checks']:
repo_deps_scorecard = {
if not required_output['checks']:
logger.info('No scorecard checks found!')
return

#Store the overall score first
to_insert = []
overall_deps_scorecard = {
'repo_id': repo_id,
'name': check['name'],
'scorecard_check_details': check,
'score': check['score'],
'name': 'OSSF_SCORECARD_AGGREGATE_SCORE',
'scorecard_check_details': required_output['repo'],
'score': required_output['score'],
'tool_source': 'scorecard_model',
'tool_version': '0.43.9',
'data_source': 'Git',
'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
}
to_insert.append(repo_deps_scorecard)

bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["repo_id","name"])

logger.info(f"Done generating scorecard for repo {repo_id} from path {path}")
to_insert.append(overall_deps_scorecard)
# bulk_insert_dicts(overall_deps_scorecard, RepoDepsScorecard, ["repo_id","name"])

#Store misc data from scorecard in json field.
for check in required_output['checks']:
repo_deps_scorecard = {
'repo_id': repo_id,
'name': check['name'],
'scorecard_check_details': check,
'score': check['score'],
'tool_source': 'scorecard_model',
'tool_version': '0.43.9',
'data_source': 'Git',
'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
}
to_insert.append(repo_deps_scorecard)

bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["repo_id","name"])

logger.info(f"Done generating scorecard for repo {repo_id} from path {path}")

except Exception as e:

raise MetadataException(e, f"required_output: {required_output}")
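The scorecard body now runs inside a single `try`, and any failure is re-raised as a `MetadataException` carrying the raw `required_output` alongside the original error, so collection logs show what the tool actually emitted. The class itself is not part of this diff; a minimal sketch of an exception with that constructor shape, where the body is an illustrative assumption rather than Augur's implementation:

```python
# Sketch: an exception pairing the original error with collected metadata.
# Only the (original_exception, metadata) constructor shape comes from the
# diff above; this body is an assumption, not Augur's actual code.
class MetadataException(Exception):
    def __init__(self, original_exception: Exception, metadata: str):
        self.original_exception = original_exception
        self.metadata = metadata
        super().__init__(f"{original_exception} | metadata: {metadata}")
```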