Merge pull request #2812 from chaoss/dev
Release Ready
sgoggins authored Jun 10, 2024
2 parents 2eb423f + a7d5a27 commit 99ee81f
Showing 83 changed files with 2,823 additions and 2,008 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -1,4 +1,4 @@
-# Augur NEW Release v0.70.0
+# Augur NEW Release v0.71.0

Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data. Less data carpentry for everyone else!
The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot) ... A public instance of 8Knot is available at https://metrix.chaoss.io ... That is tied to a public instance of Augur at https://ai.chaoss.io
@@ -10,7 +10,7 @@ The primary way of looking at Augur data is through [8Knot](https://github.com/o
## NEW RELEASE ALERT!
### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md)

-Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.70.0
+Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.71.0

- The `main` branch is a stable version of our new architecture, which features:
- Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks.
3 changes: 2 additions & 1 deletion augur/api/routes/config.py
@@ -8,6 +8,7 @@

# Disable the requirement for SSL by setting env["AUGUR_DEV"] = True
from augur.application.config import get_development_flag
+from augur.application.db.lib import get_session
from augur.application.db.models import Config
from augur.application.config import AugurConfig
from augur.application.db.session import DatabaseSession
@@ -45,7 +46,7 @@ def update_config():

update_dict = request.get_json()

-with DatabaseSession(logger, engine=current_app.engine) as session:
+with get_session() as session:

for section, data in update_dict.items():

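The route now gets its database session from get_session() instead of constructing a DatabaseSession with an explicit logger and engine at the call site. A minimal sketch of what such a helper typically looks like, assuming a shared SQLAlchemy engine and module-level sessionmaker; the actual augur.application.db.lib implementation may differ:

```python
# Hypothetical sketch of a get_session() helper like the one imported above.
# The connection URL and module-level sessionmaker are assumptions; the real
# augur.application.db.lib implementation may differ.
from contextlib import contextmanager

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("postgresql+psycopg2://augur:password@localhost:5432/augur")
Session = sessionmaker(bind=engine)

@contextmanager
def get_session():
    """Yield a session bound to the shared engine and always close it."""
    session = Session()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
```

With a helper of this shape, the handler body reduces to `with get_session() as session:` and no longer needs engine or logger plumbing at the call site.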
2 changes: 1 addition & 1 deletion augur/api/routes/dei.py
@@ -92,7 +92,7 @@ def core_task_success_util_gen(repo_git):
deiHook = CollectionRequest("core",primary_enabled_phases)
deiHook.repo_list = [repo_url]

-singleRoutine = AugurTaskRoutine(session,[deiHook])
+singleRoutine = AugurTaskRoutine(logger, session,[deiHook])
singleRoutine.start_data_collection()
#start_block_of_repos(logger, session, [repo_url], primary_enabled_phases, "new")

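The only change here is that AugurTaskRoutine now takes the logger as its first argument instead of deriving it elsewhere. A stripped-down, hypothetical sketch of that calling pattern; the real AugurTaskRoutine and CollectionRequest in augur.tasks carry much more scheduling state:

```python
# Hypothetical, stripped-down versions of the objects used above, only to show
# the constructor shape after this change: logger first, then session, then hooks.
import logging

class CollectionRequest:
    def __init__(self, name, enabled_phases):
        self.name = name
        self.enabled_phases = enabled_phases
        self.repo_list = []

class AugurTaskRoutine:
    def __init__(self, logger, session, collection_hooks):
        self.logger = logger
        self.session = session
        self.collection_hooks = collection_hooks

    def start_data_collection(self):
        for hook in self.collection_hooks:
            for repo_git in hook.repo_list:
                self.logger.info("Starting %s collection for %s", hook.name, repo_git)
                # ...dispatch hook.enabled_phases for this repo here...

logger = logging.getLogger(__name__)
hook = CollectionRequest("core", enabled_phases=[])
hook.repo_list = ["https://github.com/chaoss/augur"]
AugurTaskRoutine(logger, session=None, collection_hooks=[hook]).start_data_collection()
```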
1 change: 1 addition & 0 deletions augur/api/view/augur_view.py
@@ -48,6 +48,7 @@ def internal_server_error(error):
traceback.print_tb(error.__traceback__, file=errout)
# traceback.print_exception(error, file=errout)
stacktrace = errout.getvalue()
+stacktrace += f"\n{type(error).__name__}: {str(error)}"
errout.close()
except Exception as e:
logger.error(e)
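The added line appends the exception class and message to the captured text, because traceback.print_tb writes only the stack frames. A self-contained sketch of the same pattern outside the Flask error handler shown here:

```python
# Self-contained version of the pattern above: traceback.print_tb writes only the
# stack frames, so the exception class and message are appended by hand.
import io
import traceback

def format_error(error: BaseException) -> str:
    errout = io.StringIO()
    traceback.print_tb(error.__traceback__, file=errout)
    stacktrace = errout.getvalue()
    stacktrace += f"\n{type(error).__name__}: {str(error)}"
    errout.close()
    return stacktrace

try:
    1 / 0
except ZeroDivisionError as exc:
    print(format_error(exc))  # frames, then "ZeroDivisionError: division by zero"
```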
9 changes: 5 additions & 4 deletions augur/application/cli/backend.py
@@ -100,6 +100,7 @@ def start(ctx, disable_collection, development, port):
create_collection_status_records.si().apply_async()
time.sleep(3)

#put contributor breadth back in. Not sure why it was commented out
contributor_breadth_model.si().apply_async()

# start cloning repos when augur starts
@@ -164,15 +165,15 @@ def determine_worker_processes(ratio,maximum):
process_list.append(subprocess.Popen(scheduling_worker.split(" ")))
sleep_time += 6

-#60% of estimate, Maximum value of 45
-core_num_processes = determine_worker_processes(.6, 45)
+#60% of estimate, Maximum value of 45 : Reduced because it can be lower
+core_num_processes = determine_worker_processes(.15, 10)
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
process_list.append(subprocess.Popen(core_worker.split(" ")))
sleep_time += 6

#20% of estimate, Maximum value of 25
-secondary_num_processes = determine_worker_processes(.25, 45)
+secondary_num_processes = determine_worker_processes(.70, 60)
logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}")
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
process_list.append(subprocess.Popen(secondary_worker.split(" ")))
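This hunk, and its twin in augur/application/cli/collection.py below, rebalances Celery concurrency: core workers drop from 60% of the process estimate (capped at 45) to 15% (capped at 10), while secondary workers rise from 25% to 70% (capped at 60). A hypothetical sketch of a helper with the call shape used here, assuming the process budget is derived from the CPU count; the real Augur helper may compute its estimate differently:

```python
# Hypothetical helper matching the calls above; the estimate source is an assumption.
import os
from typing import Optional

def determine_worker_processes(ratio: float, maximum: int,
                               process_estimate: Optional[int] = None) -> int:
    """Return a concurrency value: a share of the process budget, capped and at least 1."""
    if process_estimate is None:
        process_estimate = os.cpu_count() or 1
    return max(1, min(maximum, round(process_estimate * ratio)))

# The split this commit moves to:
core_num_processes = determine_worker_processes(.15, 10)
secondary_num_processes = determine_worker_processes(.70, 60)
```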
@@ -317,7 +318,7 @@ def assign_orphan_repos_to_default_user(session):
repos = session.execute_sql(query).fetchall()

for repo in repos:
-UserRepo.insert(session,repo[0],1)
+UserRepo.insert(session, repo[0],1)


@cli.command('export-env')
8 changes: 4 additions & 4 deletions augur/application/cli/collection.py
@@ -124,15 +124,15 @@ def determine_worker_processes(ratio,maximum):
process_list.append(subprocess.Popen(scheduling_worker.split(" ")))
sleep_time += 6

-#60% of estimate, Maximum value of 45
-core_num_processes = determine_worker_processes(.6, 45)
+#60% of estimate, Maximum value of 45: Reduced because not needed
+core_num_processes = determine_worker_processes(.15, 10)
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
process_list.append(subprocess.Popen(core_worker.split(" ")))
sleep_time += 6

#20% of estimate, Maximum value of 25
-secondary_num_processes = determine_worker_processes(.25, 45)
+secondary_num_processes = determine_worker_processes(.70, 60)
logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}")
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
process_list.append(subprocess.Popen(secondary_worker.split(" ")))
@@ -301,4 +301,4 @@ def assign_orphan_repos_to_default_user(session):
repos = session.execute_sql(query).fetchall()

for repo in repos:
-UserRepo.insert(session,repo[0],1)
+UserRepo.insert(session, repo[0],1)
4 changes: 2 additions & 2 deletions augur/application/cli/tasks.py
@@ -36,8 +36,8 @@ def start():
secondary_worker_process = None

scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling"
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=45 -n core:{uuid.uuid4().hex}@%h"
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=45 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=20 -n core:{uuid.uuid4().hex}@%h"
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=60 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"

scheduling_worker_process = subprocess.Popen(scheduling_worker.split(" "))
core_worker_process = subprocess.Popen(core_worker.split(" "))
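Here the concurrency values are hardcoded rather than computed, moving in the same direction as the CLI changes above: core down to 20, secondary up to 60. As an aside, the commands are tokenized with str.split(" "); shlex.split from the standard library handles the same strings and stays correct if an argument ever contains spaces. A small usage sketch:

```python
# Equivalent launch using shlex.split; behavior is identical for these commands.
import shlex
import subprocess
import uuid

core_worker = (
    "celery -A augur.tasks.init.celery_app.celery_app worker -l info "
    f"--concurrency=20 -n core:{uuid.uuid4().hex}@%h"
)
core_worker_process = subprocess.Popen(shlex.split(core_worker))
```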
1 change: 1 addition & 0 deletions augur/application/config.py
@@ -288,6 +288,7 @@ def add_or_update_settings(self, settings: List[dict]):
query = self.session.query(Config).filter(and_(Config.section_name == setting["section_name"],Config.setting_name == setting["setting_name"]) )

if execute_session_query(query, 'first') is None:
+# TODO: Update to use bulk insert dicts so config doesn't require database session
self.session.insert_data(setting,Config, ["section_name", "setting_name"])
else:
#If setting exists. use raw update to not increase autoincrement
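The new TODO points toward writing settings without holding a dedicated config database session. One possible shape is an upsert keyed on (section_name, setting_name); the sketch below uses PostgreSQL's ON CONFLICT, where the `value` column name and the unique constraint are assumptions. Note that the existing comment still applies: an upsert consumes the autoincrement sequence even when it only updates, which is exactly what the raw-update path avoids.

```python
# One possible shape for the TODO above. Assumptions: a unique constraint on
# (section_name, setting_name) and a column named "value" on the Config model.
from sqlalchemy.dialects.postgresql import insert

from augur.application.db.models import Config

def upsert_setting(connection, setting: dict):
    stmt = insert(Config).values(**setting)
    stmt = stmt.on_conflict_do_update(
        index_elements=["section_name", "setting_name"],
        set_={"value": stmt.excluded.value},
    )
    connection.execute(stmt)
```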
Diffs for the remaining 75 changed files are not shown here.
