Skip to content
This repository has been archived by the owner on Feb 8, 2018. It is now read-only.

Commit

Permalink
Reimplement based on the npm change stream
Browse files Browse the repository at this point in the history
  • Loading branch information
chadwhitacre committed May 3, 2017
1 parent 8f6dac5 commit 0d068a7
Show file tree
Hide file tree
Showing 14 changed files with 192 additions and 317 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ language: python
git:
depth: 5
addons:
postgresql: 9.3
postgresql: 9.6
firefox: latest-esr
before_install:
- git branch -vv | grep '^*'
Expand Down
15 changes: 7 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Quick Start
Local
-----

Given Python 2.7, Postgres 9.3, and a C/make toolchain:
Given Python 2.7, Postgres 9.6, and a C/make toolchain:

```shell
git clone https://github.com/gratipay/gratipay.com.git
Expand Down Expand Up @@ -116,7 +116,7 @@ On Debian or Ubuntu you will need the following packages:

```shell
sudo apt-get install \
postgresql-9.3 \
postgresql-9.6 \
postgresql-contrib \
libpq-dev \
python-dev \
Expand Down Expand Up @@ -386,7 +386,7 @@ Modifying the Database
======================

We write SQL, specifically the [PostgreSQL
variant](https://www.postgresql.org/docs/9.3/static/). We keep our database
variant](https://www.postgresql.org/docs/9.6/static/). We keep our database
schema in
[`schema.sql`](https://github.com/gratipay/gratipay.com/blob/master/sql/schema.sql),
and we write schema changes for each PR branch in a `sql/branch.sql` file, which
Expand Down Expand Up @@ -436,11 +436,10 @@ database configured in your testing environment.
Local Database Setup
--------------------
For the best development experience, you need a local
installation of [Postgres](https://www.postgresql.org/download/). The best
version of Postgres to use is 9.3.5, because that's what we're using in
production at Heroku. You need at least 9.2, because we depend on being able to
specify a URI to `psql`, and that was added in 9.2.
For the best development experience, you need a local installation of
[Postgres](https://www.postgresql.org/download/). The best version of Postgres
to use is 9.6.2, because that's what we're using in production at Heroku. You
need at least 9.5 to support the features we depend on.
+ Mac: use Homebrew: `brew install postgres`
+ Ubuntu: use Apt: `apt-get install postgresql postgresql-contrib libpq-dev`
Expand Down
92 changes: 72 additions & 20 deletions gratipay/cli/sync_npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,92 @@
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import sys
import argparse
import time

from aspen import log
from couchdb import Database

from gratipay import wireup
from gratipay.sync_npm import serialize, upsert
from gratipay.utils import sentry


def get_last_seq(db):
    """Return the npm change-stream sequence number we last recorded.

    The value is read from the ``npm_last_seq`` column of the
    ``worker_coordination`` table via ``db.one``.

    :param db: a database object exposing a ``one(query)`` method
    :returns: whatever ``db.one`` yields for the query

    """
    query = 'SELECT npm_last_seq FROM worker_coordination'
    return db.one(query)


def production_change_stream(seq):
    """Open the npm registry's change feed, starting after ``seq``.

    :param seq: the change-stream sequence number to resume from
    :returns: the result of ``Database.changes`` for a continuous feed that
        includes full docs

    """
    registry = Database('https://skimdb.npmjs.com/registry')
    return registry.changes(
        feed='continuous',
        include_docs=True,
        since=seq,
    )


def process_doc(doc):
    """Return a smoothed-out doc, or None if it's not a package doc, meaning
    there's no name key and it's probably a design doc, per:

        https://github.com/npm/registry/blob/aef8a275/docs/follower.md#clean-up

    :param dict doc: a document from the npm registry change stream
    :returns: ``None`` if ``doc`` has no ``name`` key, otherwise a dict with
        ``name``, ``description`` (defaulting to ``''``), and ``emails`` (a
        sorted, de-duplicated list of the maintainers' non-blank emails)

    """
    if 'name' not in doc:
        return None

    # Maintainer entries are not guaranteed to carry an email; skip missing
    # (None) and blank values instead of blowing up on ``None.strip()``.
    maintainers = doc.get('maintainers') or []
    candidates = (m.get('email') for m in maintainers)
    emails = set(e for e in candidates if e and e.strip())

    return {
        'name': doc['name'],
        'description': doc.get('description', ''),
        'emails': sorted(emails),
    }


def consume_change_stream(change_stream, db):
    """Given a function similar to :py:func:`production_change_stream` and a
    :py:class:`~GratipayDB`, read from the stream and write to the db.

    The npm registry is a CouchDB app, which means we get a change stream from
    it that allows us to follow registry updates in near-realtime. Our strategy
    here is to maintain open connections to both the registry and our own
    database, and write as we read.

    :param change_stream: a callable that takes a sequence number and returns
        an iterable of changes, each with ``doc`` and ``seq`` keys
    :param db: the database to read the last sequence number from and to write
        packages to

    """
    last_seq = get_last_seq(db)
    log("Picking up with npm sync at {}.".format(last_seq))
    with db.get_connection() as conn:
        for change in change_stream(last_seq):
            processed = process_doc(change['doc'])
            if not processed:
                # Not a package doc (e.g., a design doc); nothing to store.
                continue
            cursor = conn.cursor()
            cursor.run('''
                INSERT INTO packages
                            (package_manager, name, description, emails)
                     VALUES ('npm', %(name)s, %(description)s, %(emails)s)
                ON CONFLICT (package_manager, name) DO UPDATE
                        SET description=%(description)s, emails=%(emails)s
            ''', processed)
            # Record our position before the commit below, so the package
            # write and the sequence number are committed together.
            cursor.run('UPDATE worker_coordination SET npm_last_seq=%s', (change['seq'],))
            cursor.connection.commit()


def main():
    """This function is installed via an entrypoint in ``setup.py`` as
    ``sync-npm``.

    Usage::

        sync-npm

    It loops forever, consuming the npm change stream and writing to our
    database. Whenever consumption stops, it logs the sequence number it will
    resume from and sleeps before reconnecting. Ctrl-C during that sleep exits.

    """
    env = wireup.env()
    db = wireup.db(env)

    while 1:
        # NOTE(review): presumably sentry.teller reports and suppresses
        # exceptions so that we fall through to the retry below -- confirm
        # against gratipay.utils.sentry.
        with sentry.teller(env):
            consume_change_stream(production_change_stream, db)
        try:
            last_seq = get_last_seq(db)
            sleep_for = 60
            log( 'Encountered an error, will pick up with %s in %s seconds (Ctrl-C to exit) ...'
               % (last_seq, sleep_for)
                )
            time.sleep(sleep_for)  # avoid a busy loop if thrashing
        except KeyboardInterrupt:
            return
43 changes: 0 additions & 43 deletions gratipay/sync_npm/__init__.py

This file was deleted.

107 changes: 0 additions & 107 deletions gratipay/sync_npm/serialize.py

This file was deleted.

57 changes: 0 additions & 57 deletions gratipay/sync_npm/upsert.py

This file was deleted.

1 change: 1 addition & 0 deletions gratipay/testing/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def clear_tables(self):
except (IntegrityError, InternalError):
tablenames.insert(0, tablename)
self.db.run("ALTER SEQUENCE participants_id_seq RESTART WITH 1")
self.db.run("INSERT INTO worker_coordination DEFAULT VALUES")


def make_elsewhere(self, platform, user_id, user_name, **kw):
Expand Down
Loading

0 comments on commit 0d068a7

Please sign in to comment.