Skip to content
This repository has been archived by the owner on Feb 8, 2018. It is now read-only.

Commit

Permalink
Separate out fetching and processing of readmes
Browse files Browse the repository at this point in the history
  • Loading branch information
chadwhitacre committed Nov 8, 2016
1 parent a332fb6 commit 99b8544
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 18 deletions.
55 changes: 45 additions & 10 deletions gratipay/package_managers/readmes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def http_fetch(package_name):
return r.json()


def Syncer(db):
def sync(dirty, fetch=http_fetch):
def Fetcher(db):
def fetch(dirty, fetch=http_fetch):
"""Update all info for one package.
"""
log(dirty.name)
Expand All @@ -43,23 +43,58 @@ def sync(dirty, fetch=http_fetch):
db.run('''
UPDATE packages
SET readme=%s
SET readme_needs_to_be_processed=true
, readme_raw=%s
, readme_type=%s
WHERE package_manager=%s
AND name=%s
''', ( markdown.marky(full['readme'])
, full['readme']
, 'x-markdown/npm'
''', ( full['readme']
, 'x-markdown/marky'
, dirty.package_manager
, dirty.name
))

return fetch


def Processor(db):
    """Return a ``process(dirty)`` callable bound to ``db``.

    The returned callable renders the raw readme of one package and stores
    the result, so it can be handed to a mapping helper together with a list
    of package records.
    """
    def process(dirty):
        """Process the readme for a single package.

        ``dirty`` must expose ``package_manager`` and ``name`` attributes.
        Returns ``None``; the work is done through ``db``.
        """
        log(dirty.name)
        raw = db.one( 'SELECT readme_raw FROM packages '
                      'WHERE package_manager=%s and name=%s and readme_needs_to_be_processed'
                    , (dirty.package_manager, dirty.name)
                     )
        if raw is None:
            # The lookup produced nothing to render for this package (the
            # query only considers rows still flagged for processing).
            return
        processed = markdown.marky(raw)
        # Store the rendered readme and clear the flag in one statement so
        # the same package is not picked up for processing again.
        db.run('''
            UPDATE packages
               SET readme=%s
                 , readme_needs_to_be_processed=false
             WHERE package_manager=%s
               AND name=%s
        ''', ( processed
             , dirty.package_manager
             , dirty.name
              ))

    return process


def fetch(db):
    """Run a ``Fetcher`` over every package whose ``readme_raw`` is NULL.

    Packages are selected in descending ``package_manager``/``name`` order
    and handed to ``threaded_map`` along with the worker.
    """
    query = ( 'SELECT package_manager, name '
              'FROM packages WHERE readme_raw IS NULL '
              'ORDER BY package_manager DESC, name DESC'
             )
    packages = db.all(query)
    # The trailing 4 is passed through to threaded_map unchanged
    # (presumably the number of worker threads - confirm against its
    # definition).
    threaded_map(Fetcher(db), packages, 4)


def sync_all(db):
dirty = db.all('SELECT package_manager, name FROM packages WHERE readme_raw IS NULL '
def process(db):
    """Run a ``Processor`` over every package flagged for readme processing.

    Packages are selected in descending ``package_manager``/``name`` order
    and handed to ``threaded_map`` along with the worker.
    """
    # Adjacent string literals are concatenated verbatim, so every fragment
    # except the last must end with a space; without it the WHERE clause
    # runs straight into ORDER BY and the query is invalid SQL.
    dirty = db.all('SELECT id, package_manager, name, description, readme_raw '
                   'FROM packages WHERE readme_needs_to_be_processed '
                   'ORDER BY package_manager DESC, name DESC')
    threaded_map(Processor(db), dirty, 4)
16 changes: 12 additions & 4 deletions gratipay/package_managers/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def log_stats():


def upsert(args):
"""Take a CSV file from stdin and load it into Postgres.
"""
from gratipay import wireup
db = wireup.db(wireup.env())
fp = open(args.path)
Expand Down Expand Up @@ -128,19 +130,25 @@ def upsert(args):
""")


def readmes(args):
def fetch_readmes(args):
    """Command handler for ``fetch-readmes``: wire up the db, then fetch.

    ``args`` is accepted so the handler has the same signature as the other
    commands; it is not used here.
    """
    # Imported inside the function, as in the other command handlers.
    from gratipay import wireup
    env = wireup.env()
    _readmes.fetch(wireup.db(env))


def process_readmes(args):
    """Command handler for ``process-readmes``: wire up the db, then process.

    ``args`` is accepted so the handler has the same signature as the other
    commands; it is not used here.
    """
    from gratipay import wireup
    db = wireup.db(wireup.env())
    # Only the processing step belongs here; the former combined sync entry
    # point is not called any more.
    _readmes.process(db)


def parse_args(argv):
    """Parse command-line arguments (without the program name).

    Returns a namespace with ``command`` (one of the supported sub-commands)
    and ``path`` (the input file, defaulting to ``/dev/stdin``).
    """
    p = argparse.ArgumentParser()
    # Exactly one positional ``command``; the hyphenated names are the
    # user-facing spellings of the readme handlers.
    p.add_argument('command', choices=['serialize', 'upsert', 'fetch-readmes', 'process-readmes'])
    p.add_argument('path', help='the path to the input file', nargs='?', default='/dev/stdin')
    return p.parse_args(argv)


def main(argv=sys.argv):
    """Entry point: parse ``argv`` and dispatch to the matching handler.

    ``argv`` includes the program name in position 0, like ``sys.argv``.
    """
    args = parse_args(argv[1:])
    # Command names may contain hyphens (e.g. 'fetch-readmes') while the
    # handler functions use underscores, so normalise before the lookup.
    globals()[args.command.replace('-', '_')](args)
6 changes: 6 additions & 0 deletions sql/branch.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Migration: let packages.readme be empty and add a flag marking rows whose
-- readme still has to be processed. Everything runs in one transaction.
BEGIN;
-- readme may now be NULL, and NULL becomes its default.
ALTER TABLE packages ALTER COLUMN readme DROP NOT NULL;
ALTER TABLE packages ALTER COLUMN readme SET DEFAULT NULL;
-- Clear every stored readme; this must come after the NOT NULL constraint
-- has been dropped above.
UPDATE packages SET readme=NULL;
-- New flag column; with NOT NULL DEFAULT true every existing row starts out
-- flagged as needing processing.
ALTER TABLE packages ADD COLUMN readme_needs_to_be_processed bool NOT NULL DEFAULT true;
-- END closes the transaction opened by BEGIN (PostgreSQL spelling of COMMIT).
END;
38 changes: 34 additions & 4 deletions tests/py/test_npm_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ def test_sn_handles_empty_description_and_emails(self):
assert package.emails == []


# rs - readmes.Syncer
# rf - readmes.Fetcher

def test_rs_syncs_a_readme(self):
def test_rf_fetches_a_readme(self):
self.db.run("INSERT INTO packages (package_manager, name, description, emails) "
"VALUES ('npm', 'foo-package', 'A package', ARRAY[]::text[])")

Expand All @@ -114,12 +114,42 @@ class DirtyPackage:
def fetch(name):
return {'name': 'foo-package', 'readme': '# Greetings, program!'}

readmes.Syncer(self.db)(DirtyPackage(), fetch=fetch)
readmes.Fetcher(self.db)(DirtyPackage(), fetch=fetch)

package = self.db.one('SELECT * FROM packages')
assert package.name == 'foo-package'
assert package.description == 'A package'
assert package.readme == None
assert package.readme_needs_to_be_processed
assert package.readme_raw == '# Greetings, program!'
assert package.readme_type == 'x-markdown/marky'
assert package.emails == []


# rp - readmes.Processor

def test_rp_processes_a_readme(self):
    """Processor renders readme_raw into readme and clears the flag."""
    # The row is inserted without readme_needs_to_be_processed, so the
    # column's default applies.
    self.db.run('''
        INSERT
          INTO packages (package_manager, name, description, readme_raw, readme_type, emails)
        VALUES ('npm', 'foo-package', 'A package', '# Greetings, program!', 'x-markdown/marky',
                ARRAY[]::text[])
    ''')

    class DirtyPackage:
        package_manager = 'npm'
        name = 'foo-package'

    readmes.Processor(self.db)(DirtyPackage())

    package = self.db.one('SELECT * FROM packages')
    assert package.name == 'foo-package'
    assert package.description == 'A package'
    assert package.readme == '<h1><a id="user-content-greetings-program" class="deep-link" href="#greetings-program"><svg aria-hidden="true" class="deep-link-icon" height="16" version="1.1" width="16"><path d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Greetings, program!</h1>\n'
    assert not package.readme_needs_to_be_processed
    assert package.readme_raw == '# Greetings, program!'
    # The processor must leave readme_type exactly as it was inserted.
    assert package.readme_type == 'x-markdown/marky'
    assert package.emails == []

0 comments on commit 99b8544

Please sign in to comment.