Skip to content
This repository has been archived by the owner on Feb 8, 2018. It is now read-only.

Commit

Permalink
Merge pull request #4159 from gratipay/readmes
Browse files Browse the repository at this point in the history
Here's a first crack at syncing readmes
  • Loading branch information
chadwhitacre authored Oct 27, 2016
2 parents 0d45c20 + 2d7c8c3 commit 12d86e2
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 33 deletions.
3 changes: 1 addition & 2 deletions bin/sync-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,4 @@ pip install -e .
# Sync with npm.
# ==============

URL=https://registry.npmjs.com/-/all
curl $URL | sync-npm serialize /dev/stdin | sync-npm upsert /dev/stdin
curl https://registry.npmjs.com/-/all | sync-npm serialize | sync-npm upsert
24 changes: 1 addition & 23 deletions gratipay/billing/payday.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import os
import itertools
from multiprocessing.dummy import Pool as ThreadPool

import braintree

Expand All @@ -24,35 +23,14 @@
)
from gratipay.exceptions import NegativeBalance
from gratipay.models import check_db
from gratipay.utils.threaded_map import threaded_map
from psycopg2 import IntegrityError


# Read the payday SQL script into PAYDAY once, at import time. The path is
# resolved relative to this module's directory (../../sql/payday.sql).
with open(os.path.join(os.path.dirname(__file__), '../../sql/payday.sql')) as f:
    PAYDAY = f.read()


class ExceptionWrapped(Exception):
    """Carry a worker's exception plus its formatted traceback.

    ``args[0]`` is the original exception and ``args[1]`` is the traceback
    text captured inside the worker thread.
    """


def threaded_map(func, iterable, threads=5):
    """Apply ``func`` to every item of ``iterable`` using a pool of threads.

    :param func: callable invoked once per item
    :param iterable: the items to process
    :param threads: number of worker threads in the pool (default 5)
    :returns: the list produced by ``pool.map``

    If ``func`` raises an ``Exception``, the traceback captured in the worker
    is printed and the original exception is re-raised in the calling thread.
    The pool is shut down on every exit path, so a failing ``func`` no longer
    leaves worker threads behind.
    """
    pool = ThreadPool(threads)

    def g(*a, **kw):
        # Without this wrapper we get a traceback from inside multiprocessing.
        try:
            return func(*a, **kw)
        except Exception as e:
            import traceback
            raise ExceptionWrapped(e, traceback.format_exc())

    try:
        return pool.map(g, iterable)
    except ExceptionWrapped as e:
        print(e.args[1])
        raise e.args[0]
    finally:
        # terminate() discards any work still queued (there is none after a
        # successful map), and join() then waits for the threads to exit.
        pool.terminate()
        pool.join()


class NoPayday(Exception):
    """Raised when a payday was expected but none was found."""

    def __str__(self):
        return "No payday found where one was expected."

Expand Down
59 changes: 59 additions & 0 deletions gratipay/package_managers/readmes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import sys

import requests

from gratipay.utils import markdown
from gratipay.utils.threaded_map import threaded_map
from threading import Lock


# Serializes writes to stderr so output from concurrent threads is not interleaved.
log_lock = Lock()


def log(*a, **kw):
    """Print to stderr while holding ``log_lock``.

    Positional and keyword arguments are forwarded to ``print``.
    """
    log_lock.acquire()
    try:
        print(*a, file=sys.stderr, **kw)
    finally:
        log_lock.release()


def http_fetch(package_name, timeout=30):
    """Fetch a package's JSON document from registry.npmjs.com.

    :param package_name: name appended to the registry base URL
    :param timeout: seconds passed to ``requests.get`` as its ``timeout``, so
        that a stalled connection cannot block a worker thread indefinitely
    :returns: the decoded JSON body on a 200 response, otherwise ``None``

    Transport failures (timeouts, connection errors) are handled like a
    non-200 response: they are logged and ``None`` is returned.
    """
    try:
        r = requests.get('https://registry.npmjs.com/' + package_name, timeout=timeout)
    except requests.RequestException as exc:
        log(exc, 'for', package_name)
        return None
    if r.status_code != 200:
        log(r.status_code, 'for', package_name)
        return None
    return r.json()


def Syncer(db):
    """Return a ``sync(dirty, fetch=http_fetch)`` function bound to ``db``."""

    update_readme = '''
UPDATE packages
SET readme=%s
, readme_raw=%s
, readme_type=%s
WHERE package_manager=%s
AND name=%s
'''

    def sync(dirty, fetch=http_fetch):
        """Fetch one package and store its readme.

        ``dirty`` supplies ``name`` and ``package_manager``. ``fetch`` is
        called with the package name; when it returns something falsy the
        package is left untouched.
        """
        log(dirty.name)
        full = fetch(dirty.name)
        if full:
            assert full['name'] == dirty.name
            raw_readme = full['readme']
            values = ( markdown.marky(raw_readme)
                     , raw_readme
                     , 'x-markdown/npm'
                     , dirty.package_manager
                     , dirty.name
                      )
            db.run(update_readme, values)
        # else: nothing fetched -- try again later

    return sync


def sync_all(db):
    """Sync the readme of every package whose ``readme_raw`` is NULL.

    Packages are processed by ``threaded_map`` with ten threads.
    """
    query = ('SELECT package_manager, name FROM packages WHERE readme_raw IS NULL '
             'ORDER BY package_manager DESC, name DESC')
    packages = db.all(query)
    threaded_map(Syncer(db), packages, threads=10)
17 changes: 9 additions & 8 deletions gratipay/package_managers/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import uuid

import ijson.backends.yajl2_cffi as ijson
from gratipay.package_managers import readmes as _readmes


def log(*a):
    """Print the given values to stderr."""
    print(*a, file=sys.stderr)
Expand Down Expand Up @@ -121,16 +122,16 @@ def upsert(args):
""")


def readmes(args):
    """Wire up a database connection and sync all readmes.

    ``args`` is accepted but not used.
    """
    from gratipay import wireup  # imported at call time rather than module import
    env = wireup.env()
    database = wireup.db(env)
    _readmes.sync_all(database)


def parse_args(argv):
    """Parse command-line arguments.

    :param argv: list of argument strings (without the program name)
    :returns: an ``argparse.Namespace`` with ``command``, ``path`` and
        ``if_modified_since``

    ``command`` must be one of ``serialize``, ``upsert`` or ``readmes``.
    ``path`` is optional and defaults to ``/dev/stdin``. Each positional is
    declared exactly once: declaring ``command`` and ``path`` twice made
    ``readmes`` unselectable and the ``path`` default unreachable.
    """
    p = argparse.ArgumentParser()
    p.add_argument('command', choices=['serialize', 'upsert', 'readmes'])
    p.add_argument('path', help='the path to the input file', nargs='?', default='/dev/stdin')
    p.add_argument( '-i', '--if_modified_since'
                  , help='a number of minutes in the past, past which we would like to see new '
                         'updates (only meaningful for `serialize`; -1 means all!)'
                  , type=int
                  , default=-1
                   )
    return p.parse_args(argv)


Expand Down
2 changes: 2 additions & 0 deletions gratipay/utils/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,7 @@ def render(markdown):
def marky(markdown):
    """Process markdown the same way npm does.

    The text is written to the stdin of the external ``marky-markdown``
    program and whatever that program writes to stdout is returned.

    :param markdown: markdown source; unicode (including unicode subclasses)
        is encoded as UTF-8 first, anything else is passed through unchanged
    :returns: ``Markup`` wrapping the program's stdout
    """
    # isinstance() rather than an exact type comparison, so that unicode
    # subclasses are encoded before being written to the pipe as well.
    if isinstance(markdown, unicode):
        markdown = markdown.encode('utf8')
    proc = Popen(("marky-markdown", "/dev/stdin"), stdin=PIPE, stdout=PIPE)
    rendered = proc.communicate(markdown)[0]
    return Markup(rendered)
26 changes: 26 additions & 0 deletions gratipay/utils/threaded_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from multiprocessing.dummy import Pool as ThreadPool


class ExceptionWrapped(Exception):
    """Carry a worker's exception plus its formatted traceback.

    ``args[0]`` is the original exception and ``args[1]`` is the traceback
    text captured inside the worker thread.
    """


def threaded_map(func, iterable, threads=5):
    """Apply ``func`` to every item of ``iterable`` using a pool of threads.

    :param func: callable invoked once per item
    :param iterable: the items to process
    :param threads: number of worker threads in the pool (default 5)
    :returns: the list produced by ``pool.map``

    If ``func`` raises an ``Exception``, the traceback captured in the worker
    is printed and the original exception is re-raised in the calling thread.
    The pool is shut down on every exit path, so a failing ``func`` no longer
    leaves worker threads behind.
    """
    pool = ThreadPool(threads)

    def g(*a, **kw):
        # Without this wrapper we get a traceback from inside multiprocessing.
        try:
            return func(*a, **kw)
        except Exception as e:
            import traceback
            raise ExceptionWrapped(e, traceback.format_exc())

    try:
        return pool.map(g, iterable)
    except ExceptionWrapped as e:
        print(e.args[1])
        raise e.args[0]
    finally:
        # terminate() discards any work still queued (there is none after a
        # successful map), and join() then waits for the threads to exit.
        pool.terminate()
        pool.join()
5 changes: 5 additions & 0 deletions sql/branch.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Make packages.readme_raw nullable and reset it on every existing row.
-- NOTE(review): readmes.sync_all selects rows WHERE readme_raw IS NULL, so
-- this presumably marks every package as needing a readme sync -- confirm.
BEGIN;
-- Allow readme_raw to hold NULL.
ALTER TABLE packages ALTER COLUMN readme_raw DROP NOT NULL;
-- Rows inserted without a readme_raw value get NULL.
ALTER TABLE packages ALTER COLUMN readme_raw SET DEFAULT NULL;
-- Clear readme_raw on all existing rows.
UPDATE packages SET readme_raw=NULL;
END;
26 changes: 26 additions & 0 deletions tests/py/test_npm_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from subprocess import Popen, PIPE

from gratipay.testing import Harness
from gratipay.package_managers import readmes


def load(raw):
Expand Down Expand Up @@ -80,3 +81,28 @@ def test_sn_handles_empty_description_and_emails(self):
assert package.name == 'empty-description'
assert package.description == ''
assert package.emails == []


# rs - readmes.Syncer

def test_rs_syncs_a_readme(self):
    """Syncer stores the raw and the rendered readme for a single package."""
    self.db.run("INSERT INTO packages (package_manager, name, description, emails) "
                "VALUES ('npm', 'foo-package', 'A package', ARRAY[]::text[])")

    class DirtyPackage:
        # Stand-in for a row selected by sync_all.
        package_manager = 'npm'
        name = 'foo-package'

    def stub_fetch(name):
        # Replaces the HTTP fetch so the test does not touch the network.
        return {'name': 'foo-package', 'readme': '# Greetings, program!'}

    sync = readmes.Syncer(self.db)
    sync(DirtyPackage(), fetch=stub_fetch)

    expected_html = ('<h1 id="user-content-greetings-program" class="deep-link">'
                     '<a href="#greetings-program">Greetings, program!</a></h1>\n')

    row = self.db.one('SELECT * FROM packages')
    assert row.name == 'foo-package'
    assert row.description == 'A package'
    assert row.readme == expected_html
    assert row.readme_raw == '# Greetings, program!'
    assert row.readme_type == 'x-markdown/npm'
    assert row.emails == []

0 comments on commit 12d86e2

Please sign in to comment.