This repository has been archived by the owner on Feb 8, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 308
/
upsert.py
54 lines (41 loc) · 2 KB
/
upsert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# -*- coding: utf-8 -*-
"""Subcommand for upserting data from a CSV into Postgres.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import uuid
# Coordinate with Postgres on how to say "NULL".
# ==============================================
# We can't use the default, which is the empty string, because then we can't
# easily store the empty string itself. We don't want to use something that a
# package author could maliciously or mischieviously take advantage of to
# indicate a null we don't want. If we use a uuid it should be hard enough to
# guess, made harder in that it will change for each processing run.
NULL = uuid.uuid4().hex
def upsert(env, args, db):
fp = open(args.path)
with db.get_cursor() as cursor:
assert cursor.connection.encoding == 'UTF8'
cursor.run("CREATE TEMP TABLE updates (LIKE packages INCLUDING ALL) ON COMMIT DROP")
cursor.copy_expert('COPY updates (package_manager, name, description, emails) '
"FROM STDIN WITH (FORMAT csv, NULL '%s')" % NULL, fp)
cursor.run("""
WITH updated AS (
UPDATE packages p
SET package_manager = u.package_manager
, description = u.description
, emails = u.emails
FROM updates u
WHERE p.name = u.name
RETURNING p.name
)
INSERT INTO packages(package_manager, name, description, emails)
SELECT package_manager, name, description, emails
FROM updates u LEFT JOIN updated USING(name)
WHERE updated.name IS NULL
GROUP BY u.package_manager, u.name, u.description, u.emails
""")
def main(env, args, db, sentrified):
"""Take a CSV file from stdin and load it into Postgres using an `ingenious algorithm`_.
.. _ingenious algorithm: http://tapoueh.org/blog/2013/03/15-batch-update.html
"""
sentrified(upsert)(env, args, db)