Skip to content

Commit

Permalink
[INS-62] Update Vault-PKI to receive/write certs
Browse files Browse the repository at this point in the history
This commit updates Vault-PKI to rely less on the salt reactor for
processing a signed certificate request. Instead of the response
being sent back out of band and requiring another salt run to activate
it, Vault-PKI now watches the event bus for the return message and
then reacts to it internally. This simplifies the overall execution
strategy for this module, and allows for blocking and state dependency
management, which has not been possible up to this point.
  • Loading branch information
jfryman committed Jan 31, 2019
1 parent d382425 commit b525884
Show file tree
Hide file tree
Showing 41 changed files with 2,867 additions and 45 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,4 @@ ENV/

# Rope project settings
.ropeproject
.#*
175 changes: 175 additions & 0 deletions cert/files/vault_pki.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@
the former into the new keys version directory.
- Send the CSR in a Salt event call to the Salt master onward
to be signed (with a return path to write the certificate at).
- Wait for signed CA from the Salt Master
- Write certificate to disk and activate new version
- Otherwise if certificate is in OK, log that and exit.
activate
Expand Down Expand Up @@ -118,9 +120,11 @@
import os
import platform
import re
import socket
import stat
import subprocess
import sys
import threading

from cryptography import x509
from cryptography.hazmat.backends import default_backend
Expand All @@ -131,6 +135,8 @@
import six

from salt import client as salt_client
from salt import config as salt_config
from salt.utils import event as salt_event

OWNER_UID = 0
ACCESS_GROUP = 'cert-access'
Expand Down Expand Up @@ -179,6 +185,13 @@
}

# Event tag for outgoing signing requests.
# NOTE(review): presumably passed as 'event_tag' to send_cert_request -- confirm.
SALT_EVENT_TAG = 'request/sign'
# Tag that _minion_event compares incoming events against.
SALT_EVENT_RESPONSE_TAG = 'request/certificate'
# Passed to exit_after() on _minion_event, which hands it to threading.Timer
# (i.e. a duration in seconds).
SALT_EVENT_WAIT_TIME = 120
# NOTE(review): not referenced by the code visible here -- confirm it is used.
SALT_MINION_CONFIG = '/etc/salt/minion'
# Base directory; _minion_event joins it with 'minion' to build 'sock_dir'.
SALT_SOCKET_DIR = '/var/run/salt'
# Transport name handed to salt_event.get_event by _minion_event.
SALT_EVENT_TRANSPORT = 'zeromq'

# Node name: the SALT_MINION_NAME environment variable if set, otherwise
# whatever platform.node() reports.
NODE_FQDN = os.getenv('SALT_MINION_NAME', platform.node())

logger = logging.getLogger(__file__)

Expand All @@ -198,6 +211,37 @@ class GenerationError(Exception):
pass


class FakeVersionArgParser(object):
    """Minimal stand-in for parsed arguments, for use with 'activate_main'.

    Exposes a single ``version`` attribute holding a one-element list that
    contains the version passed to the constructor.
    """

    def __init__(self, version):
        # Stored wrapped in a list, not as a bare value.
        self.version = [version]


def quit_function(fn_name):
    """Report that ``fn_name`` took too long and interrupt the main thread.

    Used as the ``threading.Timer`` callback in ``exit_after``; it therefore
    normally runs on the timer thread, not the main thread.

    Args:
        fn_name: name of the function that exceeded its time budget; only
            used in the message written to stderr.
    """
    # The low-level threading module is called ``thread`` on Python 2 and
    # ``_thread`` on Python 3. Import whichever exists here so the call
    # below cannot fail with a NameError (which would silently disable the
    # timeout, since the failure would happen on the timer thread).
    try:
        import _thread
    except ImportError:  # Python 2
        import thread as _thread

    # Write directly to stderr so this works with or without
    # ``from __future__ import print_function``.
    sys.stderr.write('{0} took too long\n'.format(fn_name))
    sys.stderr.flush()  # Python 3 stderr is likely buffered.
    _thread.interrupt_main()  # raises KeyboardInterrupt in the main thread


def exit_after(s):
    """Decorator factory that interrupts a call running longer than ``s`` seconds.

    A ``threading.Timer`` is armed before the wrapped function is called and
    cancelled once it returns or raises. If the timer fires first it invokes
    ``quit_function`` with the wrapped function's name.

    Args:
        s: timeout in seconds, passed straight to ``threading.Timer``.

    Returns:
        A decorator; the decorated function returns whatever the wrapped
        function returns and keeps its name and docstring.
    """
    # Local import so this block does not depend on a file-level import.
    import functools

    def outer(fn):
        @functools.wraps(fn)  # keep fn's __name__/__doc__ on the wrapper
        def inner(*args, **kwargs):
            timer = threading.Timer(s, quit_function, args=[fn.__name__])
            timer.start()
            try:
                return fn(*args, **kwargs)
            finally:
                # Always disarm the timer, whether fn returned or raised.
                timer.cancel()
        return inner
    return outer


def _setup_directory(dir_path, mode, owner_uid, group_gid):
"""Ensure a given directory exists and conforms to expected settings."""
try:
Expand Down Expand Up @@ -386,6 +430,83 @@ def send_cert_request(event_tag, new_version, dest_cert_path, csr):
path=dest_cert_path)


def _wait_for_signed_cert_request(func):
    """Run ``func`` and return the signed-certificate data it captures.

    Args:
        func: zero-argument callable that waits for the certificate data
            (``checkgen_main`` passes ``_minion_event``).

    Returns:
        Whatever ``func`` returns, or ``None`` if the wait was cut short by
        a ``KeyboardInterrupt``.
    """
    try:
        return func()
    except KeyboardInterrupt:
        # 'exit_after' enforces its timeout by interrupting the main thread,
        # which surfaces here as KeyboardInterrupt. Report "no data" so the
        # caller's missing-certificate check can log and exit instead of the
        # process dying with a traceback.
        return None


@exit_after(SALT_EVENT_WAIT_TIME)
def _minion_event(opts=None):
    """Listen on the minion event bus until a certificate response arrives.

    Loops over events until one tagged SALT_EVENT_RESPONSE_TAG passes
    ``_job_contains_cert_data``. The ``exit_after`` decorator bounds the wait
    to SALT_EVENT_WAIT_TIME seconds.

    Args:
        opts: optional dict of extra options for ``salt_event.get_event``.
            It is copied, never modified; the 'id', 'node', 'transport' and
            'sock_dir' keys are always overwritten in the copy.

    Returns:
        The ``data['data']`` payload of the first matching event.
    """
    # Work on a private copy: a mutable default that is written to would be
    # shared between calls, and a caller-supplied dict would be modified.
    opts = dict(opts) if opts else {}
    opts['id'] = NODE_FQDN
    opts['node'] = 'minion'
    opts['transport'] = SALT_EVENT_TRANSPORT
    opts['sock_dir'] = os.path.join(SALT_SOCKET_DIR, opts['node'])
    event = salt_event.get_event(
        opts['node'],
        sock_dir=opts['sock_dir'],
        transport=opts['transport'],
        opts=opts,
        listen=True)

    while True:
        ret = event.get_event(full=True)
        if ret is None:
            logger.debug('[_minion_event] No event data in packet')
            continue
        data = ret.get('data', False)
        # Ignore events without data or with a different tag.
        if not data or ret['tag'] != SALT_EVENT_RESPONSE_TAG:
            continue
        if _job_contains_cert_data(data):
            logger.debug('[_minion_event] Job contains cert data!')
            return data['data']
        logger.debug('[_minion_event] Job does not contains cert data. :(')


def _job_contains_cert_data(data):
    """Return True if a received job message carries return cert data.

    Args:
        data: the event's data mapping, or None.

    Returns:
        True when ``data['data']`` exists, is non-empty and contains a
        'cert' entry; False otherwise (including when ``data`` is None or
        empty, or has no usable 'data' entry).
    """
    if not data:
        return False

    # A missing or empty 'data' entry means "no cert", not an error.
    payload = data.get('data')
    if not payload:
        return False

    return 'cert' in payload


def _get_certificate_id(cert_data):
    """Extract the certificate ID from a certificate response.

    The ID is read from the sixth "/"-separated field (index 5) of
    ``cert_data["cert_path"]``.

    Args:
        cert_data: mapping holding the response; only "cert_path" is read.

    Returns:
        The ID as a string of digits, or False when "cert_path" is missing
        or empty, has too few fields, or the field is not numeric.
    """
    id_field = 5  # position of the ID within the split path

    cert_path = cert_data.get("cert_path") or ""
    parts = cert_path.split("/")
    # Too-short paths yield False rather than an IndexError.
    if len(parts) <= id_field:
        return False

    index = parts[id_field]
    return index if index.isdigit() else False


def _write_certificate_data(cert, cert_path, ca, ca_path):
    """Write the certificate and the CA data to their target paths.

    Args:
        cert: certificate contents, written to ``cert_path``.
        cert_path: destination for ``cert``.
        ca: CA contents, written to ``ca_path``.
        ca_path: destination for ``ca``.

    Returns:
        True only if both writes reported success, False otherwise.
    """
    # Both writes are always attempted, even when the first one fails.
    outcomes = [
        _write_file(cert, cert_path),
        _write_file(ca, ca_path),
    ]
    return all(outcomes)


def _write_file(contents, path):
    """Try to write ``contents`` to ``path`` in text mode.

    Args:
        contents: text to write.
        path: file to create or overwrite.

    Returns:
        True if the write succeeded, False if the file could not be opened
        or written. The underlying error is logged, not raised.
    """
    try:
        with open(path, 'w') as f:
            f.write(contents)
    except (IOError, OSError) as err:
        # Record why the write failed; callers only see the boolean.
        logger.error('Failed to write "{}": {}'.format(path, err))
        return False
    return True


def _atomic_link_switch(source, destination):
"""Does an atomic symlink swap by overwriting the destination symlink.
Expand Down Expand Up @@ -417,6 +538,7 @@ def _activate_version(version_str, live_dir):
live_pkcs8_key_path = os.path.join(live_dir, PKCS8_KEY_FILENAME)
live_cert_path = os.path.join(live_dir, CERT_FILENAME)
live_chain_path = os.path.join(live_dir, FULLCHAIN_FILENAME)
current_path = os.path.join(live_dir, 'current')
(cert_path,
chain_path,
key_path,
Expand All @@ -426,6 +548,9 @@ def _activate_version(version_str, live_dir):
_atomic_link_switch(pkcs8_key_path, live_pkcs8_key_path)
_atomic_link_switch(cert_path, live_cert_path)
_atomic_link_switch(chain_path, live_chain_path)

# Ensure last to assert all atomic link switching has happened
_write_file(version_str, current_path)
except ActivationError:
logger.critical(
'Failed to activate "{}"!'.format(
Expand All @@ -438,6 +563,7 @@ def _activate_version(version_str, live_dir):
def _activate_version_with_rollback(version_str, live_dir):
"""Activate a cert/key version but rollback to if errors occur.
Records the current cert/key version before activation and if it
is sane attempts to restore it in the event of an error occuring.
"""
Expand Down Expand Up @@ -680,10 +806,59 @@ def checkgen_main(args):
csr)
if not sent_ok:
logger.error('Error sending CSR to salt master!')
sys.exit(1)

certificate_data = _wait_for_signed_cert_request(_minion_event)
if not certificate_data:
logger.error('Did not receive certificate from salt master')
sys.exit(1)

certificate_id = _get_certificate_id(certificate_data)
if not certificate_id:
logger.error('Error retrieving certificate ID')
sys.exit(1)

cert = certificate_data['cert']
cert_path = certificate_data['cert_path']
ca = certificate_data['fullchain']
ca_path = certificate_data['fullchain_path']

write_certificates = _write_certificate_data(cert, cert_path, ca, ca_path)
if write_certificates:
# TODO call self.activate with certificate ID
args = FakeVersionArgParser(certificate_id)
activate_main(args)
else:
logger.error('Error writing certificates to disk')
else:
logger.info('Cert Status: OK.')


def checkvalid_main(args):
    """Quickly check whether a certificate update is needed.

    Runs only the validity-check part of what checkgen_main does, so that
    configuration management tooling can use the exit status to decide
    whether there is work to do.

    Exits the process with status 1 when ``new_cert_needed`` reports that
    the live certificate needs replacing, and with status 0 otherwise.

    Args:
        args: parsed command-line arguments (not used here).

    Raises:
        SetupError: if the node name is empty or the access group does not
            exist.
    """
    fqdn = NODE_FQDN
    if not fqdn:
        raise SetupError('Missing FQDN!')

    # Only the group's existence matters here; its gid is not needed.
    try:
        grp.getgrnam(ACCESS_GROUP)
    except KeyError:
        raise SetupError('Missing group: {}'.format(ACCESS_GROUP))

    live_dir = LIVE_DIR.format(base=BASE_DIR, fqdn=fqdn)
    cert_path = os.path.join(live_dir, CERT_FILENAME)
    sys.exit(1 if new_cert_needed(cert_path) else 0)


def list_main(args):
fqdn = platform.node()
if not fqdn:
Expand Down
4 changes: 3 additions & 1 deletion cert/init.sls
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,17 @@ install python cryptography module:
- group: root
- mode: 0755

run vault_pki to get initial cert:
run vault_pki:
cmd.run:
- name: /usr/local/bin/vault_pki checkgen
- unless: /usr/local/bin/vault_pki checkvalid
- require:
- group: setup new cert-access group
- pkg: install crypto dependencies
- pip: install python cryptography module
- file: /usr/local/bin/vault_pki


checkgen_cert:
cron.present:
- name: (/usr/local/bin/vault_pki list ; /usr/local/bin/vault_pki checkgen ; /usr/local/bin/vault_pki list) 2>&1 | logger -t vault_pki
Expand Down
33 changes: 0 additions & 33 deletions cert/react_activate_cert.sls

This file was deleted.

11 changes: 0 additions & 11 deletions orch/vault_pki_orch.sls
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,3 @@ push_signed_cert:
csr: |
{{ payload['csr']|indent(8, false) }}
path: {{ payload['path'] }}

activate_new_version:
salt.state:
- tgt: {{ target }}
- sls:
- cert.react_activate_cert
- pillar:
# beware version being converted to a number
version: "{{ payload['version'] }}"
- require:
- salt: push_signed_cert
5 changes: 5 additions & 0 deletions test/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.vagrant
intermediate.*
vault-pki-policy.*
*.swp
.#*
Loading

0 comments on commit b525884

Please sign in to comment.