# Skip to content
#
# DevDB - Data Sync and Geocoding of HPD and DOB Data #132
#
# DevDB - Data Sync and Geocoding of HPD and DOB Data
#
# DevDB - Data Sync and Geocoding of HPD and DOB Data #132

# DevDB data sync workflow.
#
# Three jobs run in sequence:
#   1. sync    - runs `library archive` once per dataset in the matrix.
#   2. geocode - imports the HPD datasets into a PostGIS service, runs the
#                geocoding scripts, exports the results to csv and archives
#                them.
#   3. create_issue_on_failure - on a failed scheduled run, files an issue
#                from the repository's issue template.
name: DevDB - Data Sync and Geocoding of HPD and DOB Data

on:
  schedule:
    # Every Monday at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 0 * * MON"
  workflow_dispatch:
    inputs:
      geosupport_docker_version:
        description: "Geosupport docker version"
        required: true
        default: latest

jobs:
  sync:
    name: Sync HPD Data, DOB BIS Applications
    runs-on: ubuntu-22.04
    container:
      # NOTE(review): pinned to `latest`; the `geosupport_docker_version`
      # dispatch input is only applied to the geocode job - confirm that
      # this is intended.
      image: nycplanning/build-geosupport:latest
    env:
      RECIPES_BUCKET: edm-recipes
    strategy:
      matrix:
        # One job instance is started per dataset listed here.
        dataset:
          - dob_permitissuance
          - dob_jobapplications
          - hpd_hny_units_by_building
    steps:
      - uses: actions/checkout@v4
      - name: Load Secrets
        uses: 1password/load-secrets-action@v1
        with:
          # Export the resolved secrets as environment variables so the
          # following steps can read them.
          export-env: true
        env:
          OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
          # The `op://` values are 1Password secret references resolved by
          # the action, not literal credentials.
          AWS_S3_ENDPOINT: "op://Data Engineering/DO_keys/AWS_S3_ENDPOINT"
          AWS_SECRET_ACCESS_KEY: "op://Data Engineering/DO_keys/AWS_SECRET_ACCESS_KEY"
          AWS_ACCESS_KEY_ID: "op://Data Engineering/DO_keys/AWS_ACCESS_KEY_ID"
          BUILD_ENGINE_SERVER: "op://Data Engineering/EDM_DATA/server_url"
      - name: Finish container setup ...
        run: ./bash/docker_container_setup.sh
      - name: Library Archive
        run: library archive --name ${{ matrix.dataset }} --latest --s3

  geocode:
    name: Geocode HPD Data
    needs: sync
    runs-on: ubuntu-22.04
    container:
      # Scheduled runs have no inputs, so the expression falls back to
      # `latest`.
      image: nycplanning/build-geosupport:${{ inputs.geosupport_docker_version || 'latest' }}
    defaults:
      run:
        shell: bash
        working-directory: products/developments
    env:
      RECIPES_BUCKET: edm-recipes
      # Host `postgis` is the label of the service container declared below.
      BUILD_ENGINE: postgresql://postgres:postgres@postgis:5432/postgres
    services:
      postgis:
        image: postgis/postgis:15-3.3-alpine
        env:
          POSTGRES_PASSWORD: postgres
        # Container health check based on `pg_isready`.
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - "5432:5432"
    steps:
      - uses: actions/checkout@v4
      - name: Load Secrets
        uses: 1password/load-secrets-action@v1
        with:
          # Export the resolved secrets as environment variables so the
          # following steps can read them.
          export-env: true
        env:
          OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
          AWS_S3_ENDPOINT: "op://Data Engineering/DO_keys/AWS_S3_ENDPOINT"
          AWS_SECRET_ACCESS_KEY: "op://Data Engineering/DO_keys/AWS_SECRET_ACCESS_KEY"
          AWS_ACCESS_KEY_ID: "op://Data Engineering/DO_keys/AWS_ACCESS_KEY_ID"
          BUILD_ENGINE_SERVER: "op://Data Engineering/EDM_DATA/server_url"
      - name: Finish container setup ...
        # Overrides the job default: the setup script is addressed relative
        # to the repository root, not products/developments.
        working-directory: ./
        run: ./bash/docker_container_setup.sh
      - name: Load to Database
        # The first import previously carried a trailing
        # `github.event.inputs.version` expression. This workflow defines no
        # `version` input, so it always expanded to an empty string and has
        # been dropped.
        run: |
          ./devdb.sh import hpd_hny_units_by_building
          ./devdb.sh import hpd_historical_units_by_building
      - name: Geocode
        # Also combines the imported DOB datasets
        run: |
          python3 python/geocode_hpd_hny.py
          python3 python/geocode_hpd_historical.py
      - name: Export to csv
        run: |
          ./devdb.sh output hny_geocode_results csv
          ./devdb.sh output hpd_historical_geocode_results csv
      - name: Check file existence
        # `ls` exits non-zero when the glob matches nothing, failing the job
        # before the archive step runs.
        run: ls *.csv
      - name: Archive to Data Library
        run: |
          ./devdb.sh library_archive hny_geocode_results hpd_hny_units_by_building
          ./devdb.sh library_archive hpd_historical_geocode_results hpd_historical_units_by_building

  create_issue_on_failure:
    needs: geocode
    runs-on: ubuntu-22.04
    # failure() is true when any ancestor job failed; `sync` is an ancestor
    # through `geocode`. Manual (workflow_dispatch) runs never open an issue.
    if: ${{ failure() && (github.event_name == 'schedule') }}
    # Explicit token scopes: read the repository for the checkout, write
    # issues for the create-an-issue step.
    permissions:
      contents: read
      issues: write
    steps:
      - uses: actions/checkout@v4
        with:
          # Only the issue template under .github is needed.
          sparse-checkout: .github
      - name: Create issue on failure
        uses: JasonEtco/create-an-issue@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ACTION: ${{ github.workflow }}
          BUILD_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        with:
          filename: .github/ISSUE_TEMPLATE/scheduled_action_failure.md