First Tab Working

NYCPlanning · Nov 22, 2024 · b859aca · b859aca
1 parent 95805f5
commit b859aca
Show file tree

Hide file tree

Showing 8 changed files with 389 additions and 3 deletions.
diff --git a/dcpy/lifecycle/package/abstract_doc.py b/dcpy/lifecycle/package/abstract_doc.py
@@ -0,0 +1,165 @@
+from pydantic import BaseModel
+from typing import Any
+
+from dcpy.models.product.dataset.metadata_v2 import Dataset
+from dcpy.models.product.data_dictionary import DataDictionary, FieldSet
+from dcpy.utils.logging import logger
+
+
+class Font(BaseModel):
+    """Font settings attached to a cell or an inline run of text."""
+
+    size: float | None = None  # None leaves the size unspecified
+    rgb: str | None = None  # color string (see BLUE in this module for an example value)
+    italic: bool = False
+    bold: bool = False
+
+
+class CellStyle(BaseModel):
+    """Styling for a Cell: font, borders, and vertical text alignment."""
+
+    font: Font = Font()
+    # NOTE(review): `borders` is not referenced by the writer code visible in this change — confirm usage.
+    borders: list[str] | None = None
+    text_alignment_vertical: str | None = None  # e.g. "bottom" or "center"
+
+
+class Cell(BaseModel):
+    """A table cell holding either a single value or a list of inline cells."""
+
+    value: Any | list["Cell"]  # can be a value or inline cells
+    style: CellStyle = CellStyle()
+
+
+# might not be needed? Probably just use inline cells
+class ValueWithDescription(Cell):
+    """A Cell whose string value is paired with a longer description.
+
+    e.g.
+    <b>Dataset Tags</b>
+    <i>my long-winded explanation of dataset tags here...</i>
+    """
+
+    value: str
+    description: str
+
+
+class Row(BaseModel):
+    """One table row: its cells plus optional layout hints."""
+
+    cells: list[Cell]
+    merge_cells: bool = False  # when True, the row is rendered as one cell merged across the table's columns
+    height: float | None = None  # None leaves the row height unset
+
+
+class Table(BaseModel):
+    """An abstract table: descriptive text plus the rows to render."""
+
+    title: str
+    subtitle: str | None
+    description: str
+    rows: list[Row]
+    # Column widths by position; a None or missing entry leaves that column's width unset.
+    cell_widths: list[float | None] = []
+
+    def total_cols(self) -> int:
+        """Return the cell count of the widest row, or 0 when there are no rows."""
+        # `default=0` keeps an empty table from raising ValueError inside max().
+        return max((len(r.cells) for r in self.rows), default=0)
+
+
+# Color applied to the subtitle and description text.
+# NOTE(review): looks like an 8-digit ARGB hex string (alpha first) — confirm.
+BLUE = "FF009DDC"
+# Font size shared by the title and subtitle text.
+TITLE_FONT_SIZE = 18.0
+
+
+def _make_title_subtitle_cell(title: str, subtitle: str):
+    """Build the heading cell: a bold title followed by a bold blue subtitle.
+
+    The result is one bottom-aligned Cell whose value is two inline cells,
+    `title + " - "` and `subtitle`, both at TITLE_FONT_SIZE.
+    """
+    title_font = Font(bold=True, size=TITLE_FONT_SIZE)
+    subtitle_font = Font(bold=True, rgb=BLUE, size=TITLE_FONT_SIZE)
+    inline_cells = [
+        Cell(value=title + " - ", style=CellStyle(font=title_font)),
+        Cell(value=subtitle, style=CellStyle(font=subtitle_font)),
+    ]
+    return Cell(
+        value=inline_cells,
+        style=CellStyle(text_alignment_vertical="bottom"),
+    )
+
+
+def make_table(
+    *,
+    title: str,
+    subtitle: str,
+    description: str,
+    fields: list[str],
+    dataset: Dataset,
+    data_dict: DataDictionary,
+) -> Table:
+    """Build a two-column Table describing selected dataset attributes.
+
+    The first two rows are merged banner rows (title/subtitle, then the
+    description). Each following row documents one entry of `fields`: the
+    left cell shows the data-dictionary summary and any extra notes, the
+    right cell shows the attribute's value from `dataset.attributes`.
+
+    Raises:
+        KeyError: if a name in `fields` has no entry in
+            `data_dict.dataset.fields`.
+    """
+    attributes = dataset.attributes.model_dump()
+    field_definitions = data_dict.dataset.fields
+
+    rows = []
+    for f in fields:
+        dd_field = field_definitions[f]
+        value = attributes.get(f)
+        if value is None:
+            logger.warning(f"Metadata field is empty for {f}")
+        elif isinstance(value, list):
+            # Stringify items first so non-string list members can't break the join.
+            value = ", ".join(str(item) for item in value)
+
+        description_paragraphs = [
+            dd_field.extra_description,
+            dd_field.custom.get("oti_extra_notes"),
+        ]
+        # One paragraph per line; missing paragraphs are skipped.
+        description_text = "".join(
+            p + "\n" for p in description_paragraphs if p is not None
+        )
+
+        rows.append(
+            Row(
+                cells=[
+                    Cell(  # <b>field summary</b> \n <i>description</i> for each field
+                        value=[
+                            Cell(
+                                value="\n" + dd_field.summary + "\n",
+                                style=CellStyle(
+                                    font=Font(bold=True, size=11),
+                                ),
+                            ),
+                            Cell(
+                                value=description_text,
+                                style=CellStyle(
+                                    font=Font(size=9, italic=True),
+                                ),
+                            ),
+                        ],
+                    ),
+                    Cell(  # Value
+                        value=value,
+                        style=CellStyle(font=Font(italic=True)),
+                    ),
+                ]
+            )
+        )
+
+    return Table(
+        title=title,
+        subtitle=subtitle,
+        cell_widths=[50, 80],
+        description=description,
+        rows=[
+            # Banner row: title and subtitle merged across the table.
+            Row(
+                merge_cells=True,
+                height=30,
+                cells=[_make_title_subtitle_cell(title, subtitle)],
+            ),
+            # Banner row: description merged across the table.
+            Row(
+                merge_cells=True,
+                height=50,
+                cells=[
+                    Cell(
+                        value=description,
+                        style=CellStyle(
+                            text_alignment_vertical="center",
+                            font=Font(italic=True, rgb=BLUE),
+                        ),
+                    ),
+                ],
+            ),
+            *rows,
+        ],
+    )
diff --git a/dcpy/lifecycle/package/oti_xlsx.py b/dcpy/lifecycle/package/oti_xlsx.py
@@ -1,11 +1,17 @@
+from copy import copy
 import openpyxl  # type: ignore
-from openpyxl.styles import Border, Side, Alignment, Font  # type: ignore
+from openpyxl.styles import NamedStyle, Border, Side, Alignment, Font  # type: ignore
+from openpyxl.cell.text import InlineFont
+from openpyxl.cell.rich_text import TextBlock, CellRichText
+from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder
+from openpyxl.utils import get_column_letter
 from pathlib import Path
 from tabulate import tabulate  # type: ignore
 import typer
 
 
 from dcpy.models.product.dataset import metadata_v2 as md_v2
+from . import abstract_doc
 from dcpy.utils.logging import logger
 
 from . import RESOURCES_PATH
@@ -92,7 +98,7 @@ def _set_default_style(cell, *, is_rightmost=True, is_last_row=False):
     border_side_thin = Side(border_style="thin", color="000000")
     border_side_medium = Side(border_style="medium", color="000000")
 
-    cell.alignment = Alignment(wrapText=True, vertical="top")
+    cell.alignment = Alignment(wrapText=True, vertical="center")
     cell.border = Border(
         top=border_side_thin,
         left=border_side_thin,
@@ -174,6 +180,98 @@ def _write_change_history(xlsx_wb: openpyxl.Workbook, change_log: list[list[str]
         _format_row_slice(row_slice, is_last_row=idx == (len(change_log) - 1))
 
 
+def _abstract_style_to_xlsx(c: abstract_doc.CellStyle):
+    """Translate an abstract CellStyle into keyword arguments for InlineFont.
+
+    Settings with a falsy value (None, False, 0) are omitted from the result.
+    """
+    font = c.font
+    candidates = {
+        "rFont": "Arial",
+        "color": font.rgb,
+        "b": font.bold,
+        "sz": font.size,
+        "i": font.italic,
+    }
+    inline_font_kwargs = {}
+    for name, setting in candidates.items():
+        if setting:
+            inline_font_kwargs[name] = setting
+    return inline_font_kwargs
+
+
+def _to_human_readable_val(v) -> str:
+    """Render a value as display text: bools become "Yes"/"No", all else `str(v)`."""
+    if type(v) is not bool:
+        return str(v)
+    return "Yes" if v else "No"
+
+
+def generate_table(
+    xlsx_wb: openpyxl.Workbook,
+    table: abstract_doc.Table,
+    *,
+    tab_name: str,
+    tab_index: int = -1,
+    table_row_start_index=1,
+):
+    """Render an abstract Table into a new worksheet of `xlsx_wb`.
+
+    Creates a sheet named `tab_name` with gridlines hidden, sizes its
+    columns from `table.cell_widths`, then for each table row applies the
+    default row formatting, merges the row when `merge_cells` is set, sets
+    the row height when given, and writes every cell as rich text.
+
+    Nothing is returned; the workbook is modified in place.
+
+    NOTE(review): `table_row_start_index` is only passed to `insert_rows`;
+    the row lookups and merges below assume the table starts at sheet row 1
+    — confirm whether other start rows are meant to be supported.
+    NOTE(review): `tab_index=-1` is forwarded to `create_sheet` as-is —
+    confirm where openpyxl places the sheet for a negative index.
+    """
+    new_sheet = xlsx_wb.create_sheet(title=tab_name, index=tab_index)
+    new_sheet.sheet_view.showGridLines = False
+
+    # Materialize the cells the table will occupy so they can be indexed below.
+    new_sheet.insert_rows(table_row_start_index, len(table.rows))
+    new_sheet.insert_cols(1, table.total_cols() - 1)
+    new_sheet_rows = [r for r in new_sheet.rows]
+
+    # Set Column Widths when specified
+    dim_holder = DimensionHolder(worksheet=new_sheet)
+    for idx, col in enumerate(range(new_sheet.min_column, new_sheet.max_column + 1)):
+        col_dim = ColumnDimension(new_sheet, min=col, max=col)
+
+        # Columns without a (truthy) configured width keep the ColumnDimension default.
+        maybe_width = table.cell_widths[idx] if len(table.cell_widths) > idx else None
+        if maybe_width:
+            col_dim.width = maybe_width
+
+        dim_holder[get_column_letter(col)] = col_dim
+    new_sheet.column_dimensions = dim_holder
+
+    for r_idx, r in enumerate(table.rows):
+        row = new_sheet_rows[r_idx]
+        if r.merge_cells:
+            # for merged cells, just format the top-leftmost cell
+            _format_row_slice(row[0:1], is_last_row=r_idx == len(table.rows) - 1)
+            new_sheet.merge_cells(
+                start_row=r_idx + 1,
+                end_row=r_idx + 1,
+                start_column=1,
+                end_column=table.total_cols(),
+            )
+        else:
+            _format_row_slice(row, is_last_row=r_idx == len(table.rows) - 1)
+
+        if r.height:
+            # Sheet rows are 1-indexed, hence r_idx + 1.
+            new_sheet.row_dimensions[r_idx + 1].height = r.height
+
+        for c_idx, c in enumerate(r.cells):
+            if type(c.value) is list:
+                # Inline Cells
+                # Each inline cell becomes its own TextBlock carrying its own font.
+                cell = CellRichText(
+                    [
+                        TextBlock(
+                            InlineFont(**_abstract_style_to_xlsx(ic.style)),
+                            _to_human_readable_val(ic.value),
+                        )
+                        for ic in c.value
+                    ]
+                )
+                row[c_idx].value = cell
+            else:
+                # Plain values are still written as rich text so the cell's font applies.
+                row[c_idx].value = CellRichText(
+                    TextBlock(
+                        InlineFont(**_abstract_style_to_xlsx(c.style)),
+                        _to_human_readable_val(c.value),
+                    )
+                )
+
+            if c.style.text_alignment_vertical:
+                # Copy the existing alignment and override only its vertical setting.
+                alignment = copy(row[c_idx].alignment)
+                alignment.vertical = c.style.text_alignment_vertical
+                row[c_idx].alignment = alignment
+
+
 def write_oti_xlsx(
     *,
     dataset: md_v2.Dataset,

diff --git a/dcpy/models/product/data_dictionary.py b/dcpy/models/product/data_dictionary.py
@@ -0,0 +1,17 @@
+from .dataset.metadata_v2 import CustomizableBase
+from dcpy.models.base import TemplatedYamlReader
+
+
+class FieldDefinition(CustomizableBase):
+    """Data-dictionary entry describing a single metadata field."""
+
+    summary: str  # short text for the field
+    extra_description: str  # longer explanatory text for the field
+
+
+class FieldSet(CustomizableBase):
+    """A collection of field definitions, looked up by string key."""
+
+    fields: dict[str, FieldDefinition] = {}
+
+
+class DataDictionary(CustomizableBase, TemplatedYamlReader):
+    """Field definitions grouped into org, product, and dataset sets.
+
+    Each group defaults to an empty FieldSet.
+    """
+
+    org: FieldSet = FieldSet()
+    product: FieldSet = FieldSet()
+    dataset: FieldSet = FieldSet()
diff --git a/dcpy/models/product/dataset/metadata_v2.py b/dcpy/models/product/dataset/metadata_v2.py
@@ -142,19 +142,23 @@ class DatasetOrgProductAttributesOverride(CustomizableBase):
     """Fields that might be set as a default at the Product/Org level."""
 
     agency: str | None = None
+    agency_website_data_updated_automatically: bool | None = None
     attribution: str | None = None
-    attributionLink: str | None = None
+    attribution_link: str | None = None
+    can_be_automated: bool | None = None
     category: str | None = None
     contact_email: str | None = None
     contains_address: bool | None = (
         None  # `contains_address` refers specifically to addresses containing house, numbers + street names. (ie. not just streets, polys, etc.)
     )
     date_made_public: str | None = None
+    on_agency_website: bool | None = None
     potential_uses: str | None = None
     projection: str | None = None
     publishing_frequency: str | None = None  # TODO: picklist values
     publishing_frequency_details: str | None = None
     publishing_purpose: str | None = None
+    rows_removed: bool | None = None
     tags: List[str] | None = []
 
 

diff --git a/dcpy/models/product/metadata.py b/dcpy/models/product/metadata.py
@@ -5,6 +5,7 @@
 import yaml
 
 from dcpy.models.base import SortedSerializedBase, YamlWriter, TemplatedYamlReader
+from dcpy.models.product.data_dictionary import DataDictionary
 from dcpy.models.product.dataset.metadata_v2 import (
     Metadata as DatasetMetadata,
     DatasetColumn,
@@ -122,6 +123,7 @@ class OrgMetadata(SortedSerializedBase, extra="forbid"):
     template_vars: dict = Field(default_factory=dict)
     metadata: OrgMetadataFile
     column_defaults: dict[tuple[str, str], DatasetColumn]
+    data_dictionary: DataDictionary = DataDictionary()
 
     @classmethod
     def get_string_snippets(cls, path: Path) -> dict:
@@ -148,13 +150,17 @@ def get_column_defaults(cls, path: Path) -> dict[tuple[str, str], DatasetColumn]
     @classmethod
     def from_path(cls, path: Path, template_vars: dict | None = None):
+        """Build an OrgMetadata from the files under the directory `path`.
+
+        The result of `get_string_snippets(path)` is merged with
+        `template_vars` and used when reading `metadata.yml`. The data
+        dictionary is read from `data_dictionary.yml` when that file exists;
+        otherwise an empty DataDictionary is used.
+        """
         template_vars = merge(cls.get_string_snippets(path), template_vars or {}) or {}
+        dd_default_path = path / "data_dictionary.yml"
         return OrgMetadata(
             root_path=path,
             metadata=OrgMetadataFile.from_path(
                 path / "metadata.yml", template_vars=template_vars
             ),
             template_vars=template_vars,
             column_defaults=cls.get_column_defaults(path),
+            # The data dictionary file is optional.
+            data_dictionary=DataDictionary.from_path(dd_default_path)
+            if dd_default_path.exists()
+            else DataDictionary(),
         )
 
     def product(self, name: str) -> ProductMetadata:
@@ -180,6 +186,10 @@ def validate_metadata(self) -> dict[str, dict[str, list[str]]]:
                 }
         return product_errors
 
+    def describe_field(self, field_path: str, *, product: str, dataset: str):
+        """Placeholder, not implemented yet: does nothing and returns None."""
+        # field_path could be like attributes.tags, or fields.bbl
+        pass
+
     def query_dataset_destinations(
         self, tag: str
     ) -> list[ProductDatasetDestinationKey]: