Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add: update error messages and pydantic validators #276

Merged
merged 3 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions python/src/lib/workbook_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
from src.schemas.latest.schema import (SCHEMA_BY_PROJECT, BaseModel,
CoverSheetRow, LogicSheetVersion, ProjectType,
SubrecipientRow, Project1ARow, Project1BRow, Project1CRow)
from pydantic_core import ErrorDetails

# Alias for the list of workbook errors returned by the sheet validators.
type Errors = List[WorkbookError]

# Sheet name constants. PROJECT_SHEET is passed as the sheet/tab name when
# project-sheet errors are reported; the others are presumably the matching
# worksheet tab names — verify against their callers.
LOGIC_SHEET = "Logic"
COVER_SHEET = "Cover"
PROJECT_SHEET = "Project"
SUBRECIPIENTS_SHEET = "Subrecipients"
# Seed for the row counter used by the sheet validators: they iterate rows
# starting at min_row=13 and bump the counter before handling each row, so the
# first validated row is reported as INITIAL_STARTING_ROW + 1.
INITIAL_STARTING_ROW = 12

# Module-level logger.
_logger = get_logger(__name__)

Expand Down Expand Up @@ -88,6 +90,26 @@ def get_project_use_code(cover_sheet: Worksheet, row_dict: Optional[Dict[str,str
defined in schema.py on a per-field basis, that contains the column for that particular field.
"""

def generate_error_text(
    field_name: str,
    error: ErrorDetails,
) -> str:
    """Build the user-facing message for a single pydantic validation error.

    Args:
        field_name: Name of the schema field the error belongs to.
        error: One pydantic error entry; its ``"type"``, ``"input"`` and
            ``"msg"`` keys are read.

    Returns:
        * ``"EC code must be set"`` / ``"Value is required for <field>"`` when
          the value is missing, ``None``, empty, or whitespace-only.
        * For the known string/decimal/parsing error types, pydantic's own
          message with its leading generic word (``String``, ``Decimal`` or
          ``Input``) replaced by the field name.
        * Otherwise ``"Error in field <field>-<msg>"``.
    """
    error_type = error["type"]
    raw_value = error["input"]

    # Whitespace-only strings count as "not provided", matching the blank
    # checks performed by the schema validators.
    is_blank = raw_value is None or (
        isinstance(raw_value, str) and not raw_value.strip()
    )
    if error_type == "missing" or is_blank:
        if field_name == "project_use_code":
            return "EC code must be set"
        return f"Value is required for {field_name}"

    # Error type -> the generic leading word in pydantic's message that is
    # swapped (first occurrence only) for the field name.
    placeholder_by_type = {
        "string_too_long": "String",
        "string_too_short": "String",
        "decimal_whole_digits": "Decimal",
        "decimal_max_places": "Decimal",
        "decimal_type": "Decimal",
        "int_type": "Input",
        "string_type": "Input",
        "datetime_from_date_parsing": "Input",
        "decimal_parsing": "Input",
        "int_parsing": "Input",
    }
    placeholder = placeholder_by_type.get(error_type)
    if placeholder is not None:
        return error["msg"].replace(placeholder, field_name, 1)

    return f'Error in field {field_name}-{error["msg"]}'


def get_workbook_errors_for_row(
SheetModelClass: Type[Union[CoverSheetRow, Project1ARow, Project1BRow, Project1CRow, SubrecipientRow]], e: ValidationError, row_num: int, sheet_name: str
Expand Down Expand Up @@ -116,7 +138,7 @@ def get_workbook_errors_for_row(
_logger.error(f"Encountered unexpected exception while getting column for field {erroring_field_name} with error {error}. Details: {exception}")
erroring_column = "Unknown"

message = f'Error in field {erroring_field_name}-{error["msg"]}'
message = generate_error_text(erroring_field_name, error)
workbook_errors.append(
WorkbookError(
message=message,
Expand Down Expand Up @@ -266,27 +288,40 @@ def validate_cover_sheet(
def validate_project_sheet(project_sheet: Worksheet, project_schema: Type[Union[Project1ARow, Project1BRow, Project1CRow]]) -> Errors:
    """Validate every non-empty data row of the project sheet.

    Headers are read from ``C3:DS3``. Data rows are read from the row after
    ``INITIAL_STARTING_ROW`` onward, columns 3 through 123. Each non-empty row
    is mapped onto the headers and used to instantiate ``project_schema``; any
    ``ValidationError`` is converted into workbook errors for that row.

    Args:
        project_sheet: Worksheet holding the project records.
        project_schema: Row model class used to validate each row.

    Returns:
        All collected workbook errors. If no non-empty data row exists, a
        single error reporting that the upload has no project records is
        included.
    """
    errors: Errors = []
    project_headers = get_headers(project_sheet, "C3:DS3")
    # Derive the first data row from the shared constant so the reported row
    # numbers and the iteration start cannot drift apart.
    first_data_row = INITIAL_STARTING_ROW + 1
    sheet_has_data = False

    data_rows = project_sheet.iter_rows(
        min_row=first_data_row, min_col=3, max_col=123, values_only=True
    )
    for current_row, project_row in enumerate(data_rows, start=first_data_row):
        if is_empty_row(project_row):
            continue
        sheet_has_data = True
        row_dict = map_values_to_headers(project_headers, project_row)
        try:
            project_schema(**row_dict)
        except ValidationError as e:
            errors += get_workbook_errors_for_row(
                project_schema, e, current_row, PROJECT_SHEET
            )

    if not sheet_has_data:
        errors.append(
            WorkbookError(
                message="Upload doesn’t include any project records.",
                row=INITIAL_STARTING_ROW,
                col=0,
                tab=PROJECT_SHEET,
                field_name="",
                severity=ErrorLevel.ERR.name,
            )
        )

    return errors


def validate_subrecipient_sheet(subrecipient_sheet: Worksheet) -> Errors:
errors = []
subrecipient_headers = get_headers(subrecipient_sheet, "C3:O3")
current_row = 12
current_row = INITIAL_STARTING_ROW
for subrecipient_row in subrecipient_sheet.iter_rows(
min_row=13, min_col=3, max_col=16, values_only=True
):
Expand Down
149 changes: 132 additions & 17 deletions python/src/schemas/latest/schema.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from datetime import datetime
from enum import Enum
from typing import Optional
from typing import Any, Optional

from pydantic import (BaseModel, ConfigDict, Field, condecimal, conint,
validator)
ValidationInfo, field_validator)


class StateAbbreviation(Enum):
Expand Down Expand Up @@ -230,22 +230,44 @@ class BaseProjectRow(BaseModel):
json_schema_extra={"column":"AQ"}
)

@field_validator(
    "Projected_Con_Start_Date__c",
    "Projected_Con_Completion__c",
    "Projected_Init_of_Operations__c",
    "Actual_Con_Start_Date__c",
    "Actual_Con_Completion__c",
    "Actual_operations_date__c",
    # Equivalent of the former `pre=True`: the raw cell value must reach this
    # validator before type validation, otherwise the string branch below is
    # never given a chance to parse "mm/dd/yyyy" text.
    mode="before",
)
@classmethod
def parse_mm_dd_yyyy_dates(cls, v):
    """Parse ``mm/dd/yyyy`` strings into ``datetime`` for the date fields.

    Non-string values are returned unchanged.

    Raises:
        ValueError: If ``v`` is a string that does not match ``%m/%d/%Y``.
    """
    if isinstance(v, str):
        try:
            return datetime.strptime(v, "%m/%d/%Y")
        except ValueError as err:
            raise ValueError(f"Date {v} is not in 'mm/dd/yyyy' format.") from err
    return v

@field_validator(
    "Project_Name__c",
    "Identification_Number__c",
    "Project_Description__c",
    "Capital_Asset_Ownership_Type__c",
    "Total_CPF_Funding_for_Project__c",
    "Total_from_all_funding_sources__c",
    "Current_Period_Obligation__c",
    "Current_Period_Expenditure__c",
    "Cumulative_Obligation__c",
    "Cumulative_Expenditure__c",
    "Cost_Overview__c",
    "Project_Status__c",
)
@classmethod
def validate_field(cls, v: Any, info: ValidationInfo, **kwargs):
    """Reject empty or whitespace-only strings for the listed fields.

    Non-string values are returned unchanged.

    Raises:
        ValueError: If ``v`` is a string containing only whitespace (or
            nothing), naming the offending field.
    """
    # `strip` must be called: comparing the bound method itself to "" is
    # always False and would let blank strings through.
    if isinstance(v, str) and not v.strip():
        raise ValueError(f"Value is required for {info.field_name}")
    return v


class Project1ARow(BaseProjectRow):
Expand Down Expand Up @@ -347,7 +369,27 @@ class Project1ARow(BaseProjectRow):
Affordable_Connectivity_Program_ACP__c: YesNoType = Field(
..., serialization_alias="Affordable Connectivity Program (ACP)?", json_schema_extra={"column":"BP"}
)

@field_validator(
    "Technology_Type_Planned__c",
    "Total_Miles_Planned__c",
    "Locations_Served_Planned__c",
    "X25_3_Mbps_or_below_Planned__c",
    "X25_3_Mbps_and_100_20_Mbps_Planned__c",
    "Minimum_100_100_Mbps_Planned__c",
    "X100_20_Mbps_to_100_100_Mbps_Planned__c",
    "Number_of_Locations_Planned__c",
    "Housing_Units_Planned__c",
    "Number_of_Bus_Locations_Planned__c",
    "Number_of_CAI_Planned__c",
    "Affordable_Connectivity_Program_ACP__c",
)
@classmethod
def validate_field(cls, v: Any, info: ValidationInfo, **kwargs):
    """Reject empty or whitespace-only strings for the listed fields.

    Non-string values are returned unchanged.

    Raises:
        ValueError: If ``v`` is a string containing only whitespace (or
            nothing), naming the offending field.
    """
    # NOTE(review): BaseProjectRow also defines a validator named
    # `validate_field`; confirm that reusing the name here does not replace
    # the inherited one for this model.
    # `strip` must be called: comparing the bound method itself to "" is
    # always False and would let blank strings through.
    if isinstance(v, str) and not v.strip():
        raise ValueError(f"Value is required for {info.field_name}")
    return v

class AddressFields(BaseModel):
Street_1_Planned__c: str = Field(
Expand Down Expand Up @@ -381,7 +423,18 @@ class AddressFields(BaseModel):
Zip_Code_Actual__c: Optional[str] = Field(
default=None, serialization_alias="Zip Code (Actual)", max_length=5, json_schema_extra={"column":"CA"}
)

@field_validator(
    "Street_1_Planned__c",
    "City_Planned__c",
    "Zip_Code_Planned__c",
)
@classmethod
def validate_field(cls, v: Any, info: ValidationInfo, **kwargs):
    """Reject empty or whitespace-only strings for the planned address fields.

    Non-string values are returned unchanged.

    Raises:
        ValueError: If ``v`` is a string containing only whitespace (or
            nothing), naming the offending field.
    """
    # `strip` must be called: comparing the bound method itself to "" is
    # always False and would let blank strings through.
    if isinstance(v, str) and not v.strip():
        raise ValueError(f"Value is required for {info.field_name}")
    return v

class Project1BRow(BaseProjectRow, AddressFields):
Laptops_Planned__c: conint(ge=0, le=9999999999) = Field(
Expand Down Expand Up @@ -467,7 +520,28 @@ class Project1BRow(BaseProjectRow, AddressFields):
Measurement_of_Effectiveness__c: YesNoType = Field(
..., serialization_alias="Measurement of Effectiveness?", json_schema_extra={"column":"DA"}
)

@field_validator(
    "Laptops_Planned__c",
    "Laptops_Expenditures_Planned__c",
    "Tablets_Planned__c",
    "Tablet_Expenditures_Planned__c",
    "Desktop_Computers_Planned__c",
    "Desktop_Computers_Expenditures_Planned__c",
    "Public_WiFi_Planned__c",
    "Public_WiFi_Expenditures_Planned__c",
    "Other_Devices_Planned__c",
    "Other_Expenditures_Planned__c",
    "Number_of_Users_Planned__c",
    "Brief_Narrative_Planned__c",
    "Measurement_of_Effectiveness__c",
)
@classmethod
def validate_field(cls, v: Any, info: ValidationInfo, **kwargs):
    """Reject empty or whitespace-only strings for the listed fields.

    Non-string values are returned unchanged.

    Raises:
        ValueError: If ``v`` is a string containing only whitespace (or
            nothing), naming the offending field.
    """
    # NOTE(review): BaseProjectRow and AddressFields also define validators
    # named `validate_field`; confirm that reusing the name here does not
    # replace the inherited ones for this model.
    # `strip` must be called: comparing the bound method itself to "" is
    # always False and would let blank strings through.
    if isinstance(v, str) and not v.strip():
        raise ValueError(f"Value is required for {info.field_name}")
    return v

class Project1CRow(BaseProjectRow, AddressFields):
Type_of_Investment__c: Optional[str] = Field(
Expand Down Expand Up @@ -524,7 +598,16 @@ class Project1CRow(BaseProjectRow, AddressFields):
Access_to_Public_Transit__c: YesNoType = Field(
..., serialization_alias="Access to Public Transit?", json_schema_extra={"column":"DS"}
)

@field_validator(
    "Access_to_Public_Transit__c"
)
@classmethod
def validate_field(cls, v: Any, info: ValidationInfo, **kwargs):
    """Reject an empty or whitespace-only string for the listed field.

    Non-string values are returned unchanged.

    Raises:
        ValueError: If ``v`` is a string containing only whitespace (or
            nothing), naming the offending field.
    """
    # NOTE(review): BaseProjectRow and AddressFields also define validators
    # named `validate_field`; confirm that reusing the name here does not
    # replace the inherited ones for this model.
    # `strip` must be called: comparing the bound method itself to "" is
    # always False and would let blank strings through.
    if isinstance(v, str) and not v.strip():
        raise ValueError(f"Value is required for {info.field_name}")
    return v

class SubrecipientRow(BaseModel):
model_config = ConfigDict(coerce_numbers_to_str=True, loc_by_alias=False)
Expand Down Expand Up @@ -564,7 +647,25 @@ class SubrecipientRow(BaseModel):
State_Abbreviated__c: StateAbbreviation = Field(
..., serialization_alias="State Abbreviated", json_schema_extra={"column":"O"}
)

@field_validator(
    "Name",
    "EIN__c",
    "Unique_Entity_Identifier__c",
    "POC_Name__c",
    "POC_Phone_Number__c",
    "POC_Email_Address__c",
    "Zip__c",
    "Address__c",
    "City__c",
    "State_Abbreviated__c",
)
@classmethod
def validate_field(cls, v: Any, info: ValidationInfo, **kwargs):
    """Reject empty or whitespace-only strings for the listed fields.

    Non-string values are returned unchanged.

    Raises:
        ValueError: If ``v`` is a string containing only whitespace (or
            nothing), naming the offending field.
    """
    # `strip` must be called: comparing the bound method itself to "" is
    # always False and would let blank strings through.
    if isinstance(v, str) and not v.strip():
        raise ValueError(f"Value is required for {info.field_name}")
    return v

class Version(Enum):
V2023_12_12 = "v:20231212"
Expand Down Expand Up @@ -631,9 +732,23 @@ class CoverSheetRow(BaseModel):
project_use_code: str = Field(..., alias="Project Use Code", json_schema_extra={"column":"A"})
project_use_name: str = Field(..., alias="Project Use Name", json_schema_extra={"column":"B"})

@validator("project_use_name")
def validate_code_name_pair(cls, v, values, **kwargs):
project_use_code = values.get("project_use_code")
@field_validator("project_use_code")
@classmethod
def validate_code(cls, v: Any, info: ValidationInfo, **kwargs):
    """Check that the project use code is present and is a known ProjectType.

    Raises:
        ValueError: If the code is ``None``/blank, or is not found in
            ``ProjectType``.
    """
    code_is_blank = v is None or v.strip() == ""
    if code_is_blank:
        raise ValueError("EC code must be set")

    if v not in ProjectType:
        raise ValueError(f"EC code '{v}' is not recognized.")

    return v

@field_validator("project_use_name")
@classmethod
def validate_code_name_pair(cls, v: Any, info: ValidationInfo, **kwargs):
project_use_code = info.data.get("project_use_code")
expected_name = NAME_BY_PROJECT.get(project_use_code)

if not expected_name:
Expand Down
18 changes: 18 additions & 0 deletions python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,30 @@ def invalid_cover_sheet(valid_coversheet):
return valid_coversheet


@pytest.fixture
def invalid_cover_sheet_missing_code(valid_coversheet):
    """Return the valid cover sheet with cell A2 set to ``None``."""
    sheet = valid_coversheet
    sheet["A2"] = None
    return sheet


@pytest.fixture
def invalid_cover_sheet_empty_code(valid_coversheet):
    """Return the valid cover sheet with cell A2 set to a single space."""
    sheet = valid_coversheet
    sheet["A2"] = " "
    return sheet


@pytest.fixture
def invalid_project_sheet(valid_project_sheet):
    """Return the valid project sheet with cell D13 set to 21 ``X`` characters."""
    sheet = valid_project_sheet
    sheet["D13"] = "X" * 21
    return sheet


@pytest.fixture
def invalid_project_sheet_missing_field(valid_project_sheet):
    """Return the valid project sheet with cell D13 set to ``None``."""
    sheet = valid_project_sheet
    sheet["D13"] = None
    return sheet


@pytest.fixture
def invalid_subrecipient_sheet(valid_subrecipientsheet):
valid_subrecipientsheet["D13"] = "INVALID"
Expand Down
Loading
Loading