Skip to content

Commit

Permalink
fix(ingest/pulsar): handle missing/invalid schema objects (#11945)
Browse files Browse the repository at this point in the history
Co-authored-by: Alice <[email protected]>
  • Loading branch information
Alice-608 and Alice authored Dec 4, 2024
1 parent cbae728 commit ca46c02
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion metadata-ingestion/src/datahub/ingestion/source/pulsar.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,17 @@ class PulsarSchema:
def __init__(self, schema):
self.schema_version = schema.get("version")

avro_schema = json.loads(schema.get("data"))
schema_data = schema.get("data")
if not schema_data:
logger.warning("Schema data is empty or None. Using default empty schema.")
schema_data = "{}"

try:
avro_schema = json.loads(schema_data)
except json.JSONDecodeError as e:
logger.error(f"Invalid JSON schema: {schema_data}. Error: {str(e)}")
avro_schema = {}

self.schema_name = avro_schema.get("namespace") + "." + avro_schema.get("name")
self.schema_description = avro_schema.get("doc")
self.schema_type = schema.get("type")
Expand Down

0 comments on commit ca46c02

Please sign in to comment.