diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFields.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFields.scala index 27d97a5a4..ae08cd17d 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFields.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFields.scala @@ -10,6 +10,15 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments +import cats.data.NonEmptyList + +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.iglu.client.ClientError.ValidationError +import com.snowplowanalytics.iglu.client.validator.{ValidatorError, ValidatorReport} + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicFields.LimitedAtomicField import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -17,6 +26,8 @@ final case class AtomicFields(value: List[LimitedAtomicField]) object AtomicFields { + val atomicSchema = SchemaKey("com.snowplowanalytics.snowplow", "atomic", "jsonschema", SchemaVer.Full(1, 0, 0)) + final case class AtomicField( name: String, enrichedValueExtractor: EnrichedEvent => String @@ -121,4 +132,23 @@ object AtomicFields { AtomicFields(withLimits) } + + def atomicErrorsToSchemaViolation(errors: NonEmptyList[AtomicError]): FailureDetails.SchemaViolation = { + val messages = errors.map { error => + ValidatorReport(error.message, Some(error.field), Nil, error.value) + } + val validatorError = ValidatorError.InvalidData(messages) + val clientError = ValidationError(validatorError, None) + + FailureDetails.SchemaViolation.IgluError( + AtomicFields.atomicSchema, + clientError + ) + } } + +case class AtomicError( + field: String, + value: Option[String], + message: String +) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFieldsLengthValidator.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFieldsLengthValidator.scala index 5f2f98582..f120fa77f 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFieldsLengthValidator.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFieldsLengthValidator.scala @@ -14,14 +14,11 @@ import org.slf4j.LoggerFactory import cats.Monad import cats.data.Validated.{Invalid, Valid} -import cats.data.{NonEmptyList, ValidatedNel} +import cats.data.NonEmptyList import cats.implicits._ -import com.snowplowanalytics.snowplow.badrows.FailureDetails.EnrichmentFailure -import com.snowplowanalytics.snowplow.badrows.{BadRow, FailureDetails, Processor} - -import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.badrows.FailureDetails import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicFields.LimitedAtomicField import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -35,66 +32,46 @@ object AtomicFieldsLengthValidator { def validate[F[_]: Monad]( event: EnrichedEvent, - rawEvent: RawEvent, - processor: Processor, acceptInvalid: Boolean, invalidCount: F[Unit], atomicFields: AtomicFields - ): F[Either[BadRow, Unit]] = + ): F[Either[FailureDetails.SchemaViolation, Unit]] = atomicFields.value - .map(validateField(event)) + .map(field => validateField(event, field).toValidatedNel) .combineAll match { case Invalid(errors) if acceptInvalid => - handleAcceptableBadRow(invalidCount, event, errors) *> Monad[F].pure(Right(())) + handleAcceptableErrors(invalidCount, event, errors) *> Monad[F].pure(Right(())) case Invalid(errors) => - Monad[F].pure(buildBadRow(event, rawEvent, processor, errors).asLeft) + Monad[F].pure(AtomicFields.atomicErrorsToSchemaViolation(errors).asLeft) case Valid(()) => Monad[F].pure(Right(())) } private def validateField( - event: EnrichedEvent - )( + event: EnrichedEvent, atomicField: LimitedAtomicField - ): ValidatedNel[String, Unit] = { + ): Either[AtomicError, Unit] = { val actualValue = atomicField.value.enrichedValueExtractor(event) if (actualValue != null && actualValue.length > atomicField.limit) - s"Field ${atomicField.value.name} longer than maximum allowed size ${atomicField.limit}".invalidNel + AtomicError( + atomicField.value.name, + Option(actualValue), + s"Field is longer than maximum allowed size ${atomicField.limit}" + ).asLeft else - Valid(()) + Right(()) } - private def buildBadRow( - event: EnrichedEvent, - rawEvent: RawEvent, - processor: Processor, - errors: NonEmptyList[String] - ): BadRow.EnrichmentFailures = - EnrichmentManager.buildEnrichmentFailuresBadRow( - NonEmptyList( - asEnrichmentFailure("Enriched event does not conform to atomic schema field's length restrictions"), - errors.toList.map(asEnrichmentFailure) - ), - EnrichedEvent.toPartiallyEnrichedEvent(event), - RawEvent.toRawEvent(rawEvent), - processor - ) - - private def handleAcceptableBadRow[F[_]: Monad]( + private def handleAcceptableErrors[F[_]: Monad]( invalidCount: F[Unit], event: EnrichedEvent, - errors: NonEmptyList[String] + errors: NonEmptyList[AtomicError] ): F[Unit] = invalidCount *> Monad[F].pure( logger.debug( - s"Enriched event not valid against atomic schema. Event id: ${event.event_id}. Invalid fields: ${errors.toList.mkString(",")}" + s"Enriched event not valid against atomic schema. Event id: ${event.event_id}. Invalid fields: ${errors.map(_.field).toList.mkString(", ")}" ) ) - private def asEnrichmentFailure(errorMessage: String): EnrichmentFailure = - EnrichmentFailure( - enrichment = None, - FailureDetails.EnrichmentFailureMessage.Simple(errorMessage) - ) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/ClientEnrichments.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/ClientEnrichments.scala index 506b4560a..a714a227c 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/ClientEnrichments.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/ClientEnrichments.scala @@ -14,8 +14,6 @@ import java.lang.{Integer => JInteger} import cats.syntax.either._ -import com.snowplowanalytics.snowplow.badrows._ - /** * Contains enrichments related to the client - where the client is the software which is using the * Snowplow tracker. Enrichments relate to browser resolution. @@ -36,21 +34,16 @@ object ClientEnrichments { * @param res The packed string holding the screen dimensions * @return the ResolutionTuple or an error message, boxed in a Scalaz Validation */ - val extractViewDimensions: (String, String) => Either[FailureDetails.EnrichmentFailure, (JInteger, JInteger)] = + val extractViewDimensions: (String, String) => Either[AtomicError, (JInteger, JInteger)] = (field, res) => (res match { case ResRegex(width, height) => Either .catchNonFatal((width.toInt: JInteger, height.toInt: JInteger)) - .leftMap(_ => "could not be converted to java.lang.Integer s") - case _ => s"does not conform to regex ${ResRegex.toString}".asLeft + .leftMap(_ => "Could not be converted to java.lang.Integer s") + case _ => s"Does not conform to regex ${ResRegex.toString}".asLeft }).leftMap { msg => - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(res), - msg - ) - FailureDetails.EnrichmentFailure(None, f) + AtomicError(field, Option(res), msg) } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala index 1d238fdc2..7ff0b7dd8 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala @@ -42,6 +42,8 @@ import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquer import com.snowplowanalytics.snowplow.enrich.common.enrichments.web.{PageEnrichments => WPE} import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import com.snowplowanalytics.snowplow.enrich.common.utils.{IgluUtils, ConversionUtils => CU} +import _root_.com.snowplowanalytics.iglu.core.SchemaKey +import com.snowplowanalytics.iglu.core.SchemaVer object EnrichmentManager { @@ -68,8 +70,15 @@ object EnrichmentManager { atomicFields: AtomicFields ): EitherT[F, BadRow, EnrichedEvent] = for { - enriched <- EitherT.fromEither[F](setupEnrichedEvent(raw, etlTstamp, processor)) - extractResult <- IgluUtils.extractAndValidateInputJsons(enriched, client, raw, processor, registryLookup) + enriched <- EitherT.rightT[F, BadRow](new EnrichedEvent) + extractResult <- mapAndValidateInput( + raw, + enriched, + etlTstamp, + processor, + client, + registryLookup + ) _ = { ME.formatUnstructEvent(extractResult.unstructEvent).foreach(e => enriched.unstruct_event = e) ME.formatContexts(extractResult.contexts).foreach(c => enriched.contexts = c) @@ -84,19 +93,44 @@ object EnrichmentManager { featureFlags.legacyEnrichmentOrder ) _ = ME.formatContexts(enrichmentsContexts ::: extractResult.validationInfoContexts).foreach(c => enriched.derived_contexts = c) - _ <- IgluUtils - .validateEnrichmentsContexts[F](client, enrichmentsContexts, raw, processor, enriched, registryLookup) - _ <- EitherT.rightT[F, BadRow]( - anonIp(enriched, registry.anonIp).foreach(enriched.user_ipaddress = _) + _ <- validateEnriched( + enriched, + raw, + enrichmentsContexts, + client, + processor, + registryLookup, + featureFlags.acceptInvalid, + invalidCount, + atomicFields ) - _ <- EitherT.rightT[F, BadRow] { - piiTransform(enriched, registry.piiPseudonymizer).foreach { pii => - enriched.pii = pii.asString - } - } - _ <- validateEnriched(enriched, raw, processor, featureFlags.acceptInvalid, invalidCount, atomicFields) } yield enriched + private def mapAndValidateInput[F[_]: Clock: Monad]( + raw: RawEvent, + enrichedEvent: EnrichedEvent, + etlTstamp: DateTime, + processor: Processor, + client: IgluCirceClient[F], + registryLookup: RegistryLookup[F] + ): EitherT[F, BadRow, IgluUtils.EventExtractResult] = + EitherT { + val setup = setupEnrichedEvent(raw, enrichedEvent, etlTstamp, processor).toValidatedNel + IgluUtils + .extractAndValidateInputJsons(enrichedEvent, client, registryLookup) + .map((setup, _).mapN((_, extract) => extract)) + .map { + _.leftMap { violations => + buildSchemaViolationsBadRow( + violations, + EnrichedEvent.toPartiallyEnrichedEvent(enrichedEvent), + RawEvent.toRawEvent(raw), + processor + ) + }.toEither + } + } + /** * Run all the enrichments and aggregate the errors if any * @param enriched /!\ MUTABLE enriched event, mutated IN-PLACE /!\ @@ -112,7 +146,7 @@ object EnrichmentManager { inputContexts: List[SelfDescribingData[Json]], unstructEvent: Option[SelfDescribingData[Json]], legacyOrder: Boolean - ): EitherT[F, BadRow.EnrichmentFailures, List[SelfDescribingData[Json]]] = + ): EitherT[F, BadRow, List[SelfDescribingData[Json]]] = EitherT { accState(registry, raw, inputContexts, unstructEvent, legacyOrder) .runS(Accumulation(enriched, Nil, Nil)) @@ -132,6 +166,31 @@ object EnrichmentManager { } } + private def validateEnriched[F[_]: Clock: Monad]( + enriched: EnrichedEvent, + raw: RawEvent, + enrichmentsContexts: List[SelfDescribingData[Json]], + client: IgluCirceClient[F], + processor: Processor, + registryLookup: RegistryLookup[F], + acceptInvalid: Boolean, + invalidCount: F[Unit], + atomicFields: AtomicFields + ): EitherT[F, BadRow, Unit] = + EitherT { + for { + atomic <- AtomicFieldsLengthValidator.validate[F](enriched, acceptInvalid, invalidCount, atomicFields).map(_.toValidatedNel) + contexts <- IgluUtils.validateEnrichmentsContexts[F](client, enrichmentsContexts, registryLookup) + } yield (atomic |+| contexts).void.leftMap { violations => + buildSchemaViolationsBadRow( + violations, + EnrichedEvent.toPartiallyEnrichedEvent(enriched), + RawEvent.toRawEvent(raw), + processor + ) + }.toEither + } + private[enrichments] case class Accumulation( event: EnrichedEvent, errors: List[FailureDetails.EnrichmentFailure], @@ -217,6 +276,8 @@ object EnrichmentManager { _ <- geoLocation[F](registry.ipLookups) // Execute IP lookup enrichment _ <- sqlContexts // Derive some contexts with custom SQL Query enrichment _ <- apiContexts // Derive some contexts with custom API Request enrichment + _ <- anonIp[F](registry.anonIp) // Anonymize the IP + _ <- piiTransform[F](registry.piiPseudonymizer) // Run PII pseudonymization // format: on } yield () else @@ -243,18 +304,20 @@ object EnrichmentManager { _ <- registry.javascriptScript.traverse(getJsScript[F](_)) // Execute the JavaScript scripting enrichment _ <- sqlContexts // Derive some contexts with custom SQL Query enrichment _ <- apiContexts // Derive some contexts with custom API Request enrichment + _ <- anonIp[F](registry.anonIp) // Anonymize the IP + _ <- piiTransform[F](registry.piiPseudonymizer) // Run PII pseudonymization // format: on } yield () } - /** Create the mutable [[EnrichedEvent]] and initialize it. */ + /** Initialize the mutable [[EnrichedEvent]]. */ private def setupEnrichedEvent( raw: RawEvent, + e: EnrichedEvent, etlTstamp: DateTime, processor: Processor - ): Either[BadRow.EnrichmentFailures, EnrichedEvent] = { - val e = new EnrichedEvent() + ): Either[FailureDetails.SchemaViolation, Unit] = { e.event_id = EE.generateEventId() // May be updated later if we have an `eid` parameter e.v_collector = raw.source.name // May be updated later if we have a `cv` parameter e.v_etl = ME.etlVersion(processor) @@ -272,19 +335,11 @@ object EnrichmentManager { val transformed = Transform.transform(raw, e) (collectorTstamp |+| transformed) - .leftMap { enrichmentFailures => - EnrichmentManager.buildEnrichmentFailuresBadRow( - enrichmentFailures, - EnrichedEvent.toPartiallyEnrichedEvent(e), - RawEvent.toRawEvent(raw), - processor - ) - } - .as(e) + .leftMap(AtomicFields.atomicErrorsToSchemaViolation) .toEither } - def setCollectorTstamp(event: EnrichedEvent, timestamp: Option[DateTime]): Either[FailureDetails.EnrichmentFailure, Unit] = + def setCollectorTstamp(event: EnrichedEvent, timestamp: Option[DateTime]): Either[AtomicError, Unit] = EE.formatCollectorTstamp(timestamp).map { t => event.collector_tstamp = t ().asRight @@ -418,12 +473,21 @@ object EnrichmentManager { result.sequence.bimap(NonEmptyList.one(_), _.toList) } - def anonIp(event: EnrichedEvent, anonIp: Option[AnonIpEnrichment]): Option[String] = - Option(event.user_ipaddress).map { ip => - anonIp match { - case Some(anon) => anon.anonymizeIp(ip) - case None => ip - } + def anonIp[F[_]: Applicative](anonIp: Option[AnonIpEnrichment]): EStateT[F, Unit] = + EStateT.fromEither { + case (event, _) => + anonIp match { + case Some(anon) => + Option(event.user_ipaddress) match { + case Some(ip) => + Option(anon.anonymizeIp(ip)).foreach(event.user_ipaddress = _) + Nil.asRight + case None => + Nil.asRight + } + case None => + Nil.asRight + } } def getUaUtils[F[_]: Applicative](userAgentUtils: Option[UserAgentUtilsEnrichment]): EStateT[F, Unit] = @@ -481,10 +545,14 @@ object EnrichmentManager { event.base_currency = currency.baseCurrency.getCode // Note that jBigDecimalToDouble is applied to either-valid-or-null event POJO // properties, so we don't expect any of these four vals to be a Failure - val trTax = CU.jBigDecimalToDouble("tr_tx", event.tr_tax).toValidatedNel - val tiPrice = CU.jBigDecimalToDouble("ti_pr", event.ti_price).toValidatedNel - val trTotal = CU.jBigDecimalToDouble("tr_tt", event.tr_total).toValidatedNel - val trShipping = CU.jBigDecimalToDouble("tr_sh", event.tr_shipping).toValidatedNel + val enrichmentInfo = FailureDetails.EnrichmentInformation( + SchemaKey("com.snowplowanalytics.snowplow", "currency_conversion_config", "jsonschema", SchemaVer.Full(1, 0, 0)), + "currency_conversion" + ) + val trTax = CU.jBigDecimalToDouble("tr_tx", event.tr_tax, enrichmentInfo).toValidatedNel + val tiPrice = CU.jBigDecimalToDouble("ti_pr", event.ti_price, enrichmentInfo).toValidatedNel + val trTotal = CU.jBigDecimalToDouble("tr_tt", event.tr_total, enrichmentInfo).toValidatedNel + val trShipping = CU.jBigDecimalToDouble("tr_sh", event.tr_shipping, enrichmentInfo).toValidatedNel EitherT( (trTotal, trTax, trShipping, tiPrice) .mapN { @@ -745,11 +813,31 @@ object EnrichmentManager { } } - def piiTransform(event: EnrichedEvent, piiPseudonymizer: Option[PiiPseudonymizerEnrichment]): Option[SelfDescribingData[Json]] = - piiPseudonymizer.flatMap(_.transformer(event)) + def piiTransform[F[_]: Applicative](piiPseudonymizer: Option[PiiPseudonymizerEnrichment]): EStateT[F, Unit] = + EStateT.fromEither { + case (event, _) => + piiPseudonymizer match { + case Some(pseudonymizer) => + pseudonymizer.transformer(event).foreach(p => event.pii = p.asString) + Nil.asRight + case None => + Nil.asRight + } + } - /** Build `BadRow.EnrichmentFailures` from a list of `FailureDetails.EnrichmentFailure`s */ - def buildEnrichmentFailuresBadRow( + private def buildSchemaViolationsBadRow( + vs: NonEmptyList[FailureDetails.SchemaViolation], + pee: Payload.PartiallyEnrichedEvent, + re: Payload.RawEvent, + processor: Processor + ): BadRow.SchemaViolations = + BadRow.SchemaViolations( + processor, + Failure.SchemaViolations(Instant.now(), vs), + Payload.EnrichmentPayload(pee, re) + ) + + private def buildEnrichmentFailuresBadRow( fs: NonEmptyList[FailureDetails.EnrichmentFailure], pee: Payload.PartiallyEnrichedEvent, re: Payload.RawEvent, @@ -761,21 +849,4 @@ object EnrichmentManager { Payload.EnrichmentPayload(pee, re) ) - /** - * Validates enriched events against atomic schema. - * For now it's possible to accept enriched events that are not valid. - * See https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 - */ - private def validateEnriched[F[_]: Monad]( - enriched: EnrichedEvent, - raw: RawEvent, - processor: Processor, - acceptInvalid: Boolean, - invalidCount: F[Unit], - atomicFields: AtomicFields - ): EitherT[F, BadRow, Unit] = - EitherT { - //We're using static field's length validation. See more in https://github.com/snowplow/enrich/issues/608 - AtomicFieldsLengthValidator.validate[F](enriched, raw, processor, acceptInvalid, invalidCount, atomicFields) - } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EventEnrichments.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EventEnrichments.scala index eaf10ae62..48d0639f0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EventEnrichments.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EventEnrichments.scala @@ -47,22 +47,17 @@ object EventEnrichments { * @param Optional collectorTstamp * @return Validation boxing the result of making the timestamp Redshift-compatible */ - def formatCollectorTstamp(collectorTstamp: Option[DateTime]): Either[FailureDetails.EnrichmentFailure, String] = - (collectorTstamp match { - case None => - FailureDetails.EnrichmentFailureMessage - .InputData("collector_tstamp", None, "should be set") - .asLeft + def formatCollectorTstamp(collectorTstamp: Option[DateTime]): Either[AtomicError, String] = + collectorTstamp match { + case None => AtomicError("collector_tstamp", None, "Field not set").asLeft case Some(t) => val formattedTimestamp = toTimestamp(t) if (formattedTimestamp.startsWith("-") || t.getYear > 9999 || t.getYear < 0) { - val msg = s"formatted as $formattedTimestamp is not Redshift-compatible" - FailureDetails.EnrichmentFailureMessage - .InputData("collector_tstamp", t.toString.some, msg) - .asLeft + val msg = s"Formatted as $formattedTimestamp is not Redshift-compatible" + AtomicError("collector_tstamp", t.toString.some, msg).asLeft } else formattedTimestamp.asRight - }).leftMap(FailureDetails.EnrichmentFailure(None, _)) + } /** * Calculate the derived timestamp @@ -103,7 +98,7 @@ object EventEnrichments { .EnrichmentFailure( None, FailureDetails.EnrichmentFailureMessage.Simple( - s"exception calculating derived timestamp: ${e.getMessage}" + s"Exception calculating derived timestamp: ${e.getMessage}" ) ) .asLeft @@ -116,30 +111,26 @@ object EventEnrichments { * @param tstamp The timestamp as stored in the Tracker Protocol * @return a Tuple of two Strings (date and time), or an error message if the format was invalid */ - val extractTimestamp: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractTimestamp: (String, String) => Either[AtomicError, String] = (field, tstamp) => try { val dt = new DateTime(tstamp.toLong) val timestampString = toTimestamp(dt) - if (timestampString.startsWith("-") || dt.getYear > 9999 || dt.getYear < 0) { - val msg = s"formatting as $timestampString is not Redshift-compatible" - val f = FailureDetails.EnrichmentFailureMessage.InputData( + if (timestampString.startsWith("-") || dt.getYear > 9999 || dt.getYear < 0) + AtomicError( field, Option(tstamp), - msg - ) - FailureDetails.EnrichmentFailure(None, f).asLeft - } else + s"Formatting as $timestampString is not Redshift-compatible" + ).asLeft + else timestampString.asRight } catch { case _: NumberFormatException => - val msg = "not in the expected format: ms since epoch" - val f = FailureDetails.EnrichmentFailureMessage.InputData( + AtomicError( field, Option(tstamp), - msg - ) - FailureDetails.EnrichmentFailure(None, f).asLeft + "Not in the expected format: ms since epoch" + ).asLeft } /** @@ -149,7 +140,7 @@ object EventEnrichments { * @param eventCode The event code * @return the event type, or an error message if not recognised, boxed in a Scalaz Validation */ - val extractEventType: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractEventType: (String, String) => Either[AtomicError, String] = (field, code) => code match { case "se" => "struct".asRight @@ -161,13 +152,8 @@ object EventEnrichments { case "pv" => "page_view".asRight case "pp" => "page_ping".asRight case _ => - val msg = "not recognized as an event type" - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(code), - msg - ) - FailureDetails.EnrichmentFailure(None, f).asLeft + val msg = "Not a valid event type" + AtomicError(field, Option(code), msg).asLeft } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/MiscEnrichments.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/MiscEnrichments.scala index aaa476eb8..727795e37 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/MiscEnrichments.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/MiscEnrichments.scala @@ -14,7 +14,7 @@ import cats.syntax.either._ import io.circe._ -import com.snowplowanalytics.snowplow.badrows.{FailureDetails, Processor} +import com.snowplowanalytics.snowplow.badrows.Processor import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.implicits._ @@ -44,7 +44,7 @@ object MiscEnrichments { * @param platform The code for the platform generating this event. * @return a Scalaz ValidatedString. */ - val extractPlatform: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractPlatform: (String, String) => Either[AtomicError, String] = (field, platform) => platform match { case "web" => "web".asRight // Web, including Mobile Web @@ -57,17 +57,12 @@ object MiscEnrichments { case "srv" => "srv".asRight // Server-side App case "headset" => "headset".asRight // AR/VR Headset case _ => - val msg = "not recognized as a tracking platform" - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(platform), - msg - ) - FailureDetails.EnrichmentFailure(None, f).asLeft + val msg = "Not a valid platform" + AtomicError(field, Option(platform), msg).asLeft } /** Make a String TSV safe */ - val toTsvSafe: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val toTsvSafe: (String, String) => Either[AtomicError, String] = (_, value) => CU.makeTsvSafe(value).asRight /** @@ -76,7 +71,7 @@ object MiscEnrichments { * Here we retrieve the first one as it is supposed to be the client one, c.f. * https://en.m.wikipedia.org/wiki/X-Forwarded-For#Format */ - val extractIp: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractIp: (String, String) => Either[AtomicError, String] = (_, value) => { val lastIp = Option(value).map(_.split("[,|, ]").head).orNull CU.makeTsvSafe(lastIp).asRight diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala index 00474b069..91929a28c 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala @@ -13,8 +13,6 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments import cats.implicits._ import cats.data.ValidatedNel -import com.snowplowanalytics.snowplow.badrows._ - import com.snowplowanalytics.snowplow.enrich.common.enrichments.{EventEnrichments => EE} import com.snowplowanalytics.snowplow.enrich.common.enrichments.{MiscEnrichments => ME} import com.snowplowanalytics.snowplow.enrich.common.enrichments.{ClientEnrichments => CE} @@ -31,7 +29,7 @@ object Transform { * to "user_ipaddress" in the enriched event * @param enriched /!\ MUTABLE enriched event, mutated IN-PLACE /!\ */ - private[enrichments] def transform(raw: RawEvent, enriched: EnrichedEvent): ValidatedNel[FailureDetails.EnrichmentFailure, Unit] = { + private[enrichments] def transform(raw: RawEvent, enriched: EnrichedEvent): ValidatedNel[AtomicError, Unit] = { val sourceMap: SourceMap = raw.parameters.collect { case (k, Some(v)) => (k, v) } val firstPassTransform = enriched.transform(sourceMap, firstPassTransformMap) val secondPassTransform = enriched.transform(sourceMap, secondPassTransformMap) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CrossNavigationEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CrossNavigationEnrichment.scala index 443075822..fc598fdd0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CrossNavigationEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CrossNavigationEnrichment.scala @@ -192,7 +192,17 @@ object CrossNavigationEnrichment extends ParseableEnrichment { private def extractTstamp(str: String): Either[FailureDetails.EnrichmentFailure, Option[String]] = str match { case "" => None.asRight - case s => EE.extractTimestamp("sp_dtm", s).map(_.some) + case s => + EE.extractTimestamp("sp_dtm", s) + .leftMap { error => + val f = FailureDetails.EnrichmentFailureMessage.InputData( + error.field, + error.value, + error.message + ) + FailureDetails.EnrichmentFailure(None, f) + } + .map(_.some) } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala index 39866edcb..65bc63d4a 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala @@ -45,7 +45,7 @@ import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData import com.snowplowanalytics.snowplow.badrows.{FailureDetails, Processor} import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.enrichments.MiscEnrichments +import com.snowplowanalytics.snowplow.enrich.common.enrichments.{AtomicError, MiscEnrichments} import com.snowplowanalytics.snowplow.enrich.common.QueryStringParameters /** General-purpose utils to help the ETL process along. */ @@ -138,7 +138,7 @@ object ConversionUtils { val result = new String(decodedBytes, UTF_8) // Must specify charset (EMR uses US_ASCII) result } - .leftMap(e => s"could not base64 decode: ${e.getMessage}") + .leftMap(e => s"Could not base64 decode: ${e.getMessage}") /** * Encodes a URL-safe Base64 string. @@ -158,19 +158,13 @@ object ConversionUtils { * @param str The String hopefully containing a UUID * @return either the original String, or an error String */ - val validateUuid: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val validateUuid: (String, String) => Either[AtomicError, String] = (field, str) => { def check(s: String)(u: UUID): Boolean = u != null && s.toLowerCase == u.toString val uuid = Try(UUID.fromString(str)).toOption.filter(check(str)) uuid match { case Some(_) => str.toLowerCase.asRight - case None => - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - "not a valid UUID" - ) - FailureDetails.EnrichmentFailure(None, f).asLeft + case None => AtomicError(field, Option(str), "Not a valid UUID").asLeft } } @@ -179,17 +173,12 @@ object ConversionUtils { * @param str The String hopefully parseable as an integer * @return either the original String, or an error String */ - val validateInteger: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val validateInteger: (String, String) => Either[AtomicError, String] = (field, str) => { Either .catchNonFatal { str.toInt; str } .leftMap { _ => - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - "not a valid integer" - ) - FailureDetails.EnrichmentFailure(None, f) + AtomicError(field, Option(str), "Not a valid integer") } } @@ -214,7 +203,7 @@ object ConversionUtils { val d = URLDecoder.decode(s, enc.toString) d.replaceAll("(\\r|\\n)", "").replaceAll("\\t", " ").asRight } catch { - case NonFatal(e) => s"exception URL-decoding (encoding $enc): ${e.getMessage}".asLeft + case NonFatal(e) => s"Exception URL-decoding (encoding $enc): ${e.getMessage}".asLeft } /** @@ -304,7 +293,7 @@ object ConversionUtils { } match { case util.Success(params) => params.toList.asRight case util.Failure(e) => - val msg = s"could not parse uri, expection was thrown: [$e]." + val msg = s"Could not parse uri. Error: [$e]." val f = FailureDetails.EnrichmentFailureMessage.InputData( "uri", Option(uri).map(_.toString()), @@ -330,19 +319,14 @@ object ConversionUtils { jint.asRight } catch { case _: NumberFormatException => - "cannot be converted to java.lang.Integer".asLeft + "Cannot be converted to java.lang.Integer".asLeft } } - val stringToJInteger2: (String, String) => Either[FailureDetails.EnrichmentFailure, JInteger] = + val stringToJInteger2: (String, String) => Either[AtomicError, JInteger] = (field, str) => stringToJInteger(str).leftMap { e => - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - e - ) - FailureDetails.EnrichmentFailure(None, f) + AtomicError(field, Option(str), e) } val stringToJBigDecimal: String => Either[String, JBigDecimal] = str => @@ -361,18 +345,13 @@ object ConversionUtils { else Right(bd) } - .leftMap(e => s"cannot be converted to java.math.BigDecimal. Error : ${e.getMessage}") + .leftMap(e => s"Cannot be converted to java.math.BigDecimal. Error : ${e.getMessage}") } - val stringToJBigDecimal2: (String, String) => Either[FailureDetails.EnrichmentFailure, JBigDecimal] = + val stringToJBigDecimal2: (String, String) => Either[AtomicError, JBigDecimal] = (field, str) => stringToJBigDecimal(str).leftMap { e => - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - e - ) - FailureDetails.EnrichmentFailure(None, f) + AtomicError(field, Option(str), e) } /** @@ -384,7 +363,7 @@ object ConversionUtils { * @param field The name of the field we are validating. To use in our error message * @return either a failure or a String */ - val stringToDoubleLike: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val stringToDoubleLike: (String, String) => Either[AtomicError, String] = (field, str) => Either .catchNonFatal { @@ -397,12 +376,8 @@ object ConversionUtils { } } .leftMap { _ => - val msg = "cannot be converted to Double-like" - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage - .InputData(field, Option(str), msg) - ) + val msg = "Cannot be converted to Double-like" + AtomicError(field, Option(str), msg) } /** @@ -411,7 +386,7 @@ object ConversionUtils { * @param field The name of the field we are validating. To use in our error message * @return a Scalaz Validation, being either a Failure String or a Success Double */ - def stringToMaybeDouble(field: String, str: String): Either[FailureDetails.EnrichmentFailure, Option[Double]] = + def stringToMaybeDouble(field: String, str: String): Either[AtomicError, Option[Double]] = Either .catchNonFatal { if (Option(str).isEmpty || str == "null") @@ -422,50 +397,41 @@ object ConversionUtils { jbigdec.doubleValue().some } } - .leftMap(_ => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - "cannot be converted to Double" - ) - ) - ) + .leftMap(_ => AtomicError(field, Option(str), "Cannot be converted to Double")) - /** Convert a java BigDecimal a Double */ - def jBigDecimalToDouble(field: String, f: JBigDecimal): Either[FailureDetails.EnrichmentFailure, Option[Double]] = + /** Convert a java BigDecimal to a Double */ + def jBigDecimalToDouble(field: String, f: JBigDecimal): Either[AtomicError, Option[Double]] = Either .catchNonFatal { Option(f).map(_.doubleValue) } - .leftMap(_ => + .leftMap(_ => AtomicError(field, Option(f).map(_.toString), "Cannot be converted to Double")) + + /** Convert a java BigDecimal to a Double */ + def jBigDecimalToDouble( + field: String, + f: JBigDecimal, + enrichmentInfo: FailureDetails.EnrichmentInformation + ): Either[FailureDetails.EnrichmentFailure, Option[Double]] = + jBigDecimalToDouble(field, f) + .leftMap { error => FailureDetails.EnrichmentFailure( - None, + Some(enrichmentInfo), FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(f).map(_.toString), - "cannot be converted to Double" + error.field, + error.value, + error.message ) ) - ) + } /** Convert a Double to a java BigDecimal */ - def doubleToJBigDecimal(field: String, d: Option[Double]): Either[FailureDetails.EnrichmentFailure, Option[JBigDecimal]] = + def doubleToJBigDecimal(field: String, d: Option[Double]): Either[AtomicError, Option[JBigDecimal]] = Either .catchNonFatal { d.map(dd => new JBigDecimal(dd)) } - .leftMap(_ => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - field, - d.map(_.toString), - "cannot be converted to java BigDecimal" - ) - ) - ) + .leftMap(_ => AtomicError(field, d.map(_.toString), "Cannot be converted to java BigDecimal")) /** * Converts a String to a Double with two decimal places. Used to honor schemas with @@ -475,7 +441,7 @@ object ConversionUtils { val stringToTwoDecimals: String => Either[String, Double] = str => try BigDecimal(str).setScale(2, BigDecimal.RoundingMode.HALF_EVEN).toDouble.asRight catch { - case _: NumberFormatException => "cannot be converted to Double".asLeft + case _: NumberFormatException => "Cannot be converted to Double".asLeft } /** @@ -485,7 +451,7 @@ object ConversionUtils { val stringToDouble: String => Either[String, Double] = str => Either .catchNonFatal(BigDecimal(str).toDouble) - .leftMap(_ => s"cannot be converted to Double") + .leftMap(_ => s"Cannot be converted to Double") /** * Extract a Java Byte representing 1 or 0 only from a String, or error. @@ -493,19 +459,14 @@ object ConversionUtils { * @param field The name of the field we are trying to process. To use in our error message * @return either a Failure String or a Success Byte */ - val stringToBooleanLikeJByte: (String, String) => Either[FailureDetails.EnrichmentFailure, JByte] = + val stringToBooleanLikeJByte: (String, String) => Either[AtomicError, JByte] = (field, str) => str match { case "1" => (1.toByte: JByte).asRight case "0" => (0.toByte: JByte).asRight case _ => - val msg = "cannot be converted to Boolean-like java.lang.Byte" - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - msg - ) - FailureDetails.EnrichmentFailure(None, f).asLeft + val msg = "Cannot be converted to Boolean-like java.lang.Byte" + AtomicError(field, Option(str), msg).asLeft } /** @@ -520,7 +481,7 @@ object ConversionUtils { else if (str == "0") false.asRight else - s"cannot be converted to boolean, only 1 or 0 are supported".asLeft + s"Cannot be converted to boolean, only 1 or 0 are supported".asLeft /** * Truncates a String - useful for making sure Strings can't overflow a database field. diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala index d18936818..791174120 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala @@ -19,8 +19,6 @@ import io.circe._ import io.circe.syntax._ import io.circe.generic.semiauto._ -import java.time.Instant - import com.snowplowanalytics.iglu.client.{ClientError, IgluCirceClient} import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup @@ -30,8 +28,6 @@ import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.snowplow.badrows._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentManager -import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent /** * Contain the functions to validate: @@ -56,36 +52,19 @@ object IgluUtils { def extractAndValidateInputJsons[F[_]: Monad: Clock]( enriched: EnrichedEvent, client: IgluCirceClient[F], - raw: RawEvent, - processor: Processor, registryLookup: RegistryLookup[F] - ): EitherT[ - F, - BadRow.SchemaViolations, - EventExtractResult - ] = - EitherT { - for { - contexts <- IgluUtils.extractAndValidateInputContexts(enriched, client, registryLookup) - unstruct <- IgluUtils - .extractAndValidateUnstructEvent(enriched, client, registryLookup) - .map(_.toValidatedNel) - } yield (contexts, unstruct) - .mapN { (c, ue) => - val validationInfoContexts = (c.flatMap(_.validationInfo) ::: ue.flatMap(_.validationInfo).toList).distinct - .map(_.toSdj) - EventExtractResult(contexts = c.map(_.sdj), unstructEvent = ue.map(_.sdj), validationInfoContexts = validationInfoContexts) - } - .leftMap { schemaViolations => - buildSchemaViolationsBadRow( - schemaViolations, - EnrichedEvent.toPartiallyEnrichedEvent(enriched), - RawEvent.toRawEvent(raw), - processor - ) - } - .toEither - } + ): F[ValidatedNel[FailureDetails.SchemaViolation, EventExtractResult]] = + for { + contexts <- IgluUtils.extractAndValidateInputContexts(enriched, client, registryLookup) + unstruct <- IgluUtils + .extractAndValidateUnstructEvent(enriched, client, registryLookup) + .map(_.toValidatedNel) + } yield (contexts, unstruct) + .mapN { (c, ue) => + val validationInfoContexts = (c.flatMap(_.validationInfo) ::: ue.flatMap(_.validationInfo).toList).distinct + .map(_.toSdj) + EventExtractResult(contexts = c.map(_.sdj), unstructEvent = ue.map(_.sdj), validationInfoContexts = validationInfoContexts) + } /** * Extract unstructured event from event and validate against its schema @@ -109,7 +88,7 @@ object IgluUtils { // Validate input Json string and extract unstructured event unstruct <- extractInputData(rawUnstructEvent, field, criterion, client, registryLookup) // Parse Json unstructured event as SelfDescribingData[Json] - unstructSDJ <- parseAndValidateSDJ_sv(unstruct, client, registryLookup) + unstructSDJ <- parseAndValidateSDJ(unstruct, client, registryLookup) } yield unstructSDJ.some case None => EitherT.rightT[F, FailureDetails.SchemaViolation](none[SdjExtractResult]) @@ -141,7 +120,7 @@ object IgluUtils { // Parse and validate each SDJ and merge the errors contextsSDJ <- EitherT( contexts - .map(parseAndValidateSDJ_sv(_, client, registryLookup).toValidatedNel) + .map(parseAndValidateSDJ(_, client, registryLookup).toValidatedNel) .sequence .map(_.sequence.toEither) ) @@ -164,31 +143,17 @@ object IgluUtils { private[common] def validateEnrichmentsContexts[F[_]: Monad: Clock]( client: IgluCirceClient[F], sdjs: List[SelfDescribingData[Json]], - raw: RawEvent, - processor: Processor, - enriched: EnrichedEvent, registryLookup: RegistryLookup[F] - ): EitherT[F, BadRow.EnrichmentFailures, Unit] = + ): F[ValidatedNel[FailureDetails.SchemaViolation, Unit]] = checkList(client, sdjs, registryLookup) .leftMap( _.map { case (schemaKey, clientError) => - val enrichmentInfo = - FailureDetails.EnrichmentInformation(schemaKey, "enrichments-contexts-validation") - FailureDetails.EnrichmentFailure( - enrichmentInfo.some, - FailureDetails.EnrichmentFailureMessage.IgluError(schemaKey, clientError) - ) + val f: FailureDetails.SchemaViolation = FailureDetails.SchemaViolation.IgluError(schemaKey, clientError) + f } ) - .leftMap { enrichmentFailures => - EnrichmentManager.buildEnrichmentFailuresBadRow( - enrichmentFailures, - EnrichedEvent.toPartiallyEnrichedEvent(enriched), - RawEvent.toRawEvent(raw), - processor - ) - } + .toValidated /** Used to extract .data for input custom contexts and input unstructured event */ private def extractInputData[F[_]: Monad: Clock]( @@ -257,7 +222,7 @@ object IgluUtils { } /** Parse a Json as a SDJ and check that it's valid */ - private def parseAndValidateSDJ_sv[F[_]: Monad: Clock]( // _sv for SchemaViolation + private def parseAndValidateSDJ[F[_]: Monad: Clock]( json: Json, client: IgluCirceClient[F], registryLookup: RegistryLookup[F] @@ -309,17 +274,4 @@ object IgluUtils { unstructEvent: Option[SelfDescribingData[Json]], validationInfoContexts: List[SelfDescribingData[Json]] ) - - /** Build `BadRow.SchemaViolations` from a list of `FailureDetails.SchemaViolation`s */ - def buildSchemaViolationsBadRow( - vs: NonEmptyList[FailureDetails.SchemaViolation], - pee: Payload.PartiallyEnrichedEvent, - re: Payload.RawEvent, - processor: Processor - ): BadRow.SchemaViolations = - BadRow.SchemaViolations( - processor, - Failure.SchemaViolations(Instant.now(), vs), - Payload.EnrichmentPayload(pee, re) - ) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala index 680286044..d487514c0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala @@ -20,7 +20,7 @@ import io.circe.Json import org.joda.time.{DateTime, DateTimeZone} import org.joda.time.format.{DateTimeFormat, DateTimeFormatter} -import com.snowplowanalytics.snowplow.badrows.FailureDetails +import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicError /** Contains general purpose extractors and other utilities for JSONs. Jackson-based. */ object JsonUtils { @@ -32,29 +32,21 @@ object JsonUtils { DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").withZone(DateTimeZone.UTC) /** Validates a String as correct JSON. */ - val extractUnencJson: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractUnencJson: (String, String) => Either[AtomicError, String] = (field, str) => validateAndReformatJson(str) .leftMap { e => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage - .InputData(field, Option(str), e) - ) + AtomicError(field, Option(str), e) } /** Decodes a Base64 (URL safe)-encoded String then validates it as correct JSON. */ - val extractBase64EncJson: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractBase64EncJson: (String, String) => Either[AtomicError, String] = (field, str) => ConversionUtils .decodeBase64Url(str) .flatMap(validateAndReformatJson) .leftMap { e => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage - .InputData(field, Option(str), e) - ) + AtomicError(field, Option(str), e) } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/MapTransformer.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/MapTransformer.scala index 6bc76be1c..448533d1c 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/MapTransformer.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/MapTransformer.scala @@ -17,7 +17,7 @@ import cats.instances.int._ import cats.syntax.either._ import cats.syntax.validated._ -import com.snowplowanalytics.snowplow.badrows._ +import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicError /** * The problem we're trying to solve: converting maps to classes in Scala @@ -66,7 +66,7 @@ object MapTransformer { type Field = String // A transformation takes a Key and Value and returns either a failure or anything - type TransformFunc = Function2[Key, Value, Either[FailureDetails.EnrichmentFailure, _]] + type TransformFunc = Function2[Key, Value, Either[AtomicError, _]] // Our source map type SourceMap = Map[Key, Value] @@ -88,7 +88,7 @@ object MapTransformer { transformMap: TransformMap )( implicit m: Manifest[T] - ): ValidatedNel[FailureDetails.EnrichmentFailure, T] = { + ): ValidatedNel[AtomicError, T] = { val newInst = m.runtimeClass.getDeclaredConstructor().newInstance() val result = _transform(newInst, sourceMap, transformMap, getSetters(m.runtimeClass)) // On success, replace the field count with the new instance @@ -116,7 +116,7 @@ object MapTransformer { * @param transformMap Determines how the data should be transformed before storing in the obj * @return a ValidationNel containing a Nel of error Strings, or the count of updated fields */ - def transform(sourceMap: SourceMap, transformMap: TransformMap): ValidatedNel[FailureDetails.EnrichmentFailure, Int] = + def transform(sourceMap: SourceMap, transformMap: TransformMap): ValidatedNel[AtomicError, Int] = _transform[T](obj, sourceMap, transformMap, setters) } @@ -134,8 +134,8 @@ object MapTransformer { sourceMap: SourceMap, transformMap: TransformMap, setters: SettersMap - ): ValidatedNel[FailureDetails.EnrichmentFailure, Int] = { - val results: List[Either[FailureDetails.EnrichmentFailure, Int]] = sourceMap.map { + ): ValidatedNel[AtomicError, Int] = { + val results: List[Either[AtomicError, Int]] = sourceMap.map { case (key, in) => transformMap.get(key) match { case Some((func, field)) => @@ -172,7 +172,7 @@ object MapTransformer { } }.toList - results.foldLeft(0.validNel[FailureDetails.EnrichmentFailure]) { + results.foldLeft(0.validNel[AtomicError]) { case (acc, e) => acc.combine(e.toValidatedNel) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index d598ca932..a48a48a92 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -29,7 +29,6 @@ import io.circe.syntax._ import org.joda.time.DateTime import com.snowplowanalytics.snowplow.badrows._ -import com.snowplowanalytics.snowplow.badrows.FailureDetails.EnrichmentFailureMessage import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} @@ -137,6 +136,62 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE }) } + "return a SchemaViolations bad row that contains 1 ValidationError for the atomic field and 1 ValidationError for the unstruct event" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "tr_tt" -> "not number", + "ue_pr" -> + """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "unallowedAdditionalField": "foo@bar.org" + } + } + }""" + ).toOpt + val rawEvent = RawEvent(api, parameters, None, source, context) + EnrichmentManager + .enrichEvent[IO]( + enrichmentReg, + client, + processor, + timestamp, + rawEvent, + AcceptInvalid.featureFlags, + IO.unit, + SpecHelpers.registryLookup, + atomicFieldLimits + ) + .value + .map(_ must beLeft.like { + case BadRow.SchemaViolations( + _, + Failure.SchemaViolations(_, + NonEmptyList(FailureDetails.SchemaViolation.IgluError(schemaKey1, clientError1), + List(FailureDetails.SchemaViolation.IgluError(schemaKey2, clientError2)) + ) + ), + _ + ) => + schemaKey1 must beEqualTo(AtomicFields.atomicSchema) + clientError1.toString must contain("tr_tt") + clientError1.toString must contain("Cannot be converted to java.math.BigDecimal") + schemaKey2 must beEqualTo(emailSentSchema) + clientError2.toString must contain( + "unallowedAdditionalField: is not defined in the schema and the schema does not allow additional properties" + ) + case other => + ko(s"[$other] is not a SchemaViolations bad row with 2 IgluError") + }) + } + "return an EnrichmentFailures bad row if one of the enrichment (JS enrichment here) fails" >> { val script = """ @@ -203,7 +258,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE }) } - "return an EnrichmentFailures bad row containing one IgluError if one of the contexts added by the enrichments is invalid" >> { + "return a SchemaViolations bad row containing one IgluError if one of the contexts added by the enrichments is invalid" >> { val script = """ function process(event) { @@ -251,22 +306,19 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE ) enriched.value .map(_ must beLeft.like { - case BadRow.EnrichmentFailures( + case BadRow.SchemaViolations( _, - Failure.EnrichmentFailures( + Failure.SchemaViolations( _, NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - _: FailureDetails.EnrichmentFailureMessage.IgluError - ), + _: FailureDetails.SchemaViolation.IgluError, Nil ) ), payload ) if payload.enriched.derived_contexts.isDefined => ok - case br => ko(s"bad row [$br] is not an EnrichmentFailures containing one IgluError and with derived_contexts defined") + case br => ko(s"[$br] is not a SchemaViolations bad row containing one IgluError and with derived_contexts defined") }) } @@ -1380,7 +1432,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE FailureDetails.EnrichmentFailureMessage.InputData( "sp_dtm", "some_invalid_timestamp_value".some, - "not in the expected format: ms since epoch" + "Not in the expected format: ms since epoch" ) ) val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) @@ -1414,7 +1466,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE FailureDetails.EnrichmentFailureMessage.InputData( "sp_dtm", "some_invalid_timestamp_value".some, - "not in the expected format: ms since epoch" + "Not in the expected format: ms since epoch" ) ) val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) @@ -1577,7 +1629,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE } "validateEnriched" should { - "create a bad row if a field is oversized" >> { + "create a SchemaViolations bad row if an atomic field is oversized" >> { val result = EnrichmentManager .enrichEvent[IO]( enrichmentReg, @@ -1590,23 +1642,23 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE SpecHelpers.registryLookup, atomicFieldLimits ) - .value - result.map(_ must beLeft.like { - case badRow: BadRow.EnrichmentFailures => - val firstError = badRow.failure.messages.head.message - val secondError = badRow.failure.messages.last.message - - firstError must beEqualTo( - EnrichmentFailureMessage.Simple("Enriched event does not conform to atomic schema field's length restrictions") - ) - secondError must beEqualTo(EnrichmentFailureMessage.Simple("Field v_tracker longer than maximum allowed size 100")) - case br => - ko(s"bad row [$br] is not BadRow.EnrichmentFailures") - }) + result.value + .map(_ must beLeft.like { + case BadRow.SchemaViolations( + _, + Failure.SchemaViolations(_, NonEmptyList(FailureDetails.SchemaViolation.IgluError(schemaKey, clientError), Nil)), + _ + ) => + schemaKey must beEqualTo(AtomicFields.atomicSchema) + clientError.toString must contain("v_tracker") + clientError.toString must contain("Field is longer than maximum allowed size") + case other => + ko(s"[$other] is not a SchemaViolations bad row with one IgluError") + }) } - "not create a bad row if a field is oversized and acceptInvalid is set to true" >> { + "not create a bad row if an atomic field is oversized and acceptInvalid is set to true" >> { val result = EnrichmentManager .enrichEvent[IO]( enrichmentReg, @@ -1623,6 +1675,69 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE result.map(_ must beRight[EnrichedEvent]) } + + "return a SchemaViolations bad row containing both the atomic field length error and the invalid enrichment context error" >> { + val script = + """ + function process(event) { + return [ { schema: "iglu:com.acme/email_sent/jsonschema/1-0-0", + data: { + emailAddress: "hello@world.com", + foo: "bar" + } + } ]; + }""" + + val config = + json"""{ + "parameters": { + "script": ${ConversionUtils.encodeBase64Url(script)} + } + }""" + val schemaKey = SchemaKey( + "com.snowplowanalytics.snowplow", + "javascript_script_config", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ) + val jsEnrichConf = + JavascriptScriptEnrichment.parse(config, schemaKey).toOption.get + val jsEnrich = JavascriptScriptEnrichment(jsEnrichConf.schemaKey, jsEnrichConf.rawFunction) + val enrichmentReg = EnrichmentRegistry[IO](javascriptScript = List(jsEnrich)) + + val rawEvent = RawEvent(api, fatBody, None, source, context) + EnrichmentManager + .enrichEvent[IO]( + enrichmentReg, + client, + processor, + timestamp, + rawEvent, + AcceptInvalid.featureFlags, + IO.unit, + SpecHelpers.registryLookup, + atomicFieldLimits + ) + .value + .map(_ must beLeft.like { + case BadRow.SchemaViolations( + _, + Failure.SchemaViolations(_, + NonEmptyList(FailureDetails.SchemaViolation.IgluError(schemaKey1, clientError1), + List(FailureDetails.SchemaViolation.IgluError(schemaKey2, clientError2)) + ) + ), + _ + ) => + schemaKey1 must beEqualTo(AtomicFields.atomicSchema) + clientError1.toString must contain("v_tracker") + clientError1.toString must contain("Field is longer than maximum allowed size") + schemaKey2 must beEqualTo(emailSentSchema) + clientError2.toString must contain("emailAddress2: is missing but it is required") + case other => + ko(s"[$other] is not a SchemaViolations bad row with 2 IgluError") + }) + } } } @@ -1691,4 +1806,11 @@ object EnrichmentManagerSpec { .getOrElse(throw new RuntimeException("IAB enrichment couldn't be initialised")) // to make sure it's not none .enrichment[IO] + val emailSentSchema = + SchemaKey( + "com.acme", + "email_sent", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/clientEnrichmentSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/clientEnrichmentSpecs.scala index 91c8db02d..2be734174 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/clientEnrichmentSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/clientEnrichmentSpecs.scala @@ -14,31 +14,14 @@ import cats.syntax.either._ import org.specs2.Specification import org.specs2.matcher.DataTables -import com.snowplowanalytics.snowplow.badrows._ - class ExtractViewDimensionsSpec extends Specification with DataTables { val FieldName = "res" - def err: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - """does not conform to regex (\d+)x(\d+)""" - ) - ) - def err2: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - "could not be converted to java.lang.Integer s" - ) - ) + def err: String => AtomicError = + input => AtomicError(FieldName, Option(input), """Does not conform to regex (\d+)x(\d+)""") + + def err2: String => AtomicError = + input => AtomicError(FieldName, Option(input), "Could not be converted to java.lang.Integer s") def is = s2""" Extracting screen dimensions (viewports, screen resolution etc) with extractViewDimensions should work $e1""" diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/eventEnrichmentSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/eventEnrichmentSpecs.scala index 92f0448b8..cb28a7b05 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/eventEnrichmentSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/eventEnrichmentSpecs.scala @@ -16,8 +16,6 @@ import org.joda.time.DateTimeZone import org.specs2.Specification import org.specs2.matcher.DataTables -import com.snowplowanalytics.snowplow.badrows._ - class ExtractEventTypeSpec extends Specification with DataTables { def is = s2""" extractEventType should return the event name for any valid event code $e1 @@ -27,16 +25,8 @@ class ExtractEventTypeSpec extends Specification with DataTables { """ val FieldName = "e" - def err: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - "not recognized as an event type" - ) - ) + def err: String => AtomicError = + input => AtomicError(FieldName, Option(input), "Not a valid event type") def e1 = "SPEC NAME" || "INPUT VAL" | "EXPECTED OUTPUT" | @@ -66,9 +56,9 @@ class ExtractEventTypeSpec extends Specification with DataTables { def e3 = // format: off "SPEC NAME" || "INPUT VAL" | "EXPECTED OUTPUT" | - "None" !! None ! FailureDetails.EnrichmentFailure(None, FailureDetails.EnrichmentFailureMessage.InputData("collector_tstamp", None, "should be set")).asLeft | - "Negative timestamp" !! BCTstamp ! FailureDetails.EnrichmentFailure(None, FailureDetails.EnrichmentFailureMessage.InputData("collector_tstamp", Some("-0030-01-01T00:00:00.000Z"),"formatted as -0030-01-01 00:00:00.000 is not Redshift-compatible")).asLeft | - ">10k timestamp" !! FarAwayTstamp ! FailureDetails.EnrichmentFailure(None, FailureDetails.EnrichmentFailureMessage.InputData("collector_tstamp", Some("11970-01-01T00:00:00.000Z"),"formatted as 11970-01-01 00:00:00.000 is not Redshift-compatible")).asLeft | + "None" !! None ! AtomicError("collector_tstamp", None, "Field not set").asLeft | + "Negative timestamp" !! BCTstamp ! AtomicError("collector_tstamp", Some("-0030-01-01T00:00:00.000Z"),"Formatted as -0030-01-01 00:00:00.000 is not Redshift-compatible").asLeft | + ">10k timestamp" !! FarAwayTstamp ! AtomicError("collector_tstamp", Some("11970-01-01T00:00:00.000Z"),"Formatted as 11970-01-01 00:00:00.000 is not Redshift-compatible").asLeft | "Valid timestamp" !! SeventiesTstamp ! "1970-01-01 00:00:00.000".asRight |> { // format: on (_, input, expected) => @@ -77,26 +67,11 @@ class ExtractEventTypeSpec extends Specification with DataTables { def e4 = "SPEC NAME" || "INPUT VAL" | "EXPECTED OUTPUT" | - "Not long" !! (("f", "v")) ! FailureDetails - .EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - "f", - Some("v"), - "not in the expected format: ms since epoch" - ) - ) - .asLeft | - "Too long" !! (("f", "1111111111111111")) ! FailureDetails - .EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - "f", - Some("1111111111111111"), - "formatting as 37179-09-17 07:18:31.111 is not Redshift-compatible" - ) - ) - .asLeft | + "Not long" !! (("f", "v")) ! AtomicError("f", Some("v"), "Not in the expected format: ms since epoch").asLeft | + "Too long" !! (("f", "1111111111111111")) ! AtomicError("f", + Some("1111111111111111"), + "Formatting as 37179-09-17 07:18:31.111 is not Redshift-compatible" + ).asLeft | "Valid ts" !! (("f", "1")) ! "1970-01-01 00:00:00.001".asRight |> { (_, input, expected) => EventEnrichments.extractTimestamp(input._1, input._2) must_== expected } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/miscEnrichmentSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/miscEnrichmentSpecs.scala index e13822302..77b6f7022 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/miscEnrichmentSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/miscEnrichmentSpecs.scala @@ -18,7 +18,7 @@ import org.specs2.mutable.{Specification => MutSpecification} import org.specs2.Specification import org.specs2.matcher.DataTables -import com.snowplowanalytics.snowplow.badrows.{FailureDetails, Processor} +import com.snowplowanalytics.snowplow.badrows.Processor import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} @@ -35,16 +35,8 @@ class EtlVersionSpec extends MutSpecification { /** Tests the extractPlatform function. Uses DataTables. */ class ExtractPlatformSpec extends Specification with DataTables { val FieldName = "p" - def err: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - "not recognized as a tracking platform" - ) - ) + def err: String => AtomicError = + input => AtomicError(FieldName, Option(input), "Not a valid platform") def is = s2""" Extracting platforms with extractPlatform should work $e1 diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CrossNavigationEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CrossNavigationEnrichmentSpec.scala index db830ff43..387f868f1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CrossNavigationEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CrossNavigationEnrichmentSpec.scala @@ -182,7 +182,7 @@ class CrossNavigationEnrichmentSpec extends Specification with EitherMatchers { FailureDetails.EnrichmentFailureMessage.InputData( "sp_dtm", "not-timestamp".some, - "not in the expected format: ms since epoch" + "Not in the expected format: ms since epoch" ) ) val result = CrossDomainMap.makeCrossDomainMap(input).map(_.domainMap) @@ -196,7 +196,7 @@ class CrossNavigationEnrichmentSpec extends Specification with EitherMatchers { FailureDetails.EnrichmentFailureMessage.InputData( "sp_dtm", "1111111111111111".some, - "formatting as 37179-09-17 07:18:31.111 is not Redshift-compatible" + "Formatting as 37179-09-17 07:18:31.111 is not Redshift-compatible" ) ) val result = CrossDomainMap.makeCrossDomainMap(input).map(_.domainMap) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/web/ParseCrossDomainSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/web/ParseCrossDomainSpec.scala index 0468e005f..154bde8d1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/web/ParseCrossDomainSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/web/ParseCrossDomainSpec.scala @@ -46,7 +46,7 @@ class ParseCrossDomainSpec extends Specification with DataTables { FailureDetails.EnrichmentFailureMessage.InputData( "sp_dtm", "not-a-timestamp".some, - "not in the expected format: ms since epoch" + "Not in the expected format: ms since epoch" ) ) CrossNavigationEnrichment.parseCrossDomain(List(("_sp" -> Some("abc.not-a-timestamp")))).map(_.domainMap) must beLeft(expected) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala index f60d5778a..8f0596eba 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala @@ -440,106 +440,64 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect } "validateEnrichmentsContexts" should { - "return a BadRow.EnrichmentFailures with one expected failure for one invalid context" >> { + "return one expected SchemaViolation for one invalid context" >> { val contexts = List( SpecHelpers.jsonStringToSDJ(invalidEmailSent).right.get ) IgluUtils - .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched, SpecHelpers.registryLookup) - .value - .map(_ must beLeft.like { - case BadRow.EnrichmentFailures(_, failures, _) => - failures.messages match { - case NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) - ), - _ - ) => - ok - case err => ko(s"bad row is EnrichmentFailures but [$err] is not one ValidationError") - } - case br => ko(s"bad row [$br] is not EnrichmentFailures") + .validateEnrichmentsContexts(SpecHelpers.client, contexts, SpecHelpers.registryLookup) + .map(_.toEither must beLeft.like { + case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), Nil) => ok + case other => ko(s"[$other] is not one ValidationError") }) } - "return a BadRow.EnrichmentFailures 2 expected failures for 2 invalid contexts" >> { + "return two expected SchemaViolation for two invalid contexts" >> { val contexts = List( SpecHelpers.jsonStringToSDJ(invalidEmailSent).right.get, SpecHelpers.jsonStringToSDJ(noSchema).right.get ) IgluUtils - .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched, SpecHelpers.registryLookup) - .value - .map(_ must beLeft.like { - case BadRow.EnrichmentFailures(_, failures, _) => - failures.messages match { - case NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) - ), - List( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ResolutionError(_)) - ) - ) - ) => - ok - case errs => - ko( - s"bad row is EnrichmentFailures but [$errs] is not one ValidationError and one ResolutionError" - ) - } - case br => ko(s"bad row [$br] is not EnrichmentFailures") + .validateEnrichmentsContexts(SpecHelpers.client, contexts, SpecHelpers.registryLookup) + .map(_.toEither must beLeft.like { + case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), + List(FailureDetails.SchemaViolation.IgluError(_, ResolutionError(_))) + ) => + ok + case other => ko(s"[$other] is not one ValidationError and one ResolutionError") }) } - "return a BadRow.EnrichmentFailures with an expected failure for 1 valid context and one invalid" >> { + "return one expected SchemaViolation for one invalid context and one valid" >> { val contexts = List( SpecHelpers.jsonStringToSDJ(invalidEmailSent).right.get, SpecHelpers.jsonStringToSDJ(emailSent1).right.get ) IgluUtils - .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched, SpecHelpers.registryLookup) - .value - .map(_ must beLeft.like { - case BadRow.EnrichmentFailures(_, failures, _) => - failures.messages match { - case NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) - ), - Nil - ) => - ok - case err => ko(s"bad row is EnrichmentFailures but [$err] is not one ValidationError") - } - case br => ko(s"bad row [$br] is not EnrichmentFailures") + .validateEnrichmentsContexts(SpecHelpers.client, contexts, SpecHelpers.registryLookup) + .map(_.toEither must beLeft.like { + case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), Nil) => ok + case other => ko(s"[$other] is not one ValidationError") }) } - "not return any error for 2 valid contexts" >> { + "not return any error for two valid contexts" >> { val contexts = List( SpecHelpers.jsonStringToSDJ(emailSent1).right.get, SpecHelpers.jsonStringToSDJ(emailSent2).right.get ) IgluUtils - .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched, SpecHelpers.registryLookup) - .value - .map(_ must beRight) + .validateEnrichmentsContexts(SpecHelpers.client, contexts, SpecHelpers.registryLookup) + .map(_.toEither must beRight) } } "extractAndValidateInputJsons" should { - "return a SchemaViolations containing 1 error if the input event contains an invalid unstructured event" >> { + "return one SchemaViolation if the input event contains an invalid unstructured event" >> { val input = new EnrichedEvent input.setUnstruct_event(buildUnstruct(invalidEmailSent)) @@ -547,18 +505,15 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect .extractAndValidateInputJsons( input, SpecHelpers.client, - raw, - processor, SpecHelpers.registryLookup ) - .value - .map(_ must beLeft.like { - case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 1 => ok - case br => ko(s"bad row [$br] is not a SchemaViolations containing 1 error") + .map(_.toEither must beLeft.like { + case errors if errors.size == 1 => ok + case other => ko(s"[$other] is not one SchemaViolation") }) } - "return a SchemaViolations containing 1 error if the input event contains 1 invalid context" >> { + "return one SchemaViolation if the input event contains one invalid context" >> { val input = new EnrichedEvent input.setContexts(buildInputContexts(List(invalidEmailSent))) @@ -566,18 +521,15 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect .extractAndValidateInputJsons( input, SpecHelpers.client, - raw, - processor, SpecHelpers.registryLookup ) - .value - .map(_ must beLeft.like { - case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 1 => ok - case br => ko(s"bad row [$br] is not a SchemaViolations containing 1 error") + .map(_.toEither must beLeft.like { + case errors if errors.size == 1 => ok + case other => ko(s"[$other] is not one SchemaViolation") }) } - "return a SchemaViolations containing 2 errors if the input event contains an invalid unstructured event and 1 invalid context" >> { + "return two SchemaViolation if the input event contains an invalid unstructured event and one invalid context" >> { val input = new EnrichedEvent input.setUnstruct_event(invalidEmailSent) input.setContexts(buildInputContexts(List(invalidEmailSent))) @@ -586,14 +538,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect .extractAndValidateInputJsons( input, SpecHelpers.client, - raw, - processor, SpecHelpers.registryLookup ) - .value - .map(_ must beLeft.like { - case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 2 => ok - case br => ko(s"bad row [$br] is not a SchemaViolations containing 2 errors") + .map(_.toEither must beLeft.like { + case errors if errors.size == 2 => ok + case other => ko(s"[$other] is not two SchemaViolation") }) } @@ -606,12 +555,9 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect .extractAndValidateInputJsons( input, SpecHelpers.client, - raw, - processor, SpecHelpers.registryLookup ) - .value - .map(_ must beRight.like { + .map(_.toEither must beRight.like { case IgluUtils.EventExtractResult(contexts, Some(unstructEvent), validationInfos) if contexts.size == 2 && validationInfos.isEmpty @@ -619,7 +565,7 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect ok case res => ko( - s"[$res] is not a list with 2 extracted contexts and an option with the extracted unstructured event" + s"[$res] is not a list with two extracted contexts and an option with the extracted unstructured event" ) }) } @@ -640,12 +586,9 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect .extractAndValidateInputJsons( input, SpecHelpers.client, - raw, - processor, SpecHelpers.registryLookup ) - .value - .map(_ must beRight.like { + .map(_.toEither must beRight.like { case IgluUtils.EventExtractResult(contexts, Some(unstructEvent), List(validationInfo)) if contexts.size == 2 && unstructEvent.schema == supersedingExampleSchema101 @@ -655,7 +598,7 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect ok case res => ko( - s"[$res] is not a list with 2 extracted contexts and an option with the extracted unstructured event" + s"[$res] is not a list with two extracted contexts and an option with the extracted unstructured event" ) }) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/MapTransformerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/MapTransformerSpec.scala index 5a2f5fcbf..66b5c01be 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/MapTransformerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/MapTransformerSpec.scala @@ -18,9 +18,7 @@ import org.apache.commons.lang3.builder.HashCodeBuilder import org.specs2.matcher.ValidatedMatchers import org.specs2.mutable.Specification -import com.snowplowanalytics.snowplow.badrows._ - -import com.snowplowanalytics.snowplow.enrich.common.enrichments.{ClientEnrichments, MiscEnrichments} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.{AtomicError, ClientEnrichments, MiscEnrichments} import com.snowplowanalytics.snowplow.enrich.common.utils.MapTransformer._ // Test Bean @@ -52,7 +50,7 @@ final class TargetBean { class MapTransformerSpec extends Specification with ValidatedMatchers { - val identity: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val identity: (String, String) => Either[AtomicError, String] = (_, value) => value.asRight val sourceMap = Map( diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala index a6dcd1c39..74fc97332 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala @@ -27,6 +27,7 @@ import org.specs2.matcher.DataTables import com.snowplowanalytics.snowplow.badrows._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicError class StringToUriSpec extends MSpecification with DataTables { @@ -210,7 +211,7 @@ class DecodeBase64UrlSpec extends Specification with DataTables with ScalaCheck """ // Only way of getting a failure currently - def e1 = ConversionUtils.decodeBase64Url(null) must beLeft("could not base64 decode: null") + def e1 = ConversionUtils.decodeBase64Url(null) must beLeft("Could not base64 decode: null") // No string creates a failure def e2 = @@ -263,14 +264,7 @@ class ValidateUuidSpec extends Specification with DataTables with ScalaCheck { def e2 = prop { (str: String) => ConversionUtils.validateUuid(FieldName, str) must beLeft( - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(str), - "not a valid UUID" - ) - ) + AtomicError(FieldName, Option(str), "Not a valid UUID") ) } } @@ -288,14 +282,7 @@ class ValidateIntegerSpec extends Specification { def e2 = { val str = "abc" ConversionUtils.validateInteger(FieldName, str) must beLeft( - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Some(str), - "not a valid integer" - ) - ) + AtomicError(FieldName, Some(str), "Not a valid integer") ) } } @@ -326,16 +313,8 @@ class StringToDoubleLikeSpec extends Specification with DataTables { """ val FieldName = "val" - def err: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - "cannot be converted to Double-like" - ) - ) + def err: String => AtomicError = + input => AtomicError(FieldName, Option(input), "Cannot be converted to Double-like") def e1 = "SPEC NAME" || "INPUT STR" | "EXPECTED" | @@ -379,7 +358,7 @@ class StringToJIntegerSpec extends Specification with DataTables { stringToJInteger should convert valid Strings to Java Integers $e2 """ - val err: String = "cannot be converted to java.lang.Integer" + val err: String = "Cannot be converted to java.lang.Integer" def e1 = "SPEC NAME" || "INPUT STR" | "EXPECTED" | @@ -410,16 +389,8 @@ class StringToBooleanLikeJByteSpec extends Specification with DataTables { """ val FieldName = "val" - def err: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - "cannot be converted to Boolean-like java.lang.Byte" - ) - ) + def err: String => AtomicError = + input => AtomicError(FieldName, Option(input), "Cannot be converted to Boolean-like java.lang.Byte") def e1 = "SPEC NAME" || "INPUT STR" | "EXPECTED" |