Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: 1840 invalid characters #1892

Merged
merged 9 commits into from
Oct 21, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.mobilitydata.gtfsvalidator.notice;

import static org.mobilitydata.gtfsvalidator.notice.SeverityLevel.ERROR;

import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice;

/**
* This field contains invalid characters, such as the Unicode replacement character (U+FFFD).
*
* <p>Check that text was properly encoded in UTF-8 as required by GTFS.
*/
@GtfsValidationNotice(severity = ERROR)
public class InvalidCharacterNotice extends ValidationNotice {
/** The name of the file containing the invalid characters. */
private final String filename;

/** The row number in the CSV file where the invalid characters were found. */
private final long csvRowNumber;

/** The name of the field containing the invalid characters. */
private final String fieldName;

/** The value of the field containing the invalid characters. */
private final String fieldValue;

/**
* Creates a notice describing a single field value that contains invalid characters.
*
* @param filename the name of the file containing the invalid characters
* @param csvRowNumber the row number in the CSV file where the invalid characters were found
* @param fieldName the name of the field containing the invalid characters
* @param fieldValue the value of the field containing the invalid characters
*/
public InvalidCharacterNotice(
String filename, long csvRowNumber, String fieldName, String fieldValue) {
this.filename = filename;
this.csvRowNumber = csvRowNumber;
this.fieldName = fieldName;
this.fieldValue = fieldValue;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,7 @@
import java.util.function.Function;
import javax.annotation.Nullable;
import org.mobilitydata.gtfsvalidator.annotation.FieldLevelEnum;
import org.mobilitydata.gtfsvalidator.notice.EmptyRowNotice;
import org.mobilitydata.gtfsvalidator.notice.InvalidColorNotice;
import org.mobilitydata.gtfsvalidator.notice.InvalidCurrencyNotice;
import org.mobilitydata.gtfsvalidator.notice.InvalidDateNotice;
import org.mobilitydata.gtfsvalidator.notice.InvalidFloatNotice;
import org.mobilitydata.gtfsvalidator.notice.InvalidIntegerNotice;
import org.mobilitydata.gtfsvalidator.notice.InvalidLanguageCodeNotice;
import org.mobilitydata.gtfsvalidator.notice.InvalidRowLengthNotice;
import org.mobilitydata.gtfsvalidator.notice.InvalidTimeNotice;
import org.mobilitydata.gtfsvalidator.notice.InvalidTimezoneNotice;
import org.mobilitydata.gtfsvalidator.notice.MissingRecommendedFieldNotice;
import org.mobilitydata.gtfsvalidator.notice.MissingRequiredFieldNotice;
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;
import org.mobilitydata.gtfsvalidator.notice.NumberOutOfRangeNotice;
import org.mobilitydata.gtfsvalidator.notice.TooManyRowsNotice;
import org.mobilitydata.gtfsvalidator.notice.UnexpectedEnumValueNotice;
import org.mobilitydata.gtfsvalidator.notice.ValidationNotice;
import org.mobilitydata.gtfsvalidator.notice.*;
import org.mobilitydata.gtfsvalidator.table.GtfsColumnDescriptor;
import org.mobilitydata.gtfsvalidator.table.GtfsEnum;
import org.mobilitydata.gtfsvalidator.type.GtfsColor;
Expand Down Expand Up @@ -137,6 +121,12 @@ public String asString(int columnIndex, GtfsColumnDescriptor columnDescriptor) {
fileName, getRowNumber(), columnDescriptor.columnName()));
}
if (s != null) {
// Validate if the string contains invalid characters
if (containsInvalidCharacters(s)) {
noticeContainer.addValidationNotice(
new InvalidCharacterNotice(fileName, getRowNumber(), columnDescriptor.columnName(), s));
}

s =
fieldValidator.validateField(
s,
Expand All @@ -146,6 +136,10 @@ public String asString(int columnIndex, GtfsColumnDescriptor columnDescriptor) {
return s;
}

/**
 * Tells whether the given text holds at least one Unicode replacement character (U+FFFD).
 *
 * @param string the text to inspect
 * @return {@code true} if the replacement character occurs anywhere in {@code string}
 */
private boolean containsInvalidCharacters(String string) {
  // indexOf yields -1 when the character is absent, so any non-negative index means a hit.
  final char replacementCharacter = '\uFFFD';
  return string.indexOf(replacementCharacter) >= 0;
}

@Nullable
public String asText(int columnIndex, GtfsColumnDescriptor columnDescriptor) {
return asString(columnIndex, columnDescriptor);
Expand Down
Loading