From bc1afe85ead8e84b93fabc8af7a54422a48b270c Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 15 Jan 2025 23:21:07 +0100 Subject: [PATCH 1/3] CSV: fix parsing files with double-quote inside a field value Fixes #11660 --- .../double_quotes_in_middle_of_field_bis.csv | 3 + autotest/ogr/ogr_csv.py | 20 ++++++ port/cpl_csv.cpp | 63 ++++++++++++------- 3 files changed, 65 insertions(+), 21 deletions(-) create mode 100644 autotest/ogr/data/csv/double_quotes_in_middle_of_field_bis.csv diff --git a/autotest/ogr/data/csv/double_quotes_in_middle_of_field_bis.csv b/autotest/ogr/data/csv/double_quotes_in_middle_of_field_bis.csv new file mode 100644 index 000000000000..050d95d81045 --- /dev/null +++ b/autotest/ogr/data/csv/double_quotes_in_middle_of_field_bis.csv @@ -0,0 +1,3 @@ +first,second,third +1,two"with quote,3 +10,twenty"with quote,30 diff --git a/autotest/ogr/ogr_csv.py b/autotest/ogr/ogr_csv.py index 7cb00c1394ce..b7f2c87a588c 100755 --- a/autotest/ogr/ogr_csv.py +++ b/autotest/ogr/ogr_csv.py @@ -2800,6 +2800,26 @@ def test_ogr_csv_double_quotes_in_middle_of_field(): assert f["str"] == "foo" +############################################################################### +# Test bugfix for https://github.com/OSGeo/gdal/issues/11660 + + +def test_ogr_csv_double_quotes_in_middle_of_field_bis(): + + ds = ogr.Open("data/csv/double_quotes_in_middle_of_field_bis.csv") + lyr = ds.GetLayer(0) + + f = lyr.GetNextFeature() + assert f["first"] == "1" + assert f["second"] == """two"with quote""" + assert f["third"] == "3" + + f = lyr.GetNextFeature() + assert f["first"] == "10" + assert f["second"] == """twenty"with quote""" + assert f["third"] == "30" + + ############################################################################### diff --git a/port/cpl_csv.cpp b/port/cpl_csv.cpp index 6acf55fe234c..36e0216f39ff 100644 --- a/port/cpl_csv.cpp +++ b/port/cpl_csv.cpp @@ -647,45 +647,66 @@ CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t), return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes, bMergeDelimiter); + const size_t nDelimiterLength = strlen(pszDelimiter); + bool bInString = false; // keep in that scope ! + std::string osWorkLine(pszLine); // keep in that scope ! + size_t i = 0; // keep in that scope ! + try { - // We must now count the quotes in our working string, and as - // long as it is odd, keep adding new lines. - std::string osWorkLine(pszLine); - - size_t i = 0; - int nCount = 0; - while (true) { - for (; i < osWorkLine.size(); i++) + for (; i < osWorkLine.size(); ++i) { if (osWorkLine[i] == '\"') - nCount++; + { + if (!bInString) + { + // Only consider " as the start of a quoted string + // if it is the first character of the line, or + // if it is immediately after the field delimiter. + if (i == 0 || + (i >= nDelimiterLength && + osWorkLine.compare(i - nDelimiterLength, + nDelimiterLength, pszDelimiter, + nDelimiterLength) == 0)) + { + bInString = true; + } + } + else if (i + 1 < osWorkLine.size() && + osWorkLine[i + 1] == '"') + { + // Escaped double quote in a quoted string + ++i; + } + else + { + bInString = false; + } + } } - if (nCount % 2 == 0) - break; + if (!bInString) + { + return CSVSplitLine(osWorkLine.c_str(), pszDelimiter, + bKeepLeadingAndClosingQuotes, + bMergeDelimiter); + } - pszLine = pfnReadLine(fp, nMaxLineSize); - if (pszLine == nullptr) + const char *pszNewLine = pfnReadLine(fp, nMaxLineSize); + if (pszNewLine == nullptr) break; osWorkLine.append("\n"); - osWorkLine.append(pszLine); + osWorkLine.append(pszNewLine); } - - char **papszReturn = - CSVSplitLine(osWorkLine.c_str(), pszDelimiter, - bKeepLeadingAndClosingQuotes, bMergeDelimiter); - - return papszReturn; } catch (const std::exception &e) { CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what()); - return nullptr; } + return nullptr; } /************************************************************************/ From 195e02a86acffa76c03b59e0e560db1d5f70332b Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 16 Jan 2025 17:04:35 +0100 Subject: [PATCH 2/3] Shapefile driver/ogr2ogr to Shapefile: write DateTime as ISO8601 string, both in Arrow and non-Arrow code paths Fixes #11671 --- apps/ogr2ogr_lib.cpp | 11 ++++++- autotest/utilities/test_ogr2ogr_lib.py | 43 +++++++++++++++++++++++++ ogr/ogrsf_frmts/shape/ogrshapelayer.cpp | 8 ++--- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/apps/ogr2ogr_lib.cpp b/apps/ogr2ogr_lib.cpp index bcaafd45a580..8937c3976421 100644 --- a/apps/ogr2ogr_lib.cpp +++ b/apps/ogr2ogr_lib.cpp @@ -4048,6 +4048,12 @@ static void DoFieldTypeConversion(GDALDataset *poDstDS, } oFieldDefn.SetType(OFTReal); } + else if (oFieldDefn.GetType() == OFTDateTime && poDstDriver && + EQUAL(poDstDriver->GetDescription(), "ESRI Shapefile")) + { + // Just be silent. The shapefile driver will itself emit a + // warning mentionning it converts DateTime to String. + } else if (!bQuiet) { CPLError( @@ -6518,7 +6524,10 @@ bool LayerTranslator::Translate( } poDstFeature->Reset(); - if (poDstFeature->SetFrom(poFeature.get(), panMap, TRUE) != + + if (poDstFeature->SetFrom( + poFeature.get(), panMap, /* bForgiving = */ TRUE, + /* bUseISO8601ForDateTimeAsString = */ true) != OGRERR_NONE) { if (psOptions->nGroupTransactions) diff --git a/autotest/utilities/test_ogr2ogr_lib.py b/autotest/utilities/test_ogr2ogr_lib.py index bb7fcd43cfc0..d26a52a41e7c 100755 --- a/autotest/utilities/test_ogr2ogr_lib.py +++ b/autotest/utilities/test_ogr2ogr_lib.py @@ -3169,3 +3169,46 @@ def test_ogr2ogr_lib_transfer_filegdb_relationships(tmp_vsimem): assert relationship.GetLeftTableName() == "table6" assert relationship.GetRightTableName() == "table7" assert relationship.GetMappingTableName() == "composite_many_to_many" + + +############################################################################### + + +@gdaltest.enable_exceptions() +@pytest.mark.require_driver("GPKG") +@pytest.mark.parametrize("OGR2OGR_USE_ARROW_API", ["YES", "NO"]) +def test_ogr2ogr_lib_datetime_in_shapefile(tmp_vsimem, OGR2OGR_USE_ARROW_API): + + src_filename = str(tmp_vsimem / "src.gpkg") + with ogr.GetDriverByName("GPKG").CreateDataSource(src_filename) as src_ds: + src_lyr = src_ds.CreateLayer("test", geom_type=ogr.wkbNone) + + field = ogr.FieldDefn("dt", ogr.OFTDateTime) + src_lyr.CreateField(field) + f = ogr.Feature(src_lyr.GetLayerDefn()) + f.SetField("dt", "2022-05-31T12:34:56.789+05:30") + src_lyr.CreateFeature(f) + + got_msg = [] + + def my_handler(errorClass, errno, msg): + got_msg.append(msg) + return + + out_filename = str(tmp_vsimem / "out.dbf") + with gdaltest.error_handler(my_handler), gdaltest.config_options( + {"CPL_DEBUG": "ON", "OGR2OGR_USE_ARROW_API": OGR2OGR_USE_ARROW_API} + ): + gdal.VectorTranslate(out_filename, src_filename) + + if OGR2OGR_USE_ARROW_API == "YES": + assert "OGR2OGR: Using WriteArrowBatch()" in got_msg + else: + assert "OGR2OGR: Using WriteArrowBatch()" not in got_msg + + print(got_msg) + assert "Field dt created as String field, though DateTime requested." in got_msg + + with ogr.Open(out_filename) as dst_ds: + dst_lyr = dst_ds.GetLayer(0) + assert [f.GetField("dt") for f in dst_lyr] == ["2022-05-31T12:34:56.789+05:30"] diff --git a/ogr/ogrsf_frmts/shape/ogrshapelayer.cpp b/ogr/ogrsf_frmts/shape/ogrshapelayer.cpp index 6681dd599618..00f5c812395e 100644 --- a/ogr/ogrsf_frmts/shape/ogrshapelayer.cpp +++ b/ogr/ogrsf_frmts/shape/ogrshapelayer.cpp @@ -2086,11 +2086,11 @@ OGRErr OGRShapeLayer::CreateField(const OGRFieldDefn *poFieldDefn, case OFTDateTime: CPLError( CE_Warning, CPLE_NotSupported, - "Field %s create as date field, though DateTime requested.", + "Field %s created as String field, though DateTime requested.", szNewFieldName); - chType = 'D'; - nWidth = 8; - oModFieldDefn.SetType(OFTDate); + chType = 'C'; + nWidth = static_cast(strlen("YYYY-MM-DDTHH:MM:SS.sss+HH:MM")); + oModFieldDefn.SetType(OFTString); break; default: From c1c885a046dbccd1a81487e98b72ddcb030a5d9d Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 16 Jan 2025 23:37:33 +0100 Subject: [PATCH 3/3] autotest/cpp: really fix warning on fedora:rawhide --- autotest/cpp/gtest_include.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/autotest/cpp/gtest_include.h b/autotest/cpp/gtest_include.h index 2ba176edbdbe..836af6c9cd0c 100644 --- a/autotest/cpp/gtest_include.h +++ b/autotest/cpp/gtest_include.h @@ -17,4 +17,14 @@ #pragma GCC system_header #endif +#if defined(__GNUC__) +// Workaround https://github.com/google/googletest/issues/4701 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcpp" +#endif + #include "gtest/gtest.h" + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif