diff --git a/superset/dataframe.py b/superset/dataframe.py index c1733362d8073..f82ab294d8265 100644 --- a/superset/dataframe.py +++ b/superset/dataframe.py @@ -106,11 +106,13 @@ def __init__(self, data, cursor_description, db_engine_spec): self.column_names = column_names if dtype: + # put data in a 2D array so we can efficiently access each column; + # the reshape ensures the shape is 2D in case data is empty + array = np.array(data, dtype="object").reshape(-1, len(column_names)) # convert each column in data into a Series of the proper dtype; we # need to do this because we can not specify a mixed dtype when # instantiating the DataFrame, and this allows us to have different # dtypes for each column. - array = np.array(data, dtype="object") data = { column: pd.Series(array[:, i], dtype=dtype[column]) for i, column in enumerate(column_names) diff --git a/tests/dataframe_test.py b/tests/dataframe_test.py index d254d63a040bb..a698cff082454 100644 --- a/tests/dataframe_test.py +++ b/tests/dataframe_test.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import numpy as np +import pandas as pd from superset.dataframe import dedup, SupersetDataFrame from superset.db_engine_specs import BaseEngineSpec @@ -135,3 +136,23 @@ def test_pandas_datetime64(self): cursor_descr = [("ds", "timestamp", None, None, None, None, True)] cdf = SupersetDataFrame(data, cursor_descr, PrestoEngineSpec) self.assertEqual(cdf.raw_df.dtypes[0], np.dtype("