Skip to content

Commit

Permalink
Merge pull request #71 from apache/fix_array_no_data
Browse files: browse the repository at this point in the history
Fix no data in Presto (#8268)
  • Loading branch information
Beto Dealmeida authored Sep 30, 2019
2 parents 60f11fd + 0717f65 commit 853e157
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
4 changes: 3 additions & 1 deletion superset/dataframe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -106,11 +106,13 @@ def __init__(self, data, cursor_description, db_engine_spec):
self.column_names = column_names

if dtype:
# put data in a 2D array so we can efficiently access each column;
# the reshape ensures the shape is 2D in case data is empty
array = np.array(data, dtype="object").reshape(-1, len(column_names))
# convert each column in data into a Series of the proper dtype; we
# need to do this because we can not specify a mixed dtype when
# instantiating the DataFrame, and this allows us to have different
# dtypes for each column.
array = np.array(data, dtype="object")
data = {
column: pd.Series(array[:, i], dtype=dtype[column])
for i, column in enumerate(column_names)
Expand Down
21 changes: 21 additions & 0 deletions tests/dataframe_test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
import numpy as np
import pandas as pd

from superset.dataframe import dedup, SupersetDataFrame
from superset.db_engine_specs import BaseEngineSpec
Expand Down Expand Up @@ -135,3 +136,23 @@ def test_pandas_datetime64(self):
cursor_descr = [("ds", "timestamp", None, None, None, None, True)]
cdf = SupersetDataFrame(data, cursor_descr, PrestoEngineSpec)
self.assertEqual(cdf.raw_df.dtypes[0], np.dtype("<M8[ns]"))

def test_no_type_coercion(self):
    """Check that columns of different types keep their own dtype.

    Builds a ``SupersetDataFrame`` from rows that mix strings and integers
    and asserts that the ``varchar`` column is reported as ``object`` while
    the ``integer`` column is reported as pandas' nullable ``Int64``.
    """
    data = [("a", 1), ("b", 2)]
    cursor_descr = [
        ("one", "varchar", None, None, None, None, True),
        ("two", "integer", None, None, None, None, True),
    ]
    cdf = SupersetDataFrame(data, cursor_descr, PrestoEngineSpec)

    # Presumably ``raw_df`` is a pandas DataFrame, so ``dtypes`` is a Series
    # labelled by column name; use ``.iloc`` for explicit positional access
    # rather than relying on integer keys falling back to positions in ``[]``.
    dtypes = cdf.raw_df.dtypes
    self.assertEqual(dtypes.iloc[0], np.dtype("O"))
    self.assertEqual(dtypes.iloc[1], pd.Int64Dtype())

def test_empty_data(self):
    """Check that an empty result set still yields the expected dtypes.

    Builds a ``SupersetDataFrame`` from zero rows and asserts that the
    ``varchar`` column is reported as ``object`` and the ``integer`` column
    as pandas' nullable ``Int64``, the same as for non-empty data.
    """
    data = []
    cursor_descr = [
        ("one", "varchar", None, None, None, None, True),
        ("two", "integer", None, None, None, None, True),
    ]
    cdf = SupersetDataFrame(data, cursor_descr, PrestoEngineSpec)

    # Presumably ``raw_df`` is a pandas DataFrame, so ``dtypes`` is a Series
    # labelled by column name; use ``.iloc`` for explicit positional access
    # rather than relying on integer keys falling back to positions in ``[]``.
    dtypes = cdf.raw_df.dtypes
    self.assertEqual(dtypes.iloc[0], np.dtype("O"))
    self.assertEqual(dtypes.iloc[1], pd.Int64Dtype())

0 comments on commit 853e157

Please sign in to comment.