Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dropEmptyRows and dropEmptyCols #510

Merged
merged 2 commits into from
Sep 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions build.fsx
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ let testCoreProjs =
"tests/Deedle.Tests/Deedle.Tests.fsproj"
"tests/Deedle.Math.Tests/Deedle.Math.Tests.fsproj"
"tests/Deedle.CSharp.Tests/Deedle.CSharp.Tests.csproj"
"tests/Deedle.Documentation.Tests/Deedle.Documentation.Tests.fsproj"
//"tests/Deedle.Documentation.Tests/Deedle.Documentation.Tests.fsproj"
"tests/Deedle.PerfTests/Deedle.PerfTests.fsproj"
]

Expand Down Expand Up @@ -189,7 +189,7 @@ Target.create "BuildCoreTests" (fun _ ->
DotNet.build (fun opts -> { opts with Configuration = DotNet.BuildConfiguration.Release }) proj )

Target.create "RunCoreTests" (fun _ ->
for proj in testProjs do
for proj in testCoreProjs do
DotNet.test (fun opts -> { opts with Configuration = DotNet.BuildConfiguration.Release }) proj )

// --------------------------------------------------------------------------------------
Expand Down
24 changes: 23 additions & 1 deletion src/Deedle/FrameExtensions.fs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#nowarn "10001"
#nowarn "10001"
namespace Deedle

// ------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -1283,6 +1283,17 @@ type FrameExtensions =
[<Extension>]
static member DropSparseRows(frame:Frame<'TRowKey, 'TColumnKey>) = Frame.dropSparseRows frame

/// Creates a new data frame that keeps only those rows of the original data
/// frame that have a value in at least one column. Rows in which every column
/// is missing are dropped. The resulting data frame has the same number of
/// columns, but may have fewer rows (or no rows at all).
///
/// ## Parameters
///  - `frame` - An input data frame that is to be filtered
///
/// [category:Missing values]
[<Extension>]
static member DropEmptyRows(frame:Frame<'TRowKey, 'TColumnKey>) =
  frame |> Frame.dropEmptyRows

/// Creates a new data frame that contains only those columns of the original
/// data frame that are _dense_, meaning that they have a value for each row.
/// The resulting data frame has the same number of rows, but may have
Expand All @@ -1295,6 +1306,17 @@ type FrameExtensions =
[<Extension>]
static member DropSparseColumns(frame:Frame<'TRowKey, 'TColumnKey>) = Frame.dropSparseCols frame

/// Creates a new data frame that keeps only those columns of the original data
/// frame that have a value in at least one row. Columns in which every row is
/// missing are dropped. The resulting data frame has the same number of rows,
/// but may have fewer columns (or no columns at all).
///
/// ## Parameters
///  - `frame` - An input data frame that is to be filtered
///
/// [category:Missing values]
[<Extension>]
static member DropEmptyColumns(frame:Frame<'TRowKey, 'TColumnKey>) =
  frame |> Frame.dropEmptyCols

// ----------------------------------------------------------------------------------------------
// Obsolete - kept for temporary compatibility
// ----------------------------------------------------------------------------------------------
Expand Down
38 changes: 37 additions & 1 deletion src/Deedle/FrameModule.fs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace Deedle
namespace Deedle

/// The `Frame` module provides an F#-friendly API for working with data frames.
/// The module follows the usual design for collection-processing in F#, so the
Expand Down Expand Up @@ -1375,6 +1375,26 @@ module Frame =
let newData = frame.Data.Select(VectorHelpers.transformColumn frame.VectorBuilder newRowIndex.AddressingScheme cmd)
Frame<_, _>(newRowIndex, frame.ColumnIndex, newData, frame.IndexBuilder, frame.VectorBuilder)

/// Creates a new data frame that contains only those rows of the original
/// data frame that have a value in at least one column. Rows in which every
/// column is missing are dropped. The resulting data frame has the same number
/// of columns, but may have fewer rows (or no rows at all).
///
/// ## Parameters
///  - `frame` - An input data frame that is to be filtered
///
/// [category:Missing values]
[<CompiledName("DropEmptyRows")>]
let dropEmptyRows (frame:Frame<'R, 'C>) =
  // Build a flag vector over the rows: 'true' when at least one column
  // has a value in that row, 'false' when the whole row is missing
  let hasAnyFlagVector =
    frame.Data
    |> createRowVector
        frame.VectorBuilder frame.RowIndex.AddressingScheme (lazy frame.RowIndex.KeyCount)
        frame.ColumnIndex.KeyCount frame.ColumnIndex.AddressAt
        (fun rowReader -> rowReader.DataSequence |> Seq.exists (fun opt -> opt.HasValue))
  // Search the row index for the rows flagged 'true'; this yields the reduced
  // row index together with a vector command used to rebuild each column
  let newRowIndex, cmd =
    frame.IndexBuilder.Search( (frame.RowIndex, Vectors.Return 0), hasAnyFlagVector, true)
  let newData = frame.Data.Select(VectorHelpers.transformColumn frame.VectorBuilder newRowIndex.AddressingScheme cmd)
  Frame<_, _>(newRowIndex, frame.ColumnIndex, newData, frame.IndexBuilder, frame.VectorBuilder)

/// Creates a new data frame that contains only those columns of the original
/// data frame that are _dense_, meaning that they have a value for each row.
/// The resulting data frame has the same number of rows, but may have
Expand All @@ -1392,6 +1412,22 @@ module Frame =
let colIndex = frame.IndexBuilder.Create(ReadOnlyCollection.ofArray newColKeys, None)
Frame(frame.RowIndex, colIndex, frame.VectorBuilder.Create(newData), frame.IndexBuilder, frame.VectorBuilder )

/// Creates a new data frame that contains only those columns of the original
/// data frame that have a value in at least one row. Columns in which every
/// row is missing are dropped. The resulting data frame has the same number of
/// rows, but may have fewer columns (or no columns at all).
///
/// ## Parameters
///  - `frame` - An input data frame that is to be filtered
///
/// [category:Missing values]
[<CompiledName("DropEmptyColumns")>]
let dropEmptyCols (frame:Frame<'R, 'C>) =
  // Walk the column index in order and keep each column whose vector
  // is present and contains at least one value
  let keptKeys, keptVectors =
    frame.ColumnIndex.Mappings
    |> Seq.choose (fun (KeyValue(colKey, addr)) ->
        match frame.Data.GetValue(addr) with
        | OptionalValue.Present(vec) when vec.ObjectSequence |> Seq.exists (fun o -> o.HasValue) ->
            Some(colKey, vec)
        | _ -> None)
    |> Array.ofSeq
    |> Array.unzip
  // Rebuild the column index and the column data from the surviving columns;
  // the row index is reused unchanged
  let colIndex = frame.IndexBuilder.Create(ReadOnlyCollection.ofArray keptKeys, None)
  Frame(frame.RowIndex, colIndex, frame.VectorBuilder.Create(keptVectors), frame.IndexBuilder, frame.VectorBuilder )

/// Returns the columns of the data frame that do not have any missing values.
/// The operation returns a series (indexed by the column keys of the source frame)
/// containing _series_ representing individual columns of the frame. This is similar
Expand Down
29 changes: 28 additions & 1 deletion tests/Deedle.Tests/Frame.fs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#if INTERACTIVE
#if INTERACTIVE
#I "../../bin/net45"
#load "Deedle.fsx"
#r "../../packages/NUnit/lib/net45/nunit.framework.dll"
Expand Down Expand Up @@ -1582,6 +1582,20 @@ let ``Dropping sparse rows works on sample frame``() =
"C" => series [0 => 0.0; 2 => 2.0; 4 => 4.0 ] ]
actual |> shouldEqual expected

[<Test>]
let ``Dropping empty rows works on sample frame``() =
  // The last row is nan in every column; rows 1 and 2 are nan in only one column
  let input =
    frame [
      "A", Series.ofValues [0.; nan; 2.; nan]
      "B", Series.ofValues [0.; 1.; 2.; nan]
      "C", Series.ofValues [0.; 1.; nan; nan] ]
  // Only the all-nan row should disappear; partially filled rows are kept
  let expected =
    frame [ "A" => series [0 => 0.0; 1 => nan; 2 => 2.0 ]
            "B" => series [0 => 0.0; 1 => 1.0; 2 => 2.0 ]
            "C" => series [0 => 0.0; 1 => 1.0; 2 => nan ] ]
  Frame.dropEmptyRows input |> shouldEqual expected

[<Test>]
let ``Dropping sparse rows works on frame with missing in one column``() =
let sparseFrame =
Expand Down Expand Up @@ -1637,6 +1651,19 @@ let ``Dropping sparse columns works on sample frame``() =
frame [ "B" => series [0 => 0.0; 1 => 1.0; 2 => 2.0; 3 => 3.0; 4 => 4.0; 5 => 5.0 ] ]
actual |> shouldEqual expected

[<Test>]
let ``Dropping empty columns works on sample frame``() =
  // Column "A" is nan in every row; "B" and "C" are nan in only some rows
  let input =
    frame [
      "A", Series.ofValues [nan; nan; nan; nan]
      "B", Series.ofValues [0.; 1.; 2.; nan]
      "C", Series.ofValues [0.; 1.; nan; nan] ]
  // Only the all-nan column should disappear; partially filled columns are kept
  let expected =
    frame [ "B", Series.ofValues [0.; 1.; 2.; nan]
            "C", Series.ofValues [0.; 1.; nan; nan] ]
  Frame.dropEmptyCols input |> shouldEqual expected

// ----------------------------------------------------------------------------------------------
// Obsolete Stream operations
// ----------------------------------------------------------------------------------------------
Expand Down