Skip to content

Commit

Permalink
improve speed of ArcTable.AddRows
Browse files Browse the repository at this point in the history
  • Loading branch information
HLWeil committed Jan 29, 2024
1 parent 3f0f41f commit ddb8238
Show file tree
Hide file tree
Showing 8 changed files with 173 additions and 6 deletions.
6 changes: 1 addition & 5 deletions src/ISA/ISA/ArcTypes/ArcTable.fs
Original file line number Diff line number Diff line change
Expand Up @@ -433,11 +433,7 @@ type ArcTable(name: string, headers: ResizeArray<CompositeHeader>, values: Syste
let column = CompositeColumn.create(h,[|row.[columnIndex]|])
SanityChecks.validateColumn column
// Sanity checks - end
rows
|> Array.iter (fun row ->
Unchecked.addRow index row this.Headers this.Values
index <- index + 1
)
Unchecked.addRows index rows this.Headers this.Values

static member addRows (rows: CompositeCell [] [], ?index: int) =
fun (table:ArcTable) ->
Expand Down
22 changes: 22 additions & 0 deletions src/ISA/ISA/ArcTypes/ArcTableAux.fs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,28 @@ module Unchecked =
)
()

/// Inserts `newRows` into `values` as consecutive rows, the first one landing at row `index`.
///
/// - `index`: row position of the first new row.
/// - `newRows`: rows to insert; each row is an array of cells ordered by column index.
/// - `headers`: only used to determine the column count of the table.
/// - `values`: cell store keyed by `(columnIndex, rowIndex)`; mutated in place.
///
/// When `index` lies inside the existing rows, every row from `index` downwards is first
/// moved `newRows.Length` positions further down so the new rows never overwrite existing ones.
/// This function performs no validation of `index` or of the row lengths itself.
let addRows (index:int) (newRows:CompositeCell [][]) (headers: ResizeArray<CompositeHeader>) (values:Dictionary<int*int,CompositeCell>) =
    // Store start rowCount here, so it does not get changed midway through
    let rowCount = getRowCount values
    let columnCount = getColumnCount headers
    // Existing rows have to make room for ALL new rows, not just a single one.
    let shift = newRows.Length
    // Only shift if rows are inserted (not appended) and there is actually something to insert.
    if index < rowCount && shift > 0 then
        // If there are no rows we would get a negative last row index. In this case just use 0.
        let lastRowIndex = System.Math.Max(rowCount - 1, 0)
        // Start with the last row and go down to `index`, so that a moved cell never lands
        // on a cell that has not been moved yet.
        for rowIndex = lastRowIndex downto index do
            for columnIndex in 0 .. (columnCount-1) do
                moveCellTo(columnIndex,rowIndex,columnIndex,rowIndex+shift) values
    // Then write the new rows into the freed range `index .. index + shift - 1`.
    newRows
    |> Array.iteri (fun rowOffset row ->
        row
        |> Array.iteri (fun columnIndex cell ->
            setCellAt (columnIndex,index + rowOffset,cell) values
        )
    )

/// Functions for transforming base level ARC Table and ISA Json Objects
module JsonTypes =

Expand Down
13 changes: 13 additions & 0 deletions tests/ISA/ISA.Tests/ArcTable.Tests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -1988,6 +1988,19 @@ let private tests_AddRows =
else
Expect.equal table.Values.[columnIndex, rowIndex] newTable.Values.[columnIndex, rowIndex-newColumnCount] $"Cell {columnIndex},{rowIndex}"
)
testCase "performance" (fun () ->
    // Empty four-column table that receives 10000 rows in a single AddRows call.
    let table = ArcTable("MyTable",ResizeArray [CompositeHeader.Input IOType.Sample;CompositeHeader.FreeText "Freetext1" ; CompositeHeader.FreeText "Freetext2"; CompositeHeader.Output IOType.Sample], System.Collections.Generic.Dictionary())
    let rows =
        Array.init 10000 (fun i ->
            [|CompositeCell.FreeText $"Source_{i}"; CompositeCell.FreeText $"FT1_{i}"; CompositeCell.FreeText $"FT2_{i}"; CompositeCell.FreeText $"Sample_{i}"; |])
    let stopwatch = Stopwatch()
    stopwatch.Start()
    table.AddRows(rows)
    stopwatch.Stop()
    // Time budget in milliseconds.
    let expectedTime = 100
    // `TimeSpan.Milliseconds` is only the 0-999 millisecond component (a 1.05 s run would report 50),
    // so the whole duration has to be read from `TotalMilliseconds`.
    let elapsed = int stopwatch.Elapsed.TotalMilliseconds
    Expect.isTrue (elapsed < expectedTime) $"Elapsed time should be less than {expectedTime}ms, but was {elapsed}ms"
)
]

let private tests_UpdateRefWithSheet =
Expand Down
17 changes: 17 additions & 0 deletions tests/ISA/ISA.Tests/Library.fs
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,20 @@ module Test =


let testList = testList

open System
open Fable.Core

/// Minimal stopwatch built on `DateTime` timestamps.
/// NOTE(review): presumably a stand-in for `System.Diagnostics.Stopwatch` on Fable targets - confirm.
///
/// Usage: call `Start`, run the code to measure, call `Stop`, then read `Elapsed`.
[<AttachMembers>]
type Stopwatch() =
    /// Timestamp taken by the most recent `Start` call; `None` until `Start` was called.
    member val StartTime: DateTime option = None with get, set
    /// Timestamp taken by the most recent successful `Stop` call; `None` until then.
    member val StopTime: DateTime option = None with get, set
    /// Records the start timestamp.
    /// UTC is used so that a daylight-saving or time-zone switch between `Start` and `Stop`
    /// cannot distort the measured duration.
    member this.Start() = this.StartTime <- Some DateTime.UtcNow
    /// Records the stop timestamp. Fails if `Start` has not been called before.
    member this.Stop() =
        match this.StartTime with
        | Some _ -> this.StopTime <- Some DateTime.UtcNow
        | None -> failwith "Error. Unable to call `Stop` before `Start`."
    /// Duration between the recorded start and stop timestamps.
    /// Fails unless both `Start` and `Stop` have been called.
    member this.Elapsed : TimeSpan =
        match this.StartTime, this.StopTime with
        | Some start, Some stop -> stop - start
        | _, _ -> failwith "Error. Unable to call `Elapsed` without calling `Start` and `Stop` before."
39 changes: 39 additions & 0 deletions tests/Speedtest/AddRows.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
module AddRows

open ARCtrl.ISA
open ArcTableAux


/// Benchmark baseline: validates `rows` against the table and then inserts them one at a
/// time through `Unchecked.addRow`, starting at `index` (or at the end of the table when
/// `index` is `None`).
let addRowsOld (this:ArcTable) (rows:CompositeCell [] []) (index:int option) =
    /// Position of the next row to insert; advanced after every inserted row.
    let mutable insertAt = defaultArg index this.RowCount
    // Sanity checks: start index first, then every row length, then every single cell against its header.
    SanityChecks.validateRowIndex insertAt this.RowCount true
    for row in rows do
        SanityChecks.validateRowLength row this.ColumnCount
    rows
    |> Array.iter (fun row ->
        for columnIndex = 0 to this.ColumnCount - 1 do
            // Wrap the single cell in a one-element column so the column validation can be reused.
            CompositeColumn.create(this.Headers.[columnIndex],[|row.[columnIndex]|])
            |> SanityChecks.validateColumn
    )
    // Sanity checks - end
    for row in rows do
        Unchecked.addRow insertAt row this.Headers this.Values
        insertAt <- insertAt + 1


/// Creates a fresh table named "MyTable" with four headers (sample input, two free text
/// columns, sample output) and an empty cell dictionary.
let table() =
    let headers =
        ResizeArray [
            CompositeHeader.Input IOType.Sample
            CompositeHeader.FreeText "Freetext1"
            CompositeHeader.FreeText "Freetext2"
            CompositeHeader.Output IOType.Sample
        ]
    ArcTable("MyTable", headers, System.Collections.Generic.Dictionary())
/// 10000 rows of four free text cells each, numbered by their row index.
let rows =
    [| for i in 0 .. 9999 ->
        [|
            CompositeCell.FreeText $"Source_{i}"
            CompositeCell.FreeText $"FT1_{i}"
            CompositeCell.FreeText $"FT2_{i}"
            CompositeCell.FreeText $"Sample_{i}"
        |] |]

/// Returns two independently created tables, one for each implementation under comparison.
let prepareTables() =
    table(), table()

/// Runs the baseline implementation: appends `rows` to `t` via `addRowsOld` (no explicit index).
let oldF (t:ArcTable) =
    let noIndex : int option = None
    addRowsOld t rows noIndex
/// Runs the current implementation: adds `rows` to `t` via `ArcTable.AddRows` (no explicit index).
let newF (t:ArcTable) =
    let newRows = rows
    t.AddRows(newRows)

64 changes: 64 additions & 0 deletions tests/Speedtest/FillMissing.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
module FillMissing

open ARCtrl.ISA
open ArcTableAux

open System.Collections.Generic

/// Benchmark baseline: fills every missing cell of `values` with an empty filler cell.
///
/// For each column, the set of keys present in that column is compared with the set of
/// keys expected for rows `0 .. rowCount - 1`; each key that is missing gets a filler cell.
let fillMissingOld (headers: ResizeArray<CompositeHeader>) (values:Dictionary<int*int,CompositeCell>) =
    let rowTotal = getRowCount values
    let columnTotal = getColumnCount headers
    /// Key collection of the dictionary, filtered per column below
    let allKeys = values.Keys
    for col = 0 to columnTotal - 1 do
        /// Keys that already exist in this column
        let present =
            allKeys
            |> Seq.filter (fun (c,_) -> c = col)
            |> Set.ofSeq
        /// Keys a completely filled column would have
        let expected = Set.ofSeq (Seq.init rowTotal (fun row -> col,row))
        let absent = Set.difference expected present
        // Nothing to do for a complete column
        if not absent.IsEmpty then
            // The header decides between CompositeCell.Term/CompositeCell.Unitized and CompositeCell.FreeText,
            // the first existing cell of the column (if any) decides between CompositeCell.Term and CompositeCell.Unitized.
            //
            // Not sure if we can add a better logic to infer if empty cells should be term or unitized ~Kevin F
            let header = headers.[col]
            let sample =
                if present.IsEmpty then None
                else Some values.[present.MinimumElement]
            let filler = Unchecked.getEmptyCellForHeader header sample
            for (c,r) in absent do
                Unchecked.setCellAt (c,r,filler) values


/// Creates a sparsely filled four-column table for rows 0 to 10000 (inclusive):
/// even rows only have cells in columns 0 and 2, odd rows only in columns 1 and 3.
let table() =
    let headers =
        ResizeArray [
            CompositeHeader.Input IOType.Sample
            CompositeHeader.FreeText "Freetext1"
            CompositeHeader.FreeText "Freetext2"
            CompositeHeader.Output IOType.Sample
        ]
    let values = System.Collections.Generic.Dictionary()
    for i in 0 .. 10000 do
        match i % 2 with
        | 0 ->
            Unchecked.setCellAt(0,i,(CompositeCell.FreeText $"Source_{i}")) values
            Unchecked.setCellAt(2,i,(CompositeCell.FreeText $"FT2_{i}")) values
        | _ ->
            Unchecked.setCellAt(1,i,(CompositeCell.FreeText $"FT1_{i}")) values
            Unchecked.setCellAt(3,i,(CompositeCell.FreeText $"Sample_{i}")) values
    ArcTable("MyTable", headers, values)

/// Returns two independently created sparse tables, one for each implementation under comparison.
let prepareTables() =
    table(), table()

/// Runs the baseline `fillMissingOld` on the headers and values of `t`.
let oldF (t:ArcTable) =
    (t.Headers, t.Values) ||> fillMissingOld

/// Runs the current `Unchecked.fillMissingCells` on the headers and values of `t`.
let newF (t:ArcTable) =
    (t.Headers, t.Values) ||> Unchecked.fillMissingCells

16 changes: 15 additions & 1 deletion tests/Speedtest/Program.fs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@


open ARCtrl
open ARCtrl.ISA

[<EntryPoint>]
let main argv =
Expand All @@ -14,5 +15,18 @@ let main argv =
|> LargeStudy.fromWorkbook
|> ignore
1
elif Array.contains "--addRows" argv then
let t1,t2 = AddRows.prepareTables()
AddRows.oldF t1
AddRows.newF t2
1
elif Array.contains "--fillMissing" argv then
let t1,t2 = FillMissing.prepareTables()
FillMissing.newF t2
FillMissing.oldF t1
FillMissing.oldF t1
FillMissing.newF t2
1

else
0
2 changes: 2 additions & 0 deletions tests/Speedtest/Speedtest.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
</PropertyGroup>

<ItemGroup>
<Compile Include="FillMissing.fs" />
<Compile Include="ManyStudies.fs" />
<Compile Include="LargeStudy.fs" />
<Compile Include="AddRows.fs" />
<Compile Include="Program.fs" />
</ItemGroup>

Expand Down

0 comments on commit ddb8238

Please sign in to comment.