// transparent.rs
use crate::{util::*, *};
use anyhow::Context as AContext;
use rusqlite::functions::Context;
use rusqlite::types::ToSqlOutput;
use rusqlite::types::Value;
use rusqlite::OptionalExtension;
use rusqlite::{named_params, params};
use std::collections::HashMap;
use std::time::{Duration, Instant};
// When COMPACT is true, the output will have no magic header, no checksums and no dictids. This saves 4 bytes per value when not using dictionaries and 8 bytes when using dictionaries.
// It also means the data will not be decodable as a normal zstd archive with the standard tools.
static COMPACT: bool = true;
#[derive(Debug)]
struct ColumnInfo {
name: String,
coltype: String,
is_primary_key: bool,
to_compress: bool,
is_dict_id: bool,
}
fn def_min_dict_size() -> i64 {
5000
}
fn def_dict_size_ratio() -> f32 {
0.01
}
fn def_train_dict_samples_ratio() -> f32 {
100.0
}
fn def_incremental_compression_step_bytes() -> i64 {
// https://github.com/facebook/zstd/blob/dev/doc/images/CSpeed2.png
// zstd compresses at about 5MB/s at level 19, so ~1.7MB per step keeps each step at roughly 0.3s
5_000_000 / 3
}
/// This is the configuration of the transparent compression for one column of one table.
/// It is safe to change every property of this configuration at any time except for table and column, but data that is already compressed will not be recompressed with the new settings.
/// You can update the config e.g. using SQL: `update _zstd_configs set config = json_patch(config, '{"compression_level": 18}');`
///
/// Note that the configuration is assumed to be trusted; for example, dict_chooser can probably be used for SQL injection.
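///
/// A minimal example config (values are illustrative; `table`, `column`, `compression_level` and
/// `dict_chooser` are the only required fields, the rest have defaults):
/// `{"table": "events", "column": "data", "compression_level": 19, "dict_chooser": "'a'"}`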
#[derive(serde::Serialize, serde::Deserialize)]
pub struct TransparentCompressConfig {
/// the name of the table to which the transparent compression will be applied. It will be renamed to _tblname_zstd and replaced with an editable view.
pub table: String,
/// the name of the column
pub column: String,
/// The compression level. Valid levels are 1-19.
/// Compression will be significantly slower when the level is increased, but decompression speed should stay about the same regardless of compression level.
/// This makes the level mainly a tradeoff between zstd_incremental_maintenance time and compression ratio rather than SELECT performance.
pub compression_level: i8,
/// An SQL expression that chooses which dict to use or returns null if data should stay uncompressed for now
/// Examples:
///
/// * `'a'`
/// This will cause a single dictionary to be trained for everything.
///
/// * `strftime('%Y-%m', created)`
/// This will cause every month of data to be compressed with its own dictionary.
///
/// * `nullif(strftime('%Y-%m', created), strftime('%Y-%m', 'now'))`
///
/// The same as above, but if the entry is from the current month it will stay uncompressed.
/// This is handy because it means that the dictionary for the month will only be created when the month is over
/// and can thus be optimized the most for the given data
/// * `case when date(timestamp, 'weekday 0') < date('now', 'weekday 0') then data_type || '.' || date(timestamp, 'weekday 0') else null end`
///
/// This one uses keys like `data_type.2020-11-01` where the date is the first day of the week, except for the current week, which stays uncompressed.
/// This means that every different data_type will be compressed separately and separately for each week.
///
/// You can return the special string `[nodict]` to compress the given data without a dictionary.
/// Note that the chooser key is global across all tables. So if you want your dict to only apply to this table, return
/// something like `"tablename." || strftime(...)`.
pub dict_chooser: String,
#[serde(default = "def_min_dict_size")]
/// if the target dictionary size would be smaller than this, no dict will be trained, and if no dict exists yet the data will stay uncompressed
pub min_dict_size_bytes_for_training: i64,
#[serde(default = "def_dict_size_ratio")]
/// The target size of the dictionary as a ratio of the data seen so far.
/// For example, if we see 10MB of data for a specific group, the dict will target a size of ratio * 10MB (default 0.01).
pub dict_size_ratio: f32,
/// for training, we sample roughly this factor times the target dictionary size worth of data (default 100)
/// the defaults of 100 and 0.01 mean that the dict is trained on approximately all of the data
#[serde(default = "def_train_dict_samples_ratio")]
pub train_dict_samples_ratio: f32,
/// how many bytes (approximately) to compress at once. By default tuned so at compression level 19 it locks the database for about 0.3s per step.
#[serde(default = "def_incremental_compression_step_bytes")]
pub incremental_compression_step_bytes: i64,
}
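/// Formats a byte count with SI units and two decimals,
/// e.g. `pretty_bytes(1_234_567)` returns `"1.23MB"` and `pretty_bytes(999)` returns `"999B"`.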
pub fn pretty_bytes(bytes: i64) -> String {
if bytes >= 1_000_000_000 {
format!("{:.2}GB", bytes as f64 / 1e9)
} else if bytes >= 1_000_000 {
format!("{:.2}MB", bytes as f64 / 1e6)
} else if bytes >= 1_000 {
format!("{:.2}kB", bytes as f64 / 1e3)
} else {
format!("{}B", bytes)
}
}
#[derive(Debug)]
enum SqliteAffinity {
Integer,
Text,
Blob,
Real,
Numeric,
}
/// determine affinity, algorithm described at https://www.sqlite.org/draft/datatype3.html#determination_of_column_affinity
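/// For example, `VARCHAR(255)` maps to Text, `FLOATING POINT` maps to Integer (it contains "int"),
/// an empty declared type maps to Blob, and `DECIMAL(10,5)` maps to Numeric.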
fn get_column_affinity(declared_type: &str) -> SqliteAffinity {
use SqliteAffinity::*;
let typ = declared_type.to_ascii_lowercase();
if typ.contains("int") {
Integer
} else if typ.contains("char") || typ.contains("clob") || typ.contains("text") {
Text
} else if typ.contains("blob") || typ.is_empty() {
Blob
} else if typ.contains("real") || typ.contains("floa") || typ.contains("doub") {
Real
} else {
Numeric
}
}
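/// Logs warnings if the database is not configured as recommended for sqlite-zstd
/// (WAL journal mode, full auto_vacuum and a non-zero busy_timeout).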
fn show_warnings(db: &Connection) -> anyhow::Result<()> {
// warnings
let journal_mode: String = db
.query_row("pragma journal_mode;", params![], |r| r.get(0))
.context("querying journal mode")?;
if journal_mode != "wal" {
log::warn!("Warning: It is recommended to set `pragma journal_mode=WAL;`");
}
let vacuum_mode: i32 = db
.query_row("pragma auto_vacuum;", params![], |r| r.get(0))
.context("querying vacuum mode")?;
if vacuum_mode != 1 {
log::warn!("Warning: It is recommended to set `pragma auto_vacuum=full;`");
}
let busy_timeout: i32 = db
.query_row("pragma busy_timeout;", params![], |r| r.get(0))
.context("querying busy timeout")?;
if busy_timeout == 0 {
log::warn!("Warning: It is recommended to set `pragma busy_timeout=2000;` or higher");
}
Ok(())
}
///
/// enables transparent row-level compression for a table with the following steps:
///
/// 1. renames tablename to _tablename_zstd if table is not already enabled
/// 2. creates a view called tablename that mirrors _tablename_zstd except it decompresses the compressed column on the fly
/// 3. creates INSERT, UPDATE and DELETE triggers on the view so they affect the backing table instead
///
/// Warning: this function assumes trusted input; it is not SQL injection safe!
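///
/// Typical usage from SQL (assuming the extension is loaded and registers this as the
/// `zstd_enable_transparent` scalar function), passing the JSON-serialized config as the only argument:
/// `select zstd_enable_transparent('{"table": "events", "column": "data", "compression_level": 19, "dict_chooser": "''a''"}');`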
pub fn zstd_enable_transparent<'a>(ctx: &Context) -> anyhow::Result<ToSqlOutput<'a>> {
let arg_config = 0;
let config_str: String = ctx.get(arg_config)?;
let config: TransparentCompressConfig = serde_json::from_str(&config_str)
.with_context(|| format!("parsing json config '{}'", config_str))?;
let db = &mut unsafe { ctx.get_connection()? };
let db = db
.unchecked_transaction()
.context("Could not start transaction")?;
let table_name = &config.table;
let new_table_name = format!("_{}_zstd", table_name);
let configs = get_configs(&db)?;
let already_compressed_columns = configs
.iter()
.filter(|c| &c.table == table_name)
.map(|c| &c.column[..])
.collect::<Vec<&str>>();
log::debug!(
"already compressed columns={:?}",
already_compressed_columns
);
if already_compressed_columns.contains(&&config.column[..]) {
anyhow::bail!(
"Column {} is already enabled for compression.",
&config.column
);
}
let table_already_enabled = !already_compressed_columns.is_empty();
let dict_id_columns: Vec<String> = if table_already_enabled {
let query = format!(
r#"select "from"
from pragma_foreign_key_list('{}')
where "table" = '_zstd_dicts'"#,
&new_table_name
);
log::debug!("dict_id_columns query {:?}", query);
db.prepare(&query)?
.query_map(params![], |row| row.get("from"))
.context("Could not get dicts ids info")?
.collect::<Result<Vec<String>, _>>()?
} else {
vec![]
};
log::debug!("dict_id columns={:?}", dict_id_columns);
if !check_table_exists(
&db,
if table_already_enabled {
&new_table_name
} else {
table_name
},
) {
anyhow::bail!("Table {} doesn't exist", table_name);
}
let columns_info: Vec<ColumnInfo> = db
.prepare(&format_sqlite!(
r#"pragma table_info({})"#,
if table_already_enabled {
&new_table_name
} else {
table_name
}
))?
.query_map(params![], |row| {
let col_name: String = row.get("name")?;
let to_compress = (col_name == config.column)
|| (already_compressed_columns.contains(&&col_name[..]));
let is_dict_id = dict_id_columns.contains(&col_name);
Ok(ColumnInfo {
name: col_name,
is_primary_key: row.get("pk")?,
coltype: row.get("type")?,
to_compress,
is_dict_id,
})
})
.context("Could not query table_info")?
.collect::<Result<_, rusqlite::Error>>()?;
show_warnings(&db)?;
// primary key columns. these will be used to index the table in the modifying triggers
let primary_key_columns: Vec<&ColumnInfo> =
columns_info.iter().filter(|e| e.is_primary_key).collect();
if columns_info.is_empty() {
anyhow::bail!("Table {} does not exist", table_name);
}
if primary_key_columns.is_empty() {
anyhow::bail!(
"Table {} does not have a primary key, sqlite-zstd only works on tables with primary keys, since rowids can change on VACUUM.",
table_name
);
}
let column_name = &config.column;
let to_compress_column = columns_info
.iter()
.find(|e| &e.name == column_name)
.with_context(|| format!("Column {} does not exist in {}", column_name, table_name))?;
if to_compress_column.is_primary_key {
anyhow::bail!("Can't compress column {} since it is part of primary key (this could probably be supported, but currently isn't)", column_name);
}
check_columns_to_compress_are_not_indexed(&db, &columns_info, table_name)?;
let dict_id_column_name = get_dict_id(&to_compress_column.name);
log::debug!("cols={:?}", columns_info);
{
let query = format!(
"select ({}) as dict_chooser from {} limit 1",
config.dict_chooser,
escape_sqlite_identifier(table_name)
);
// small sanity check of chooser statement
db.query_row(&query, params![], |row| row.get::<_, String>(0))
.optional()
.with_context(|| format!("Tried to execute:\n{}", query))
.context(r#"Dict chooser expression does not seem to be valid. Make sure you return a string and get your escaping right: If you want an sqlite string inside a json string inside a sqlite string you need to do '{"foo": "''bar''"}'"#)?;
}
{
// can't use prepared statement at these positions
if !table_already_enabled {
let rename_query =
format_sqlite!("alter table {} rename to {}", table_name, &new_table_name);
log::debug!("[run] {}", &rename_query);
db.execute(&rename_query, params![])
.context("Could not rename table")?;
}
util::ensure_dicts_table_exists(&db)?;
db.execute(
"
create table if not exists _zstd_configs (
id integer primary key autoincrement,
config json not null
);",
params![],
)
.context("Could not create _zstd_configs")?;
db.execute(
"insert into _zstd_configs (config) values (?)",
params![config_str],
)
.context("Could not insert config")?;
db.execute(
&format_sqlite!(
"alter table {} add column {} integer default null references _zstd_dicts(id)",
&new_table_name,
&dict_id_column_name
),
params![],
)
.context("Could not add dictid column")?;
// this index is needed since the maintenance function queries by the dictionary id to find rows that are not compressed
db.execute(
&format_sqlite!(
"create index {} on {} ({})",
&format!("{}_idx", &dict_id_column_name),
&new_table_name,
&dict_id_column_name
),
params![],
)
.context("Could not create index on dictid")?;
}
create_or_replace_view(
&db,
&columns_info,
table_name,
&new_table_name,
table_already_enabled,
)?;
create_insert_trigger(&db, &columns_info, table_name, &new_table_name, &config)?;
// a WHERE statement that selects a row based on the primary key
let primary_key_condition = primary_key_columns
.iter()
.map(|c| format_sqlite!("old.{0} = {0}", &c.name))
.collect::<Vec<String>>()
.join(" and ");
// add delete trigger
create_delete_trigger(&db, table_name, &new_table_name, &primary_key_condition)?;
// update trigger
create_update_triggers(
&db,
&columns_info,
table_name,
&new_table_name,
&primary_key_condition,
&config,
)?;
db.commit().context("Could not commit transaction")?;
Ok(ToSqlOutput::Owned(Value::Text("Done!".to_string())))
}
fn get_dict_id(column_name: &str) -> String {
format!("_{}_dict", column_name)
}
fn check_table_exists(db: &rusqlite::Connection, table_name: &str) -> bool {
let table_count: u32 = db
.query_row(
"select count(`type`) from sqlite_master where name = ? and type = 'table'",
params![table_name],
|r| r.get(0),
)
.unwrap_or(0);
table_count != 0
}
fn check_columns_to_compress_are_not_indexed(
db: &rusqlite::Connection,
columns_info: &[ColumnInfo],
table_name: &str,
) -> anyhow::Result<()> {
let indexed_columns: HashMap<String, String> = db
.prepare(
"
select distinct ii.name as column_name, il.name as index_name
from sqlite_master as m,
pragma_index_list(m.name) as il,
pragma_index_info(il.name) as ii
where m.type='table' AND m.name=?",
)?
.query_map(params![table_name], |row| {
Ok((row.get("column_name")?, row.get("index_name")?))
})
.context("could not get indices info")?
.collect::<Result<_, rusqlite::Error>>()?;
let indexed_columns_to_compress = columns_info
.iter()
.filter(|c| match indexed_columns.get(&c.name) {
Some(_) => c.to_compress,
None => false,
})
.collect::<Vec<&ColumnInfo>>();
if !indexed_columns_to_compress.is_empty() {
let columns_indices = indexed_columns_to_compress
.iter()
.map(|c| format!("{} ({})", c.name, indexed_columns.get(&c.name).unwrap()))
.collect::<Vec<String>>()
.join(", ");
anyhow::bail!("Can't compress column(s): {} - used as part of index (this could probably be supported, but currently isn't)", columns_indices);
};
Ok(())
}
fn create_or_replace_view(
db: &rusqlite::Connection,
columns_info: &[ColumnInfo],
table_name: &str,
internal_table_name: &str,
table_already_enabled: bool,
) -> anyhow::Result<()> {
if table_already_enabled {
// this drops the existing triggers as well
let dropview_query = format!(r#"drop view {}"#, escape_sqlite_identifier(table_name));
log::debug!("[run] {}", &dropview_query);
db.execute(&dropview_query, params![])
.context("Could not drop view")?;
}
// create view
let select_columns_escaped = columns_info
.iter()
.filter(|c| !c.is_dict_id )
.map(|c| {
if c.to_compress {
let affinity_is_text = match get_column_affinity(&c.coltype) {
SqliteAffinity::Blob => false,
SqliteAffinity::Text => true,
other => anyhow::bail!("the to-compress column has type {} which has affinity {:?}, but affinity must be text or blob. See https://www.sqlite.org/draft/datatype3.html#determination_of_column_affinity", c.coltype, other)
};
Ok(format!(
// prepared statement parameters not allowed in view
"zstd_decompress_col({}, {}, {}, {}) as {0}",
&escape_sqlite_identifier(&c.name),
if affinity_is_text { 1 } else { 0 },
&escape_sqlite_identifier(&get_dict_id(&c.name)),
COMPACT
))
} else {
Ok(format_sqlite!("{}", &c.name))
}
})
.collect::<Result<Vec<String>, _>>()
.context("could not construct select in view")?
.join(", ");
let createview_query = format!(
r#"
create view {} as
select {}
from {}
"#,
escape_sqlite_identifier(table_name),
select_columns_escaped,
escape_sqlite_identifier(internal_table_name)
);
log::debug!("[run] {}", &createview_query);
db.execute(&createview_query, params![])
.context("Could not create view")?;
Ok(())
}
fn create_insert_trigger(
db: &rusqlite::Connection,
columns_info: &[ColumnInfo],
table_name: &str,
internal_table_name: &str,
_config: &TransparentCompressConfig,
) -> anyhow::Result<()> {
let trigger_name = format!("{}_insert_trigger", table_name);
// expressions that map backing table columns to view columns
let mut insert_selection = vec![];
// names of the columns to be inserted
let mut columns_selection = vec![];
for c in columns_info {
if c.is_dict_id {
continue;
}
columns_selection.push(String::from(&c.name));
if c.to_compress {
let dict_id = get_dict_id(&c.name);
insert_selection.push(format!(
// prepared statement parameters are not allowed in triggers
"new.{col} as {col}, null as {dictcol}",
col = escape_sqlite_identifier(&c.name),
dictcol = escape_sqlite_identifier(&dict_id)
));
columns_selection.push(String::from(&dict_id));
} else {
insert_selection.push(format_sqlite!("new.{}", &c.name));
}
}
let createtrigger_query = format!(
"
create trigger {}
instead of insert on {}
for each row
begin
insert into {}({}) select {};
end;
",
escape_sqlite_identifier(&trigger_name),
escape_sqlite_identifier(table_name),
escape_sqlite_identifier(internal_table_name),
columns_selection.join(", "),
insert_selection.join(",\n"),
);
log::debug!("[run] {}", &createtrigger_query);
db.execute(&createtrigger_query, params![])
.context("Could not create insert trigger")?;
Ok(())
}
fn create_delete_trigger(
db: &rusqlite::Connection,
table_name: &str,
internal_table_name: &str,
primary_key_condition: &str,
) -> anyhow::Result<()> {
let trigger_name = format!("{}_delete_trigger", table_name);
let deletetrigger_query = format!(
"
create trigger {trg_name}
instead of delete on {view}
for each row
begin
delete from {backing_table} where {primary_key_condition};
end;
",
trg_name = escape_sqlite_identifier(&trigger_name),
view = escape_sqlite_identifier(table_name),
backing_table = escape_sqlite_identifier(internal_table_name),
primary_key_condition = primary_key_condition
);
log::debug!("[run] {}", &deletetrigger_query);
db.execute(&deletetrigger_query, params![])
.context("could not create delete trigger")?;
Ok(())
}
fn create_update_triggers(
db: &rusqlite::Connection,
columns_info: &[ColumnInfo],
table_name: &str,
internal_table_name: &str,
primary_key_condition: &str,
_config: &TransparentCompressConfig,
) -> anyhow::Result<()> {
for col in columns_info {
if col.is_dict_id {
continue;
}
let trigger_name = format!("{}_update_{}_trigger", table_name, col.name);
let update = if col.to_compress {
format!(
"{col} = new.{col}, {dictcol} = null",
col = escape_sqlite_identifier(&col.name),
dictcol = escape_sqlite_identifier(&get_dict_id(&col.name)),
)
} else {
format_sqlite!("{} = new.{}", &col.name, &col.name)
};
// update triggers
let updatetrigger_query = format!(
"
create trigger {trg_name}
instead of update of {upd_col} on {view_name}
for each row
begin
update {backing_table} set {update} where {primary_key_condition};
end;
",
trg_name = escape_sqlite_identifier(&trigger_name),
view_name = escape_sqlite_identifier(table_name),
backing_table = escape_sqlite_identifier(internal_table_name),
upd_col = escape_sqlite_identifier(&col.name),
update = update,
primary_key_condition = primary_key_condition
);
log::debug!("[run] {}", &updatetrigger_query);
db.execute(&updatetrigger_query, params![])
.with_context(|| format!("Could not create update of {} trigger", col.name))?;
}
Ok(())
}
fn get_configs(db: &rusqlite::Connection) -> Result<Vec<TransparentCompressConfig>, anyhow::Error> {
// if the table `_zstd_configs` does not exist yet, transparent compression hasn't been used yet, so return an empty array
if !check_table_exists(db, "_zstd_configs") {
return Ok(vec![]);
}
let configs = db
.prepare("select config from _zstd_configs")?
.query_map(params![], |row| {
serde_json::from_str(row.get_ref_unwrap("config").as_str()?)
.context("parsing config")
.map_err(ah)
})
.context("Couldn't fetch configs")?
.collect::<Result<Vec<TransparentCompressConfig>, rusqlite::Error>>()?;
Ok(configs)
}
#[derive(Debug)]
struct TodoInfo {
dict_choice: Option<String>,
count: i64,
total_bytes: i64,
}
struct IncrementalMaintenanceArgs {
end_limit: Instant,
target_db_load: f32,
time_limit: f64,
}
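/// Performs incremental compression maintenance for all configured tables.
///
/// Takes two SQL arguments: a time limit in seconds (null means effectively unlimited) after which
/// the function returns early, and a target database write load (e.g. 0.5) that controls how long to
/// sleep between compression chunks. Returns 1 if it stopped due to the time limit with work still
/// pending, and 0 if all maintenance work is done.
///
/// Example (assuming the extension registers this as the `zstd_incremental_maintenance` SQL function):
/// `select zstd_incremental_maintenance(60, 0.5);`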
pub fn zstd_incremental_maintenance<'a>(ctx: &Context) -> Result<ToSqlOutput<'a>, anyhow::Error> {
let args = {
let arg_time_limit_seconds = 0;
let arg_target_db_load = 1;
let time_limit: Option<f64> = ctx
.get(arg_time_limit_seconds)
.context("could not get time limit argument")?;
let time_limit = time_limit.unwrap_or(100000000.0);
let target_db_load: f32 = ctx
.get(arg_target_db_load)
.context("could not get target db load argument")?;
if !(0.0..=1e100).contains(&time_limit) {
anyhow::bail!("time too large");
}
let end_limit = Instant::now() + Duration::from_secs_f64(time_limit);
IncrementalMaintenanceArgs {
end_limit,
target_db_load,
time_limit,
}
};
let db = unsafe { ctx.get_connection()? };
show_warnings(&db)?;
let configs = get_configs(&db)?;
for config in configs {
match maintenance_for_config(&db, config, &args)? {
MaintRet::TimeLimitReached => {
log::info!(
"time limit of {:.1}s reached, stopping with more maintenance work pending",
args.time_limit
);
return Ok(1.into());
}
MaintRet::Completed => {}
}
}
log::info!("All maintenance work completed!");
Ok(0.into())
}
enum MaintRet {
TimeLimitReached,
Completed,
}
struct EscapedNames {
compressed_tablename: String,
data_colname: String,
dict_colname: String,
}
impl From<&TransparentCompressConfig> for EscapedNames {
fn from(config: &TransparentCompressConfig) -> EscapedNames {
EscapedNames {
compressed_tablename: escape_sqlite_identifier(&format!("_{}_zstd", config.table)),
data_colname: escape_sqlite_identifier(&config.column),
dict_colname: escape_sqlite_identifier(&format!("_{}_dict", config.column)),
}
}
}
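/// Runs maintenance for a single (table, column) configuration: groups the not-yet-compressed rows
/// by the dict_chooser expression, then trains dictionaries and compresses each group, stopping
/// early if the time limit is reached.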
fn maintenance_for_config(
db: &Connection,
config: TransparentCompressConfig,
args: &IncrementalMaintenanceArgs,
) -> anyhow::Result<MaintRet> {
let esc_names = EscapedNames::from(&config);
let todos = db
.prepare(&format!(
"select
({chooser}) as dict_choice,
count(*) as count,
sum(length({datacol})) as total_bytes
from {tbl} where {dictcol} is null group by dict_choice",
tbl = esc_names.compressed_tablename,
dictcol = esc_names.dict_colname,
datacol = esc_names.data_colname,
chooser = config.dict_chooser
))?
.query_map(params![], |row| {
Ok(TodoInfo {
dict_choice: row.get("dict_choice")?,
count: row.get("count")?,
total_bytes: row.get("total_bytes")?,
})
})?
.collect::<Result<Vec<_>, _>>()?;
let total_bytes_to_compress: i64 = todos
.iter()
.filter(|e| e.dict_choice.is_some())
.map(|e| e.total_bytes)
.sum();
let mut rows_compressed_so_far: i64 = 0;
let mut bytes_compressed_so_far: i64 = 0;
let total_rows_to_compress: i64 = todos
.iter()
.filter(|e| e.dict_choice.is_some())
.map(|e| e.count)
.sum();
log::info!(
"{}.{}: Total {} rows ({}) to potentially compress (split in {} groups).",
config.table,
config.column,
total_rows_to_compress,
pretty_bytes(total_bytes_to_compress),
todos.len()
);
for todo in todos.into_iter() {
let rows_handled = maintenance_for_todo(db, &config, &todo, &esc_names, args)?;
rows_compressed_so_far += rows_handled;
// estimate bytes compressed
bytes_compressed_so_far +=
((rows_handled as f64 / todo.count as f64) * todo.total_bytes as f64) as i64;
if rows_handled > 0 {
log::info!(
"Handled {} / {} rows ({} / {})",
rows_compressed_so_far,
total_rows_to_compress,
pretty_bytes(bytes_compressed_so_far),
pretty_bytes(total_bytes_to_compress)
);
}
if Instant::now() > args.end_limit {
return Ok(MaintRet::TimeLimitReached);
}
}
Ok(MaintRet::Completed)
}
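/// Compresses the pending rows of one dict_chooser group in chunks of roughly
/// `incremental_compression_step_bytes`, sleeping between chunks to keep the write load near
/// `target_db_load`. Returns the number of rows compressed.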
fn maintenance_for_todo(
db: &Connection,
config: &TransparentCompressConfig,
todo: &TodoInfo,
esc_names: &EscapedNames,
args: &IncrementalMaintenanceArgs,
) -> anyhow::Result<i64> {
let avg_sample_bytes = todo.total_bytes / todo.count;
let dict_choice = todo.dict_choice.as_deref().unwrap_or("[null]");
log::debug!(
"looking at group={}, has {} rows with {} average size ({} total)",
dict_choice,
todo.count,
pretty_bytes(avg_sample_bytes),
pretty_bytes(todo.total_bytes)
);
let (dict_id, dict_is_new) =
match get_or_train_dict(db, config, todo, esc_names).context("getting dict")? {
TrainDictReturn::Skip => return Ok(0),
TrainDictReturn::Done {
dict_id,
dict_is_new,
} => (dict_id, dict_is_new),
};
let mut total_updated: i64 = 0;
let mut chunk_size = config.incremental_compression_step_bytes / avg_sample_bytes;
if chunk_size < 1 {
chunk_size = 1;
}
log::debug!(
"Compressing {} samples with key {} and level {}, chunksize {}",
todo.count,
dict_choice,
config.compression_level,
chunk_size
);
loop {
let update_start = Instant::now();
let q = &format!(
"update {tbl} set {datacol} = zstd_compress_col({datacol}, :lvl, :dict, :compact), {dictcol} = :dict where rowid in (select rowid from {tbl} where {dictcol} is null and :dictchoice = ({chooser}) limit :chunksize)",
tbl = esc_names.compressed_tablename,
datacol = esc_names.data_colname,
dictcol = esc_names.dict_colname,
chooser = config.dict_chooser
);
log::trace!("executing {}", q);
let updated = db
.execute(
q,
named_params! {
":lvl": config.compression_level,
":dict": dict_id,
":dictchoice": &dict_choice,
":chunksize": chunk_size,
":compact": COMPACT
},
)
.with_context(|| format!("while compressing chunk for key {}", dict_choice))?;
total_updated += updated as i64;
log::debug!("Compressed {} / {}", total_updated, todo.count);
if Instant::now() > args.end_limit {
break;
}
let elapsed = update_start.elapsed();
if elapsed.div_f32(args.target_db_load) > elapsed {
let sleep_duration = elapsed.div_f32(args.target_db_load) - elapsed;
if sleep_duration > Duration::from_millis(1) {
log::debug!(
"Sleeping {}s to keep write load at {}",
sleep_duration.as_secs_f32(),
args.target_db_load
);
std::thread::sleep(sleep_duration);
}
}
if updated == 0 {
break;
}
}
let (total_size_after, total_count_after): (i64, i64) = db.query_row(
&format!(
"select sum(length({datacol})), count(*) from {tbl} where {dictcol} = ?",
tbl = esc_names.compressed_tablename,
datacol = esc_names.data_colname,
dictcol = esc_names.dict_colname
),
params![dict_id],
|row| Ok((row.get(0)?, row.get(1)?)),
)?;
if dict_is_new {
log::info!(
"Compressed {} rows with dict_choice={} (dict_id={}). Total size of entries before: {}, afterwards: {}, (average: before={}, after={})",
total_updated,
dict_choice,
dict_id,
pretty_bytes(todo.total_bytes),
pretty_bytes(total_size_after),
pretty_bytes(avg_sample_bytes),
pretty_bytes(total_size_after / total_count_after),
);
}
Ok(total_updated)
}
enum TrainDictReturn {
Skip,
Done { dict_id: i32, dict_is_new: bool },
}
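/// Returns the dictionary to use for one group: `-1` (no dictionary) for the special `[nodict]`
/// key, an existing dictionary id if one was already trained for this chooser key, or a newly
/// trained dictionary if the group has accumulated enough data.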
fn get_or_train_dict(
db: &Connection,
config: &TransparentCompressConfig,
todo: &TodoInfo,
esc_names: &EscapedNames,
) -> anyhow::Result<TrainDictReturn> {
let dict_choice = match &todo.dict_choice {
None => {
log::debug!("Skipping group, no dict chosen");
return Ok(TrainDictReturn::Skip);
}
Some(e) => e,
};
if dict_choice == "[nodict]" {
return Ok(TrainDictReturn::Done {
dict_id: -1,
dict_is_new: false,
});
}
let avg_sample_bytes = todo.total_bytes / todo.count;
let dict_id: Option<i32> = db
.query_row(
"select id from _zstd_dicts where chooser_key = ?",
params![dict_choice],
|row| row.get("id"),
)
.optional()?;
Ok(match dict_id {
Some(dict_id) => {
log::debug!(
"Found existing dictionary id={} for key={}",
dict_id,
dict_choice
);
TrainDictReturn::Done {
dict_id,
dict_is_new: false,
}
}
None => {
let dict_target_size = (todo.total_bytes as f32 * config.dict_size_ratio) as i64;
if dict_target_size < config.min_dict_size_bytes_for_training {
log::debug!(
"Dictionary for group '{}' would be smaller than minimum ({} * {:.3} = {} < {}), ignoring",
dict_choice,
pretty_bytes(todo.total_bytes),
config.dict_size_ratio,
pretty_bytes(dict_target_size),
pretty_bytes(config.min_dict_size_bytes_for_training)
);
return Ok(TrainDictReturn::Skip);
}
let target_samples = (dict_target_size as f32 * config.train_dict_samples_ratio
/ avg_sample_bytes as f32) as i64; // use roughly train_dict_samples_ratio (default 100x) the dictionary size as training data
log::debug!(
"Training dict for key {} of max size {}",
dict_choice,
pretty_bytes(dict_target_size)
);
let dict_id = db.query_row(&format!(
"select zstd_train_dict_and_save({datacol}, ?, ?, ?) as dictid from {tbl} where {dictcol} is null and ? = ({chooser})",
datacol=esc_names.data_colname,
tbl=esc_names.compressed_tablename,
dictcol=esc_names.dict_colname,
chooser=config.dict_chooser
), params![dict_target_size, target_samples, dict_choice, dict_choice], |row| row.get("dictid"))?;
TrainDictReturn::Done {
dict_id,
dict_is_new: true,
}
}
})
}
#[cfg(test)]
mod tests {
use super::add_functions::tests::create_example_db;
use super::*;
use pretty_assertions::assert_eq;
use rand::prelude::SliceRandom;
use rusqlite::params;
use rusqlite::{Connection, Row};
fn row_to_thong(r: &Row) -> anyhow::Result<Vec<Value>> {
Ok((0..r.column_count())
.map(|i| r.get_ref(i).map(|e| e.into()))
.collect::<Result<_, _>>()?)
}
fn get_whole_table(db: &Connection, tbl_name: &str) -> anyhow::Result<Vec<Vec<Value>>> {
let mut stmt = db.prepare(&format!("select * from {} ORDER BY id", tbl_name))?;
let q1: Vec<Vec<Value>> = stmt
.query_map(params![], |e| row_to_thong(e).map_err(ah))?
.collect::<Result<_, rusqlite::Error>>()?;
Ok(q1)
}
fn check_table_rows_same(db1: &Connection, db2: &Connection) -> anyhow::Result<()> {
let tbl1 = get_whole_table(db1, "events").context("Could not get whole table db 1")?;
let tbl2 = get_whole_table(db2, "events").context("Could not get whole table db 2")?;
assert_eq!(tbl1, tbl2);
Ok(())
}
#[test]
fn sanity() -> anyhow::Result<()> {
let db1 = create_example_db(Some(123), 100)?;
let db2 = create_example_db(Some(123), 100)?;