This repository has been archived by the owner on Jan 2, 2025. It is now read-only.
forked from datto/dattobd
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathelastio-snap.c
6748 lines (5496 loc) · 186 KB
/
elastio-snap.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2015 Datto Inc.
* Additional contributions by Elastio Software, Inc are Copyright (C) 2020 Elastio Software Inc.
*/
#include "includes.h"
#include "kernel-config.h"
#include "elastio-snap.h"
//current lowest supported kernel = 3.10.0
//basic module information registered through the MODULE_* macros
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Tom Caputi");
MODULE_DESCRIPTION("Kernel module for supporting block device snapshots and incremental backups.");
//the version string comes from ELASTIO_SNAP_VERSION, which is defined outside this file section
MODULE_VERSION(ELASTIO_SNAP_VERSION);
//printing macros
//every message is prefixed with "elastio-snap: " and terminated with a newline
//LOG_DEBUG prints at KERN_DEBUG level, and only when elastio_snap_debug is non-zero
#define LOG_DEBUG(fmt, args...) \
do{ \
if(elastio_snap_debug) printk(KERN_DEBUG "elastio-snap: " fmt "\n", ## args); \
}while(0)
//LOG_WARN prints unconditionally at KERN_WARNING level
#define LOG_WARN(fmt, args...) printk(KERN_WARNING "elastio-snap: " fmt "\n", ## args)
//LOG_ERROR prints unconditionally at KERN_ERR level and appends ": <error>" (printed with %d) after the message
#define LOG_ERROR(error, fmt, args...) printk(KERN_ERR "elastio-snap: " fmt ": %d\n", ## args, error)
//PRINT_BIO logs (through LOG_DEBUG) the bio's start sector and its size divided by 512
#define PRINT_BIO(text, bio) LOG_DEBUG(text ": sect = %llu size = %u", (unsigned long long)bio_sector(bio), bio_size(bio) / 512)
/*********************************REDEFINED FUNCTIONS*******************************/
#include <linux/delay.h>
#include <linux/fiemap.h>
#ifdef HAVE_UUID_H
#include <linux/uuid.h>
#endif
#ifdef HAVE_UAPI_MOUNT_H
#include <uapi/linux/mount.h>
#endif
#if defined HAVE_BLK_MQ_MAKE_REQUEST || defined HAVE_BLK_MQ_SUBMIT_BIO
#include <linux/blk-mq.h>
#include <linux/percpu-refcount.h>
#endif
#ifndef HAVE_BIO_LIST
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
/* Fallback singly linked FIFO of bios; elements are chained through bio->bi_next. */
struct bio_list {
	struct bio *head;
	struct bio *tail;
};

#define BIO_EMPTY_LIST { NULL, NULL }
#define bio_list_for_each(bio, bl) for((bio) = (bl)->head; (bio); (bio) = (bio)->bi_next)

/* Returns 1 when the list holds no bios, 0 otherwise. */
static inline int bio_list_empty(const struct bio_list *bl)
{
	return !bl->head;
}

/* Resets the list to the empty state. */
static inline void bio_list_init(struct bio_list *bl)
{
	bl->head = NULL;
	bl->tail = NULL;
}

/* Appends bio to the end of the list. */
static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
{
	struct bio *last = bl->tail;

	bio->bi_next = NULL;
	if (!last)
		bl->head = bio;
	else
		last->bi_next = bio;
	bl->tail = bio;
}

/* Detaches and returns the first bio of the list, or NULL when the list is empty. */
static inline struct bio *bio_list_pop(struct bio_list *bl)
{
	struct bio *first = bl->head;

	if (!first)
		return NULL;

	bl->head = first->bi_next;
	if (!bl->head)
		bl->tail = NULL;
	//the popped bio must not keep a link into the list
	first->bi_next = NULL;
	return first;
}
#endif
#ifndef HAVE_D_UNLINKED
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
/* Fallback d_unlinked(): returns 1 for a dentry that is unhashed and is not a root dentry, 0 otherwise. */
static inline int d_unlinked(struct dentry *dentry)
{
	if (!d_unhashed(dentry))
		return 0;

	return IS_ROOT(dentry) ? 0 : 1;
}
#endif
#ifndef HAVE_NOOP_LLSEEK
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
/* Fallback noop_llseek(): ignores offset and origin and reports the file's current position unchanged. */
static loff_t noop_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t current_pos = file->f_pos;

	return current_pos;
}
#endif
#ifndef HAVE_STRUCT_PATH
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
//fallback definition of struct path (a vfsmount/dentry pair) for kernels that do not provide one
struct path {
struct vfsmount *mnt;
struct dentry *dentry;
};
//without struct path, a file's dentry and vfsmount are read from its f_dentry / f_vfsmnt members
#define elastio_snap_get_dentry(f) (f)->f_dentry
#define elastio_snap_get_mnt(f) (f)->f_vfsmnt
#else
//with struct path, a file's dentry and vfsmount are read through its f_path member
#define elastio_snap_get_dentry(f) (f)->f_path.dentry
#define elastio_snap_get_mnt(f) (f)->f_path.mnt
#endif
/*
 * elastio_snap_bdevname(dev, buf): write the name of block device dev into buf.
 *
 * With HAVE_BDEVNAME this forwards to bdevname(). Otherwise the name is formatted
 * with the "%pg" printk specifier. The fallback uses sizeof(buf) as the buffer
 * length, so buf must be an actual char array there, not a pointer.
 *
 * Both variants expand to a single expression without a trailing semicolon, so the
 * macro behaves the same in either configuration and is safe inside an unbraced
 * if/else.
 */
#ifdef HAVE_BDEVNAME
#define elastio_snap_bdevname(dev, buf) bdevname(dev, buf)
#else
#define elastio_snap_bdevname(dev, buf) snprintf((buf), sizeof(buf), "%pg", (dev))
#endif
#ifndef HAVE_PATH_PUT
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
//fallback path_put(): drops the dentry reference and then the vfsmount reference of a struct path
void path_put(const struct path *path) {
dput(path->dentry);
mntput(path->mnt);
}
//in this configuration d_path() takes the dentry and the vfsmount as separate arguments
#define elastio_snap_d_path(path, page_buf, page_size) d_path((path)->dentry, (path)->mnt, page_buf, page_size)
//in this configuration struct nameidata exposes dentry and mnt as direct members
#define elastio_snap_get_nd_dentry(nd) (nd).dentry
#define elastio_snap_get_nd_mnt(nd) (nd).mnt
#else
//in this configuration d_path() takes the struct path pointer itself
#define elastio_snap_d_path(path, page_buf, page_size) d_path(path, page_buf, page_size)
//in this configuration struct nameidata reaches dentry and mnt through its path member
#define elastio_snap_get_nd_dentry(nd) (nd).path.dentry
#define elastio_snap_get_nd_mnt(nd) (nd).path.mnt
#endif
#ifndef HAVE_FMODE_T
//fallback: make fmode_t an alias of mode_t when the kernel does not provide the type
typedef mode_t fmode_t;
#endif
//Function pointers resolved from configured addresses instead of linked symbols.
//Each pointer is computed as <X>_ADDR plus the difference between the runtime address of kfree and KFREE_ADDR,
//and is NULL when <X>_ADDR is 0, so users of these pointers have to be prepared for NULL.
//NOTE(review): presumably the kfree offset compensates for the kernel image being loaded at a different
//address than the one the *_ADDR constants were captured for — confirm against the build scripts.
#ifndef HAVE_BLK_ALLOC_QUEUE_MK_REQ_FN_NODE_ID
struct request_queue* (*elastio_blk_alloc_queue)(int node_id) = (BLK_ALLOC_QUEUE_ADDR != 0) ?
(struct request_queue* (*)(int node_id)) (BLK_ALLOC_QUEUE_ADDR + (long long)(((void *)kfree) - (void *)KFREE_ADDR)) : NULL;
#endif
struct super_block* (*elastio_snap_get_super)(struct block_device *) = (GET_SUPER_ADDR != 0) ?
(struct super_block* (*)(struct block_device*)) (GET_SUPER_ADDR + (long long)(((void *)kfree) - (void *)KFREE_ADDR)) : NULL;
#ifndef HAVE_BLKDEV_GET_BY_PATH
/*
 * Resolves pathname (following symlinks) and opens the block device it names.
 * Only compiled when the kernel lacks blkdev_get_by_path().
 *
 * pathname: path of the block device node
 * mode:     open mode handed to open_by_devnum()
 *
 * Returns the result of open_by_devnum() on success, or an ERR_PTR():
 *   - the path_lookup() error when the path cannot be resolved
 *   - -ENOENT when the resolved dentry has no inode
 *   - -ENOTBLK when the inode is not a block device
 */
struct block_device *elastio_snap_lookup_bdev(const char *pathname, fmode_t mode) {
	int r;
	struct block_device *retbd;
	struct nameidata nd;
	struct inode *inode;

	r = path_lookup(pathname, LOOKUP_FOLLOW, &nd);
	if (r) {
		//the lookup failed, so nd was never filled in and holds no references:
		//return directly instead of releasing a dentry/vfsmount we do not own
		return ERR_PTR(r);
	}

	inode = elastio_snap_get_nd_dentry(nd)->d_inode;
	if (!inode)
		retbd = ERR_PTR(-ENOENT);
	else if (!S_ISBLK(inode->i_mode))
		retbd = ERR_PTR(-ENOTBLK);
	else
		retbd = open_by_devnum(inode->i_rdev, mode);

	//drop the references taken by the successful path_lookup()
#ifdef HAVE_PATH_PUT
	path_put(&nd.path);
#else
	dput(nd.dentry);
	mntput(nd.mnt);
#endif
	return retbd;
}
#endif
#ifndef HAVE_BLKDEV_GET_BY_PATH
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
/*
 * Fallback blkdev_get_by_path(): opens the block device at path through
 * elastio_snap_lookup_bdev(). The holder argument is not used.
 *
 * Returns the opened device, the lookup's ERR_PTR() on failure, or
 * ERR_PTR(-EACCES) when FMODE_WRITE was requested on a read-only device
 * (the device is released again in that case).
 */
static struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder){
	struct block_device *bdev = elastio_snap_lookup_bdev(path, mode);

	if(IS_ERR(bdev))
		return bdev;

	//nothing more to verify unless a writable open hit a read-only device
	if(!(mode & FMODE_WRITE) || !bdev_read_only(bdev))
		return bdev;

#ifdef HAVE_BLKDEV_PUT_1
	blkdev_put(bdev);
#else
	blkdev_put(bdev, mode);
#endif
	return ERR_PTR(-EACCES);
}
#endif
//fallback values for request flags that the running kernel's headers may not define
#ifndef READ_SYNC
#define READ_SYNC 0
#endif
//REQ_WRITE falls back to WRITE
#ifndef REQ_WRITE
#define REQ_WRITE WRITE
#endif
//REQ_FLUSH falls back to the BIO_RW_BARRIER bit
#ifndef REQ_FLUSH
#define REQ_FLUSH (1 << BIO_RW_BARRIER)
#endif
//if these don't exist they are not supported, so they are defined as 0 and OR-ing them in is a no-op
#ifndef REQ_SECURE
#define REQ_SECURE 0
#endif
#ifndef REQ_WRITE_SAME
#define REQ_WRITE_SAME 0
#endif
//this branch (HAVE_SUBMIT_BIO_1 not defined) operates on bio->bi_rw and passes an explicit rw argument to submit_bio()
#ifndef HAVE_SUBMIT_BIO_1
//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)
//REQ_DISCARD falls back to 0, which makes bio_is_discard() always false in that case
#ifndef REQ_DISCARD
#define REQ_DISCARD 0
#endif
#ifndef HAVE_ENUM_REQ_OP
//local definition of the operation codes for kernels without enum req_op;
//elastio_snap_set_bio_ops() translates them into bi_rw flag bits
typedef enum req_op {
REQ_OP_READ,
REQ_OP_WRITE,
REQ_OP_DISCARD, /* request to discard sectors */
REQ_OP_SECURE_ERASE, /* request to securely erase sectors */
REQ_OP_WRITE_SAME, /* write same block many times */
REQ_OP_FLUSH, /* request for cache flush */
} req_op_t;
/*
 * Replaces the contents of bio->bi_rw with the flag bits that correspond to
 * op, OR-ed with op_flags.
 *
 * REQ_OP_READ (and any value not listed) contributes no operation bits, so
 * bi_rw then ends up holding op_flags only.
 */
static inline void elastio_snap_set_bio_ops(struct bio *bio, req_op_t op, unsigned op_flags){
	bio->bi_rw = 0;

	if(op == REQ_OP_WRITE)
		bio->bi_rw |= REQ_WRITE;
	else if(op == REQ_OP_DISCARD)
		bio->bi_rw |= REQ_DISCARD;
	else if(op == REQ_OP_SECURE_ERASE)
		bio->bi_rw |= REQ_DISCARD | REQ_SECURE;	//a secure erase is a discard with REQ_SECURE added
	else if(op == REQ_OP_WRITE_SAME)
		bio->bi_rw |= REQ_WRITE_SAME;
	else if(op == REQ_OP_FLUSH)
		bio->bi_rw |= REQ_FLUSH;

	bio->bi_rw |= op_flags;
}
#else
//the kernel provides enum req_op: alias it and delegate to bio_set_op_attrs()
typedef enum req_op req_op_t;
#define elastio_snap_set_bio_ops(bio, op, flags) bio_set_op_attrs(bio, op, flags)
#endif
//non-zero when REQ_DISCARD is set in the bio's bi_rw
#define bio_is_discard(bio) ((bio)->bi_rw & REQ_DISCARD)
//submit_bio()/submit_bio_wait() take a leading rw argument in this configuration; 0 is passed for it
#define elastio_snap_submit_bio(bio) submit_bio(0, bio)
#define elastio_snap_submit_bio_wait(bio) submit_bio_wait(0, bio)
//returns non-zero when any bit of flag is set in the bio's bi_rw
static inline int elastio_snap_bio_op_flagged(struct bio *bio, unsigned int flag){
	return bio->bi_rw & flag;
}

//sets the bits of flag in the bio's bi_rw
static inline void elastio_snap_bio_op_set_flag(struct bio *bio, unsigned int flag){
	bio->bi_rw |= flag;
}

//clears the bits of flag in the bio's bi_rw and leaves every other bit untouched
static inline void elastio_snap_bio_op_clear_flag(struct bio *bio, unsigned int flag){
	//widen flag to the type of bi_rw BEFORE inverting it: inverting the 32-bit value first and
	//zero-extending the result would also clear all bits above bit 31 when bi_rw is wider than unsigned int
	bio->bi_rw &= ~(__typeof__(bio->bi_rw))flag;
}
#else
//this branch (HAVE_SUBMIT_BIO_1 defined) operates on bio->bi_opf and uses the single-argument submit_bio()
#ifndef HAVE_ENUM_REQ_OPF
//#if LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0)
typedef enum req_op req_op_t;
#else
typedef enum req_opf req_op_t;
#endif
//replaces the contents of bio->bi_opf with op combined with op_flags
static inline void elastio_snap_set_bio_ops(struct bio *bio, req_op_t op, unsigned op_flags){
#ifdef HAVE_BIO_SET_OP_ATTRS
//bi_opf is cleared first and then filled in by bio_set_op_attrs()
bio->bi_opf = 0;
bio_set_op_attrs(bio, op, op_flags);
#else
bio->bi_opf = op | op_flags;
#endif
}
//returns non-zero when any bit of flag is set in the bio's bi_opf
static inline int elastio_snap_bio_op_flagged(struct bio *bio, unsigned int flag){
return bio->bi_opf & flag;
}
//sets the bits of flag in the bio's bi_opf
static inline void elastio_snap_bio_op_set_flag(struct bio *bio, unsigned int flag){
bio->bi_opf |= flag;
}
//clears the bits of flag in the bio's bi_opf
static inline void elastio_snap_bio_op_clear_flag(struct bio *bio, unsigned int flag){
bio->bi_opf &= ~flag;
}
//a bio counts as a discard when REQ_DISCARD is set (if that flag exists), otherwise when its
//operation is REQ_OP_DISCARD or REQ_OP_SECURE_ERASE
#ifdef REQ_DISCARD
#define bio_is_discard(bio) ((bio)->bi_opf & REQ_DISCARD)
#else
#define bio_is_discard(bio) (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
#endif
#define elastio_snap_submit_bio(bio) submit_bio(bio)
#define elastio_snap_submit_bio_wait(bio) submit_bio_wait(bio)
#endif
#if !defined HAVE_SUBMIT_BIO_WAIT && !defined HAVE_SUBMIT_BIO_1
//#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
//completion context shared between submit_bio_wait() and its end_io callback
struct submit_bio_ret{
struct completion event; //completed by the callback when the bio finishes
int error; //error code reported to the callback
};
//common part of the callback: records the error in the submit_bio_ret stored in bi_private and wakes the waiter
static void __submit_bio_wait_endio(struct bio *bio, int error){
struct submit_bio_ret *ret = bio->bi_private;
ret->error = error;
complete(&ret->event);
}
#ifdef HAVE_BIO_ENDIO_INT
//end_io variant returning int: returns 1 without completing while bi_size is still non-zero,
//otherwise completes the waiter and returns 0
static int submit_bio_wait_endio(struct bio *bio, unsigned int bytes, int error){
if (bio->bi_size) return 1;
__submit_bio_wait_endio(bio, error);
return 0;
}
#else
//end_io variant returning void: completes the waiter directly
static void submit_bio_wait_endio(struct bio *bio, int error){
__submit_bio_wait_endio(bio, error);
}
#endif
static int submit_bio_wait(int rw, struct bio *bio){
struct submit_bio_ret ret;
//kernel implementation has the line below, but all our calls will have this already and it changes across kernel versions
//rw |= REQ_SYNC;
init_completion(&ret.event);
bio->bi_private = &ret;
bio->bi_end_io = submit_bio_wait_endio;
submit_bio(rw, bio);
wait_for_completion(&ret.event);
return ret.error;
}
#endif
//elastio_snap_bio_endio(bio, err): completes a bio with the given errno-style error,
//hiding the differences between the bio_endio() signatures selected below
#ifdef HAVE_BIO_ENDIO_INT
//bio_endio() takes the byte count and the error; the bio's bi_size is passed as the byte count
static void elastio_snap_bio_endio(struct bio *bio, int err){
bio_endio(bio, bio->bi_size, err);
}
#elif !defined HAVE_BIO_ENDIO_1
//bio_endio() takes the error as its second argument
static void elastio_snap_bio_endio(struct bio *bio, int err){
bio_endio(bio, err);
}
#elif defined HAVE_BLK_STATUS_T
//single-argument bio_endio(): the error is converted with errno_to_blk_status() and stored in bi_status first
static void elastio_snap_bio_endio(struct bio *bio, int err){
bio->bi_status = errno_to_blk_status(err);
bio_endio(bio);
}
#else
//single-argument bio_endio(): the error is stored in bi_error first
static void elastio_snap_bio_endio(struct bio *bio, int err){
bio->bi_error = err;
bio_endio(bio);
}
#endif
//the kernel changed the usage of bio_for_each_segment in 3.14. Do not use any fields directly or you will lose compatibility.
//use the bio_iter_* / bio_sector / bio_size / bio_idx accessors below instead
#ifndef HAVE_BVEC_ITER
//#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0)
//without struct bvec_iter the iterator is a plain index into bi_io_vec and the iteration variable is a bio_vec pointer
typedef int bio_iter_t;
typedef struct bio_vec *bio_iter_bvec_t;
#define bio_iter_len(bio, iter) ((bio)->bi_io_vec[(iter)].bv_len)
#define bio_iter_offset(bio, iter) ((bio)->bi_io_vec[(iter)].bv_offset)
#define bio_iter_page(bio, iter) ((bio)->bi_io_vec[(iter)].bv_page)
#define bio_iter_idx(iter) (iter)
//sector, size and index are direct members of struct bio here
#define bio_sector(bio) (bio)->bi_sector
#define bio_size(bio) (bio)->bi_size
#define bio_idx(bio) (bio)->bi_idx
#else
//with struct bvec_iter the iterator is a struct and the iteration variable is a bio_vec by value;
//bio_iter_len/offset/page are not defined in this branch (presumably provided by the kernel — confirm)
typedef struct bvec_iter bio_iter_t;
typedef struct bio_vec bio_iter_bvec_t;
#define bio_iter_idx(iter) ((iter).bi_idx)
//sector, size and index live in the bio's embedded bi_iter here
#define bio_sector(bio) (bio)->bi_iter.bi_sector
#define bio_size(bio) (bio)->bi_iter.bi_size
#define bio_idx(bio) (bio)->bi_iter.bi_idx
#endif
#ifndef HAVE_MNT_WANT_WRITE
//fallback: mnt_want_write(x) always evaluates to 0 (success) without evaluating x
#define mnt_want_write(x) 0
//fallback: mnt_drop_write(x) expands to (void)sizeof(x), a no-op that does not evaluate x
#define mnt_drop_write (void)sizeof
#endif
//fallback: UMOUNT_NOFOLLOW becomes 0 so OR-ing it into flags has no effect
#ifndef UMOUNT_NOFOLLOW
#define UMOUNT_NOFOLLOW 0
#endif
#if !defined(HAVE_BDEV_STACK_LIMITS)
//#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
/*
 * Fallback bdev_stack_limits(): stacks the queue limits of bdev onto t.
 * start is a sector offset relative to bdev; the device's own start sector is
 * added to it and the sum is converted to bytes (<< 9) for blk_stack_limits().
 * Returns whatever blk_stack_limits() returns.
 */
int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, sector_t start){
	struct request_queue *base_queue = bdev_get_queue(bdev);
	sector_t abs_start = start + get_start_sect(bdev);

	return blk_stack_limits(t, &base_queue->limits, abs_start << 9);
}
#endif
//stacks the limits of bdev onto the limits embedded in queue; identical in every configuration
#define elastio_snap_bdev_stack_limits(queue, bdev, start) bdev_stack_limits(&(queue)->limits, bdev, start)
#ifndef HAVE_KERN_PATH
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
/*
 * Fallback kern_path(): resolves name with path_lookup() and, on success,
 * copies the resulting dentry and vfsmount into path.
 * Returns 0 on success or the path_lookup() error; path is left untouched on failure.
 */
static int kern_path(const char *name, unsigned int flags, struct path *path){
	struct nameidata nd;
	int err = path_lookup(name, flags, &nd);

	if(err)
		return err;

	path->dentry = elastio_snap_get_nd_dentry(nd);
	path->mnt = elastio_snap_get_nd_mnt(nd);
	return 0;
}
#endif
//elastio_snap_bioset_create(bio_size, bvec_size, scale): wraps the bioset_create() signature of the running kernel
#ifdef HAVE_BIOSET_NEED_BVECS_FLAG
//third argument is a flags word: BIOSET_NEED_BVECS is passed and scale is ignored
#define elastio_snap_bioset_create(bio_size, bvec_size, scale) bioset_create(bio_size, bvec_size, BIOSET_NEED_BVECS)
#elif defined HAVE_BIOSET_CREATE_3
//three-argument form: all arguments are forwarded unchanged
#define elastio_snap_bioset_create(bio_size, bvec_size, scale) bioset_create(bio_size, bvec_size, scale)
#else
//two-argument form: bvec_size is dropped and scale is passed as the second argument
#define elastio_snap_bioset_create(bio_size, bvec_size, scale) bioset_create(bio_size, scale)
#endif
#ifndef HAVE_USER_PATH_AT
/*
 * Fallback user_path_at(): copies the user-space path name into the kernel with
 * getname(), resolves it with path_lookup() and stores the resulting
 * dentry/vfsmount in path. The dfd argument is not used.
 *
 * Returns 0 on success, the getname() error when the name cannot be copied,
 * or the path_lookup() error. LOOKUP_PARENT in flags triggers BUG_ON().
 */
int user_path_at(int dfd, const char __user *name, unsigned flags, struct path *path) {
	struct nameidata nd;
	char *kname = getname(name);
	int err;

	if (IS_ERR(kname))
		return PTR_ERR(kname);

	BUG_ON(flags & LOOKUP_PARENT);
	err = path_lookup(kname, flags, &nd);
	putname(kname);
	if (err)
		return err;

	path->dentry = elastio_snap_get_nd_dentry(nd);
	path->mnt = elastio_snap_get_nd_mnt(nd);
	return 0;
}
#endif
/*
 * Works out which ATTR_KILL_* bits apply to the inode behind dentry.
 *
 * Returns a mask of ATTR_KILL_SUID / ATTR_KILL_SGID when the inode is a regular
 * file carrying such bits and the caller lacks CAP_FSETID; returns 0 otherwise.
 */
static int elastio_snap_should_remove_suid(struct dentry *dentry)
{
	mode_t mode = dentry->d_inode->i_mode;
	/* suid always must be killed */
	int kill = (mode & S_ISUID) ? ATTR_KILL_SUID : 0;

	/*
	 * sgid counts only together with the group exec bit; without it the
	 * bit is just a mandatory locking mark and is left alone.
	 */
	if ((mode & S_ISGID) && (mode & S_IXGRP))
		kill |= ATTR_KILL_SGID;

	if (!kill)
		return 0;
	if (capable(CAP_FSETID))
		return 0;

	return S_ISREG(mode) ? kill : 0;
}
//elastio_snap_bio_bi_disk(bio): the gendisk a bio is addressed to
#ifdef HAVE_BIO_BI_BDEV_BD_DISK
//reached through the bio's block device
#define elastio_snap_bio_bi_disk(bio) ((bio)->bi_bdev->bd_disk)
#else
//#if LINUX_VERSION_CODE < KERNEL_VERSION(5,12,0)
//stored directly in the bio
#define elastio_snap_bio_bi_disk(bio) ((bio)->bi_disk)
#endif
//elastio_snap_blkdev_put(bdev): releases a block device; FMODE_READ is always passed where a mode is required
//NOTE(review): both variants end in ';', so "elastio_snap_blkdev_put(bdev);" expands to two statements and
//would break an unbraced if/else — check the callers before dropping the semicolon
#ifdef HAVE_BLKDEV_PUT_1
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
#define elastio_snap_blkdev_put(bdev) blkdev_put(bdev);
#else
#define elastio_snap_blkdev_put(bdev) blkdev_put(bdev, FMODE_READ);
#endif
//elastio_snap_bdev_size(bdev): number of sectors of a block device, read through whichever accessor exists
#ifdef HAVE_BDEV_NR_SECTORS
#define elastio_snap_bdev_size(bdev) bdev_nr_sectors(bdev)
#elif defined HAVE_PART_NR_SECTS_READ
//#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
#define elastio_snap_bdev_size(bdev) part_nr_sects_read((bdev)->bd_part)
#else
//#if LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0)
#define elastio_snap_bdev_size(bdev) ((bdev)->bd_part->nr_sects)
#endif
//elastio_snap_bdev_is_partition(bdev): true when bdev is a partition rather than a whole device
#ifndef HAVE_BDEV_IS_PARTITION
//#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
//fallback: a device is a partition when its bd_contains does not point back at itself.
//the argument is parenthesized so that expressions such as "p + 1" or "&dev" expand correctly
#define elastio_snap_bdev_is_partition(bdev) ((bdev)->bd_contains != (bdev))
#else
#define elastio_snap_bdev_is_partition(bdev) bdev_is_partition(bdev)
#endif
#ifndef HAVE_VZALLOC
//fallback vzalloc(): __vmalloc() with __GFP_ZERO added so the returned memory is zero-filled
#define vzalloc(size) __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL)
#endif
//When the request queue no longer carries make_request_fn, the two sections below define
//USE_BDOPS_SUBMIT_BIO and re-create the make_request_fn type with the signature of
//block_device_operations.submit_bio, so the rest of the code can keep using the old name.
//NOTE(review): the two sections define the same typedef with different return types, so
//HAVE_BDOPS_SUBMIT_BIO_UINT and HAVE_BDOPS_SUBMIT_BIO are presumably never both defined — confirm.
#if !defined HAVE_MAKE_REQUEST_FN_IN_QUEUE && defined HAVE_BDOPS_SUBMIT_BIO_UINT
// Linux kernel version 5.9 - 5.15
// make_request_fn has been moved from the request queue structure to the
// block_device_operations as submit_bio function with UINT return type.
// See https://github.com/torvalds/linux/commit/c62b37d96b6eb3ec5ae4cbe00db107bf15aebc93
#define USE_BDOPS_SUBMIT_BIO
// Prototype bdev->fops->submit_bio but with the name already used in the code
typedef blk_qc_t (make_request_fn) (struct bio *bio);
#endif
#if !defined HAVE_MAKE_REQUEST_FN_IN_QUEUE && defined HAVE_BDOPS_SUBMIT_BIO
// Linux kernel version 5.16+
// submit_bio function in the block_device_operations structure has changed its return type to VOID.
// See https://github.com/torvalds/linux/commit/3e08773c3841e9db7a520908cc2b136a77d275ff#diff79b436371fdb3ddf0e7ad9bd4c9afe05160f7953438e650a77519b882904c56bR1181
#define USE_BDOPS_SUBMIT_BIO
// Prototype bdev->fops->submit_bio but with the name already used in the code
typedef void (make_request_fn) (struct bio *bio);
#endif
#ifndef USE_BDOPS_SUBMIT_BIO
//returns the make_request_fn stored in the request queue of the disk behind bdev
static inline make_request_fn* elastio_snap_get_bd_mrf(struct block_device *bdev){
return bdev->bd_disk->queue->make_request_fn;
}
//replaces the make_request_fn stored in the request queue of the disk behind bdev
static inline void elastio_snap_set_bd_mrf(struct block_device *bdev, make_request_fn *mrf){
bdev->bd_disk->queue->make_request_fn = mrf;
}
#else
//returns the block_device_operations table of the disk behind bdev; the cast drops the const qualifier of fops
static inline struct block_device_operations* elastio_snap_get_bd_ops(struct block_device *bdev){
return (struct block_device_operations*)bdev->bd_disk->fops;
}
//replaces the block_device_operations table of the disk behind bdev
static inline void elastio_snap_set_bd_ops(struct block_device *bdev, const struct block_device_operations *bd_ops){
bdev->bd_disk->fops = bd_ops;
}
//returns the submit_bio entry of the disk's block_device_operations (the counterpart of make_request_fn here)
static inline make_request_fn* elastio_snap_get_bd_mrf(struct block_device *bdev){
return bdev->bd_disk->fops->submit_bio;
}
#endif
//forward declaration; the definition follows further down in this file
static inline struct request_queue *elastio_snap_bio_get_queue(struct bio *bio);
//The sections below define, per kernel configuration:
//  MRF_RETURN_TYPE       - return type of a make_request_fn style function
//  MRF_RETURN(ret)       - return statement matching that type
//  MRF_RETURN_TYPE_VOID  - defined only when that type is void
//  elastio_snap_call_mrf - calls a make_request_fn for a bio and always yields an int
#ifdef HAVE_MAKE_REQUEST_FN_INT
#define MRF_RETURN_TYPE int
#define MRF_RETURN(ret) return ret
//calls fn with an explicit queue and passes its int result through
static inline int __elastio_snap_call_mrf(make_request_fn *fn, struct request_queue *q, struct bio *bio){
return fn(q, bio);
}
//calls fn with the queue obtained from the bio itself
static inline int elastio_snap_call_mrf(make_request_fn *fn, struct bio *bio){
return __elastio_snap_call_mrf(fn, elastio_snap_bio_get_queue(bio), bio);
}
#elif defined HAVE_MAKE_REQUEST_FN_VOID
#define MRF_RETURN_TYPE void
#define MRF_RETURN(ret) return
#define MRF_RETURN_TYPE_VOID
//fn returns nothing here, so 0 is reported to the caller
static inline int __elastio_snap_call_mrf(make_request_fn *fn, struct request_queue *q, struct bio *bio){
fn(q, bio);
return 0;
}
static inline int elastio_snap_call_mrf(make_request_fn *fn, struct bio *bio){
__elastio_snap_call_mrf(fn, elastio_snap_bio_get_queue(bio), bio);
return 0;
}
#else
#ifdef HAVE_BDOPS_SUBMIT_BIO
// Linux kernel version 5.16+
#define MRF_RETURN_TYPE void
#define MRF_RETURN(ret) return
#define MRF_RETURN_TYPE_VOID
#else
// Linux kernel version 5.9 - 5.15
//MRF_RETURN ignores its argument here and always returns BLK_QC_T_NONE
#define MRF_RETURN_TYPE blk_qc_t
#define MRF_RETURN(ret) return BLK_QC_T_NONE
#endif
#ifndef USE_BDOPS_SUBMIT_BIO
//make_request_fn still lives in the queue and takes (queue, bio); its result is returned as int
static inline int __elastio_snap_call_mrf(make_request_fn *fn, struct request_queue *q, struct bio *bio){
return fn(q, bio);
}
static inline int elastio_snap_call_mrf(make_request_fn *fn, struct bio *bio){
return __elastio_snap_call_mrf(fn, elastio_snap_bio_get_queue(bio), bio);
}
#endif
#ifdef HAVE_BLK_MQ_MAKE_REQUEST
// Linux version 5.8
//takes a reference on the queue's q_usage_counter and then hands the bio to blk_mq_make_request()
static inline MRF_RETURN_TYPE elastio_snap_null_mrf(struct request_queue *q, struct bio *bio){
percpu_ref_get(&q->q_usage_counter);
return blk_mq_make_request(q, bio);
}
#endif
#endif
//helpers for writing code that compiles with both void and value-returning make_request_fn types:
//  MRF_SET_RETURN_VALUE(call) - performs the call; stores its result in a local named "ret" when there is one
//  MRF_RETURN_VALUE(call)     - performs the call; returns its result when there is one
#ifdef MRF_RETURN_TYPE_VOID
#define MRF_SET_RETURN_VALUE(mrf_func) mrf_func
#define MRF_RETURN_VALUE(mrf_func) mrf_func
#else
//requires a variable named "ret" to be in scope at the point of use
#define MRF_SET_RETURN_VALUE(mrf_func) ret = mrf_func
#define MRF_RETURN_VALUE(mrf_func) return mrf_func
#endif
#ifdef USE_BDOPS_SUBMIT_BIO
// Linux version 5.9+
// The blk_mq_submit_bio function was exported in kernels 5.9.0 - 5.9.1, but it is no longer exported starting from 5.9.2,
// and the HAVE_BLK_MQ_SUBMIT_BIO compat check cannot tell us whether it is exported or not.
// Calling it by address works in all cases for kernels 5.9+.
// elastio_blk_mq_submit_bio is set to NULL when the address of blk_mq_submit_bio was not detected, so that this can be checked later.
MRF_RETURN_TYPE (*elastio_blk_mq_submit_bio)(struct bio *) = (BLK_MQ_SUBMIT_BIO_ADDR != 0) ?
(MRF_RETURN_TYPE (*)(struct bio *)) (BLK_MQ_SUBMIT_BIO_ADDR + (long long)(((void *)kfree) - (void *)KFREE_ADDR)) : NULL;
//hands the bio to blk_mq_submit_bio through the pointer resolved above; when the return type is not void,
//a reference on the disk queue's q_usage_counter is taken first.
//NOTE(review): the pointer is not checked for NULL here — confirm that callers check it before installing/using this function
static inline MRF_RETURN_TYPE elastio_snap_null_mrf(struct bio *bio){
#ifndef MRF_RETURN_TYPE_VOID
percpu_ref_get(&elastio_snap_bio_bi_disk(bio)->queue->q_usage_counter);
#endif
MRF_RETURN_VALUE(elastio_blk_mq_submit_bio(bio));
}
//calls fn for the bio; returns fn's result converted to int, or 0 when fn returns void
static inline int elastio_snap_call_mrf(make_request_fn *fn, struct bio *bio){
int ret = 0;
MRF_SET_RETURN_VALUE(fn(bio));
return ret;
}
#endif
//fallback ACCESS_ONCE(): accesses x through a volatile-qualified lvalue of the same type
#ifndef ACCESS_ONCE
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
#endif
//this is defined in 3.16 and up
#ifndef MIN_NICE
#define MIN_NICE -20
#endif
//if this isn't defined, we don't need it anyway
#ifndef FMODE_NONOTIFY
#define FMODE_NONOTIFY 0
#endif
//fallback: blk_set_stacking_limits() maps to blk_set_default_limits() when the former is unavailable
#ifndef HAVE_BLK_SET_STACKING_LIMITS
#define blk_set_stacking_limits(ql) blk_set_default_limits(ql)
#endif
//elastio_snap_inode_lock()/elastio_snap_inode_unlock(): take and release the per-inode lock
#ifndef HAVE_INODE_LOCK
//#if LINUX_VERSION_CODE < KERNEL_VERSION(4,5,0)
//without inode_lock()/inode_unlock() the inode's i_mutex is locked and unlocked directly
static inline void elastio_snap_inode_lock(struct inode *inode){
mutex_lock(&inode->i_mutex);
}
static inline void elastio_snap_inode_unlock(struct inode *inode){
mutex_unlock(&inode->i_mutex);
}
#else
//otherwise the names are plain aliases of the kernel helpers
#define elastio_snap_inode_lock inode_lock
#define elastio_snap_inode_unlock inode_unlock
#endif
#if !defined HAVE_PROC_CREATE_FN_FILE_OPERATIONS && !defined HAVE_PROC_CREATE_FN_PROC_OPS
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
/*
 * Fallback proc_create(): creates the proc entry with create_proc_entry() and
 * attaches proc_fops to it.
 * Returns the new entry, or NULL when create_proc_entry() fails.
 */
static inline struct proc_dir_entry *proc_create(const char *name, mode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops){
	struct proc_dir_entry *entry = create_proc_entry(name, mode, parent);

	if(entry)
		entry->proc_fops = proc_fops;

	return entry;
}
#endif
/**
 * file_switch_lock() - set or clear the immutable flag of the inode behind a file.
 * @filp: the file to operate on; NULL is accepted and ignored.
 * @lock: true sets S_IMMUTABLE in the inode's i_flags, false clears it.
 * @mark_dirty: when true, the inode is marked dirty after the flag change.
 *
 * The method is used to unlock/lock the CoW file before/after IO.
 * The COW file should be switched to an immutable one while the driver works to prevent it
 * from being moved or copied, thus guaranteeing the correct driver behavior.
 * Another problem which may occur just while reading the CoW file is high and uncontrolled
 * memory consumption. See comments here https://github.com/elastio/elastio-snap/issues/39
 *
 * An inode reference is held for the duration of the flag change. If that
 * reference cannot be obtained the function does nothing: igrab() returns NULL
 * without taking a reference, so continuing would let the final iput() drop a
 * reference this function never owned.
 */
static inline void file_switch_lock(struct file *filp, bool lock, bool mark_dirty)
{
	struct inode *inode;

	if (!filp) return;

	inode = elastio_snap_get_dentry(filp)->d_inode;
	if (!inode || !igrab(inode)) return;

	if (lock)
		inode->i_flags |= S_IMMUTABLE;
	else
		inode->i_flags &= ~S_IMMUTABLE;

	if (mark_dirty)
		mark_inode_dirty(inode);

	iput(inode);
}
//convenience wrappers: set the flag, clear the flag, clear the flag and mark the inode dirty
#define file_lock(filp) file_switch_lock(filp, true, false)
#define file_unlock(filp) file_switch_lock(filp, false, false)
#define file_unlock_mark_dirty(filp) file_switch_lock(filp, false, true)
//returns the request queue a bio is addressed to: through bdev_get_queue() on the bio's block device when
//both HAVE_BIO_BI_BDEV and HAVE_MAKE_REQUEST_FN_IN_QUEUE are defined, otherwise through the bio's gendisk
static inline struct request_queue *elastio_snap_bio_get_queue(struct bio *bio){
#if defined HAVE_BIO_BI_BDEV && defined HAVE_MAKE_REQUEST_FN_IN_QUEUE
//#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0)
return bdev_get_queue(bio->bi_bdev);
#else
return elastio_snap_bio_bi_disk(bio)->queue;
#endif
}
//points a bio at the block device bdev, using bio_set_dev() when available and a direct
//assignment to bi_bdev otherwise
static inline void elastio_snap_bio_set_dev(struct bio *bio, struct block_device *bdev){
#if defined HAVE_BIO_SET_DEV
bio_set_dev(bio, bdev);
#else
//#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0)
bio->bi_bdev = bdev;
#endif
}
//makes dst target the same device as src, using bio_copy_dev() when available and a direct
//copy of bi_bdev otherwise
static inline void elastio_snap_bio_copy_dev(struct bio *dst, struct bio *src){
#if defined HAVE_BIO_COPY_DEV
bio_copy_dev(dst, src);
#else
//#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0)
dst->bi_bdev = src->bi_bdev;
#endif
}
//dev_bioset(dev): yields the device's sd_bioset member as-is without HAVE_BIOSET_INIT, and its address with it
//(presumably because sd_bioset is a pointer in the former case and an embedded struct in the latter — confirm
//against the snap_device definition)
#ifndef HAVE_BIOSET_INIT
//#if LINUX_VERSION_CODE < KERNEL_VERSION(4,18,0)
#define dev_bioset(dev) ((dev)->sd_bioset)
#else
#define dev_bioset(dev) (&(dev)->sd_bioset)
#endif
//NOTE(review): #ifndef only sees macros, so these typedefs are skipped only when __kernel_long_t is a macro
#ifndef __kernel_long_t
typedef long __kernel_long_t;
typedef unsigned long __kernel_ulong_t;
#endif
#ifndef HAVE_SI_MEM_AVAILABLE
__kernel_ulong_t si_mem_available(void)
{
struct sysinfo si;
si_meminfo(&si);
return si.freeram;
}
#endif
#ifndef HAVE_BIO_FREE_PAGES
//fallback bio_free_pages(): walks every segment of the bio and frees the page of each segment
//that has one; the bio itself is not freed here
static void bio_free_pages(struct bio *bio){
struct page *bv_page;
//bio_for_each_segment_all() takes a struct bvec_iter_all iterator or a plain int index, depending on the kernel;
//each branch declares its own iterator and opens the loop, the loop body below is shared
#ifdef HAVE_BVEC_ITER_ALL
struct bvec_iter_all iter;
struct bio_vec *bvec;
bio_for_each_segment_all(bvec, bio, iter) {
#else
int i = 0;
struct bio_vec *bvec;
bio_for_each_segment_all(bvec, bio, i) {
#endif
bv_page = bvec->bv_page;
//segments without a page are skipped
if (bv_page) {
__free_page(bv_page);
}
}
}
#endif
/*********************************MACRO/PARAMETER DEFINITIONS*******************************/
//memory macros
//like __get_free_pages(), but with __GFP_ZERO added to the flags so the pages come back zero-filled
#define get_zeroed_pages(flags, order) __get_free_pages(((flags) | __GFP_ZERO), order)
//takes a value and the log of the value it should be rounded up to;
//yields the number of (1 << log_size)-sized segments needed to hold x
#define NUM_SEGMENTS(x, log_size) (((x) + (1<<(log_size)) - 1) >> (log_size))
//rounds x up / down to a multiple of chunk
#define ROUND_UP(x, chunk) ((((x) + (chunk) - 1) / (chunk)) * (chunk))
#define ROUND_DOWN(x, chunk) (((x) / (chunk)) * (chunk))
//bitmap macros
//the bitmap is addressed as an array of bytes: bit pos lives in byte pos / 8 at bit position pos % 8
#define bitmap_is_marked(bitmap, pos) (((bitmap)[(pos) / 8] & (1 << ((pos) % 8))) != 0)
//the whole expansion is parenthesized so the macro stays a single operand inside larger expressions
#define bitmap_mark(bitmap, pos) ((bitmap)[(pos) / 8] |= (1 << ((pos) % 8)))
//name macros
//fixed names used by the driver; the entries containing %d are printf-style formats that take a number
//(presumably the snapshot device minor — confirm at the call sites)
#define INFO_PROC_FILE "elastio-snap-info"
#define DRIVER_NAME "elastio-snap"
#define CONTROL_DEVICE_NAME "elastio-snap-ctl"
#define SNAP_DEVICE_NAME "elastio-snap%d"
#define SNAP_COW_THREAD_NAME_FMT "elastio_snap_cow%d"
#define SNAP_MRF_THREAD_NAME_FMT "elastio_snap_mrf%d"
#define INC_THREAD_NAME_FMT "elastio_snap_inc%d"
//macros for iterating over snap_devices (both require a null check on dev inside the loop body):
//  tracer_for_each      - visits the slots from lowest_minor to highest_minor (inclusive)
//  tracer_for_each_full - visits every slot from 0 to elastio_snap_max_snap_devices - 1
//The step clause only reads snap_devices[i] while i is still inside the iterated range, so no slot
//past the last iterated index is ever read (with tracer_for_each_full that slot would be
//snap_devices[elastio_snap_max_snap_devices], which lies beyond the array assuming it holds
//elastio_snap_max_snap_devices entries — the allocation is outside this section). dev is NULL once the loop has run to its end.
#define tracer_for_each(dev, i) for((i) = ACCESS_ONCE(lowest_minor), (dev) = ACCESS_ONCE(snap_devices[i]); (i) <= ACCESS_ONCE(highest_minor); (i)++, (dev) = ((i) <= ACCESS_ONCE(highest_minor)) ? ACCESS_ONCE(snap_devices[i]) : NULL)
#define tracer_for_each_full(dev, i) for((i) = 0, (dev) = ACCESS_ONCE(snap_devices[i]); (i) < elastio_snap_max_snap_devices; (i)++, (dev) = ((i) < elastio_snap_max_snap_devices) ? ACCESS_ONCE(snap_devices[i]) : NULL)
//tracer_matches_bio(dev, bio): true when the bio is addressed to the device that dev is tracing.
//dev is parenthesized in both variants so any pointer expression can be passed.
#ifdef USE_BDOPS_SUBMIT_BIO
//returns true if tracing struct's base device fops matches that of bio
#define tracer_matches_bio(dev, bio) (elastio_snap_get_bd_ops((dev)->sd_base_dev) == elastio_snap_bio_bi_disk(bio)->fops)
#else
//returns true if tracing struct's base device queue matches that of bio
#define tracer_matches_bio(dev, bio) (bdev_get_queue((dev)->sd_base_dev) == elastio_snap_bio_get_queue(bio))
#endif
//returns true if tracing struct's sector range matches the sector of the bio, i.e. the bio's start sector
//lies in [sd_sect_off, sd_sect_off + sd_size); only the start sector is checked, not the end of the bio
#define tracer_sector_matches_bio(dev, bio) (bio_sector(bio) >= (dev)->sd_sect_off && bio_sector(bio) < (dev)->sd_sect_off + (dev)->sd_size)
//should be called along with tracer_matches_bio to be valid. returns true if bio is a write, has a size,
//is not a discard, tracing struct is in non-fail state, and the device's sector range matches the bio
#define tracer_should_trace_bio(dev, bio) (bio_data_dir(bio) && !bio_is_discard(bio) && bio_size(bio) && !tracer_read_fail_state(dev) && tracer_sector_matches_bio(dev, bio))
//macros for snapshot bio modes of operation
//NOTE(review): judging by the names, these select whether a snapshot read is served from the cow file,
//the base device, or both — confirm at the places where they are used
#define READ_MODE_COW_FILE 1
#define READ_MODE_BASE_DEVICE 2
#define READ_MODE_MIXED 3
//#if LINUX_VERSION_CODE < KERNEL_VERSION(4,17,0)
//fallbacks for the sector constants: 512-byte sectors (1 << 9)
#ifndef SECTOR_SHIFT
#define SECTOR_SHIFT 9
#endif
#ifndef SECTOR_SIZE
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#endif
//macros for defining sector and block sizes
//COW_BLOCK_SIZE is not defined in this section; a "block" below is one COW_BLOCK_SIZE-sized unit
#define SECTORS_PER_PAGE (PAGE_SIZE / SECTOR_SIZE)
#define COW_SECTION_SIZE PAGE_SIZE
#define SECTORS_PER_BLOCK (COW_BLOCK_SIZE / SECTOR_SIZE)
//sector number -> index of the block containing it (integer division, rounds down)
#define SECTOR_TO_BLOCK(sect) ((sect) / SECTORS_PER_BLOCK)
//block index -> first sector of that block
#define BLOCK_TO_SECTOR(block) ((block) * SECTORS_PER_BLOCK)
//macros for compilation
//evaluates x and discards the result, which silences unused-variable warnings
#define MAYBE_UNUSED(x) (void)(x)
//macros for defining the state of a tracing struct (bit offsets)
#define SNAPSHOT 0
#define ACTIVE 1
#define UNVERIFIED 2
//macro for defining the cow state, whether it placed on bdev or not
#define COW_ON_BDEV 1
//NOTE(review): a percentage threshold related to low-memory failure, judging by the name — confirm at its use site
#define LOW_MEMORY_FAIL_PERCENT 20
//macros for working with bios
#define BIO_SET_SIZE 256
//first sector after the end of the bio: start sector plus size in bytes divided by SECTOR_SIZE
#define bio_last_sector(bio) (bio_sector(bio) + (bio_size(bio) / SECTOR_SIZE))
/* don't perform COW operation */
//__ELASTIO_SNAP_PASSTHROUGH is the bit number and ELASTIO_SNAP_PASSTHROUGH the resulting mask of the
//marker flag described by the comment above; the bit number depends on the kernel's bio flag layout
#if defined HAVE_ENUM_REQ_OP && defined REQ_OP_BITS
//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) && LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0)
/* special case for deb9's 4.9 train
* Bit 30 conflicts with struct bio's bi_opf opcode bitfield, which occupies the top 3 bits of the member. If we set
* that bit, it will mutate the operation that the bio is representing. Setting this to 28 puts this in an unused flag
* for bi_opf (that flag means something in struct request's cmd_flags, but we're not setting that).
*
* Note: CentOS 7 has enum req_op starting from the version 7.4, kernel 3.10.0-693. But this enum has just 4 values
* instead of 6 as in other kernels, where this enum is present. And it doesn't have defined REQ_OP_BITS, which could
* be defined and equal to the 2 bits.
*/
#define __ELASTIO_SNAP_PASSTHROUGH 28 // set as the last flag bit
#else
// set as an unused flag in versions older than 4.8
// set as an unused opcode bit in kernels newer than 4.9
#define __ELASTIO_SNAP_PASSTHROUGH 30
#endif
#define ELASTIO_SNAP_PASSTHROUGH (1ULL << __ELASTIO_SNAP_PASSTHROUGH)
//default value of the max_snap_devices module parameter
#define ELASTIO_SNAP_DEFAULT_SNAP_DEVICES 24
//NOTE(review): presumably the upper bound accepted for max_snap_devices — the check is outside this section
#define ELASTIO_SNAP_MAX_SNAP_DEVICES 255
//BIO_MAX_PAGES falls back to BIO_MAX_VECS on kernels that only define the latter
#if !defined BIO_MAX_PAGES && defined BIO_MAX_VECS
#define BIO_MAX_PAGES BIO_MAX_VECS
#endif
//global module parameters
//each variable below holds the built-in default and is exposed under the parameter name given to
//module_param_named(); the last argument of module_param_named() is the permission mask of the parameter
static int elastio_snap_may_hook_syscalls = 1;
//default: room for 1024 struct fiemap_extent entries
static unsigned long elastio_snap_cow_ext_buf_size = sizeof(struct fiemap_extent) * 1024;
//default: 300 MiB
static unsigned long elastio_snap_cow_max_memory_default = (300 * 1024 * 1024);
static unsigned int elastio_snap_cow_fallocate_percentage_default = 10;
static unsigned int elastio_snap_max_snap_devices = ELASTIO_SNAP_DEFAULT_SNAP_DEVICES;
//read by LOG_DEBUG; the only parameter registered with a write permission bit (S_IWUSR)
static int elastio_snap_debug = 0;
module_param_named(may_hook_syscalls, elastio_snap_may_hook_syscalls, int, S_IRUGO);
MODULE_PARM_DESC(may_hook_syscalls, "if true, allows the kernel module to find and alter the system call table to allow tracing to work across remounts");
module_param_named(cow_ext_buf_size, elastio_snap_cow_ext_buf_size, ulong, 0);
MODULE_PARM_DESC(cow_ext_buf_size, "length of the cow file extension buffer (in bytes)");
module_param_named(cow_max_memory_default, elastio_snap_cow_max_memory_default, ulong, 0);
MODULE_PARM_DESC(cow_max_memory_default, "default maximum cache size (in bytes)");
module_param_named(cow_fallocate_percentage_default, elastio_snap_cow_fallocate_percentage_default, uint, 0);
MODULE_PARM_DESC(cow_fallocate_percentage_default, "default space allocated to the cow file (as integer percentage)");
module_param_named(max_snap_devices, elastio_snap_max_snap_devices, uint, S_IRUGO);
MODULE_PARM_DESC(max_snap_devices, "maximum number of tracers available");
module_param_named(debug, elastio_snap_debug, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug, "enables debug logging");
/*********************************STRUCT DEFINITIONS*******************************/
//one node of a singly linked list of sector ranges
struct sector_set{
struct sector_set *next; //next node in the list
sector_t sect; //presumably the first sector of the range — confirm at the producers
unsigned int len; //presumably the length of the range; the unit is not visible here — confirm
};
//head/tail pair of a list of sector_set nodes
struct sset_list{
struct sector_set *head;
struct sector_set *tail;
};
//a list of bios bundled with a spinlock and a wait queue
//(presumably the lock protects the list and the wait queue wakes its consumer — confirm in the queue helpers)
struct bio_queue{
struct bio_list bios;
spinlock_t lock;
wait_queue_head_t event;
};
//the same bundle for a list of sector sets
struct sset_queue{
struct sset_list ssets;
spinlock_t lock;
wait_queue_head_t event;
};
//one node of a singly linked list that associates a bio with a sector and a size
//(presumably the sector/size the bio covered when it was recorded — confirm where nodes are created)
struct bio_sector_map{
struct bio *bio;
sector_t sect;
unsigned int size;
struct bio_sector_map *next; //next node in the list
};
//head/tail pair of a list of bio_sector_map nodes
struct bsector_list {
struct bio_sector_map* head;
struct bio_sector_map* tail;
};
//reference-counted context that ties an original bio to a snap_device and a list of bio_sector_map entries
//(how the context is shared and when it is released is decided by code outside this section)
struct tracing_params{
struct bio *orig_bio; //the original bio this context belongs to
struct snap_device *dev; //the snap device involved
atomic_t refs; //reference count
struct bsector_list bio_sects; //list of bio/sector records
};
#ifdef USE_BDOPS_SUBMIT_BIO
//reference-counted holder of a block_device_operations pointer; it only exists when submit_bio lives in
//block_device_operations (presumably so one ops table can be shared by several tracers — confirm)
struct tracing_ops {
struct block_device_operations *bd_ops;
atomic_t refs; //reference count
};
#endif