-
Notifications
You must be signed in to change notification settings - Fork 1
/
zfs_snapshots.py
executable file
·215 lines (177 loc) · 7.9 KB
/
zfs_snapshots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#!/usr/bin/env python
#
# Author: Tudor Bosman <[email protected]>
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Tool to maintain ZFS snapshots. Run this periodically (hourly) from cron as
zfs_snapshots.py <filesystem> <filesystem>...
where each <filesystem> is a ZFS filesystem name (not mount point, but the
name as listed by "zfs list")
It will create snapshots every hour, every day, every week, and every month
(actually, every 30 days), and automatically delete old ones. The intervals
are easily changed by modifying the SNAPSHOT_TYPES dictionary.
Old snapshots are (by default) retained for the interval of the next
higher-level snapshot (that is, hourly snapshots are retained for one day,
daily snapshots are retained for one week, weekly snapshots are retained for
30 days). Monthly snapshots are retained for a year (365 days). The
retention intervals can be changed by modifying the SNAPSHOT_TYPES
dictionary, or (at runtime) using the --hourly_retention, --daily_retention,
--weekly_retention, --monthly_retention flags.
"""
import argparse
import bisect
import datetime
import logging
import re
import os
import sys
import datetime_args
from zfs_utils import *
# List all snapshot intervals here, as a dictionary:
#   {name: (snapshot_interval, default_retention), ...}
# snapshot_interval is the minimum age the newest snapshot of that type must
# reach before another one is taken; default_retention is how long snapshots
# of that type are kept unless overridden by the --<name>_retention flag.
SNAPSHOT_TYPES = {
    "hourly": (datetime.timedelta(hours=1), datetime.timedelta(days=1)),
    "daily": (datetime.timedelta(days=1), datetime.timedelta(days=7)),
    "weekly": (datetime.timedelta(days=7), datetime.timedelta(days=30)),
    "monthly": (datetime.timedelta(days=30), datetime.timedelta(days=365)),
}

# List of snapshot type names in order of increasing time intervals, so we
# always process snapshots in a consistent (and logical) order.  Ties on the
# interval are broken by name.  A keyed sort is used instead of a list
# comprehension so that no loop variables leak into the module namespace
# on Python 2.
_SNAPSHOT_TYPE_NAMES = sorted(
    SNAPSHOT_TYPES,
    key=lambda type_name: (SNAPSHOT_TYPES[type_name][0], type_name))

# Regular expression fragment that matches valid snapshot types.
_SNAPSHOT_TYPE_RE = (
    "(?:%s)" % "|".join(re.escape(t) for t in _SNAPSHOT_TYPE_NAMES))

# Regular expression that matches valid snapshot file names, i.e.
# "<type>.<YYYYmmdd>-<HHMMSS>".  The pattern is anchored at the end (\Z) so
# that names which merely *start* like one of ours (for example
# "hourly.20240101-000000.bak") are not mistaken for snapshots we created
# and later deleted by the retention logic.
_SNAPSHOT_NAME_RE = re.compile(
    r"(?P<type>%s)\.(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})-"
    r"(?P<hour>\d{2})(?P<minute>\d{2})(?P<second>\d{2})\Z" % _SNAPSHOT_TYPE_RE)
def make_snapshot_name(type, time):
    """Build the name of a snapshot of the given type taken at `time`.

    The result has the form "<type>.<YYYYmmdd>-<HHMMSS>".  `type` must be
    one of the names in _SNAPSHOT_TYPE_NAMES (checked with an assertion);
    `time` must provide a strftime method, e.g. a datetime.datetime.
    """
    assert type in _SNAPSHOT_TYPE_NAMES
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    return "%s.%s" % (type, timestamp)
def list_snapshots(dir):
    """List all snapshots (created by us) in a directory.

    Every entry name returned by os.listdir(dir) is matched against
    _SNAPSHOT_NAME_RE; entries that do not match, or whose timestamp fields
    do not form a valid date/time, are ignored.

    Args:
        dir: path of the directory to scan.  (The name shadows the builtin
            but is kept so that keyword callers continue to work.)

    Returns:
        A dictionary with one key per known snapshot type, each holding a
        list of (time, name) tuples sorted in increasing order:

        {"hourly": [(time, name), ...],
         "daily": [(time, name), ...],
         "weekly": [(time, name), ...],
         "monthly": [(time, name), ...],
        }

    Raises:
        Whatever os.listdir raises if `dir` cannot be read.
    """
    snapshots = dict((t, []) for t in _SNAPSHOT_TYPE_NAMES)
    for snapshot_name in os.listdir(dir):
        m = _SNAPSHOT_NAME_RE.match(snapshot_name)
        if m is None:
            continue  # ignore snapshots that were not generated by us
        fields = m.groupdict()
        snapshot_type = fields.pop("type")
        # The remaining fields (year, month, day, hour, minute, second) are
        # valid keyword arguments for the datetime constructor.
        time_fields = {k: int(v) for (k, v) in fields.items()}
        try:
            snapshot_time = datetime.datetime(**time_fields)
        except ValueError:
            # Out-of-range date or time (e.g. month 13): probably not
            # created by us.
            continue
        snapshots[snapshot_type].append((snapshot_time, snapshot_name))
    for snapshot_list in snapshots.values():
        snapshot_list.sort()
    return snapshots
def get_new_snapshots(snapshots, now):
    """Return the names of the snapshots that are due to be created.

    `snapshots` is a mapping in the format returned by list_snapshots
    (type name -> sorted list of (time, name) tuples) and `now` is the
    time stamped into the new names.  A snapshot type is due when it has
    no snapshot yet, or when its most recent snapshot is at least one
    snapshot interval older than `now`.
    """
    due = []
    for kind in _SNAPSHOT_TYPE_NAMES:
        existing = snapshots[kind]
        period = SNAPSHOT_TYPES[kind][0]
        # The lists are sorted, so the last entry is the newest snapshot.
        is_due = (not existing) or (existing[-1][0] + period <= now)
        if is_due:
            due.append(make_snapshot_name(kind, now))
    return due
def get_old_snapshots_to_delete(snapshots, now, retention_map):
    """Return the names of the snapshots that have outlived their retention.

    `snapshots` is a mapping in the format returned by list_snapshots
    (type name -> sorted list of (time, name) tuples).  `retention_map`
    maps a type name to the timedelta for which snapshots of that type are
    kept; a missing or None entry means snapshots of that type are never
    deleted.  A snapshot is reported when its time is strictly earlier than
    `now` minus the retention interval.
    """
    expired = []
    for kind in _SNAPSHOT_TYPE_NAMES:
        entries = snapshots[kind]
        keep_for = retention_map.get(kind)
        if keep_for is not None:
            # The empty string sorts before every real name, so an entry
            # taken exactly at the cutoff time is still kept.
            cutoff_key = (now - keep_for, "")
            first_kept = bisect.bisect_left(entries, cutoff_key)
            expired += [name for (_, name) in entries[:first_kept]]
    return expired
def process_snapshots(fs, now, retention_map):
    """Create the snapshots that are due and delete expired ones on `fs`.

    `fs` is the ZFS filesystem name, `now` the time used both for naming
    new snapshots and for evaluating ages, and `retention_map` maps each
    snapshot type to its retention interval.  Existing snapshots are read
    from "<mountpoint>/.zfs/snapshot".  The deletion list is computed from
    the snapshots found before any new ones were created.

    NOTE(review): zfs_get_property, zfs_create_snapshot, zfs_delete_snapshot
    and noun_count are not defined in this file; presumably they come from
    the zfs_utils star import.
    """
    mountpoint = zfs_get_property(fs, "mountpoint")
    logging.info("Processing ZFS filesystem \"%s\" (mounted on %s)",
                 fs, mountpoint)

    existing = list_snapshots("%s/.zfs/snapshot" % mountpoint)
    total = 0
    for entries in existing.values():
        total += len(entries)
    logging.info("Found %s", noun_count(total, "existing snapshot"))

    # Create new snapshots
    to_create = get_new_snapshots(existing, now)
    if to_create:
        logging.info("Creating %s",
                     noun_count(len(to_create), "new snapshot"))
    for name in to_create:
        logging.info("Creating snapshot %s", name)
        zfs_create_snapshot(fs, name)

    # Delete old snapshots
    to_delete = get_old_snapshots_to_delete(existing, now, retention_map)
    if to_delete:
        logging.info("Deleting %s",
                     noun_count(len(to_delete), "old snapshot"))
    for name in to_delete:
        logging.info("Deleting old snapshot %s", name)
        zfs_delete_snapshot(fs, name)
def main():
    """Parse the command line and process each selected filesystem.

    Filesystems are either those named as positional arguments or, with
    --all, the list returned by zfs_list_filesystems().  One
    --<type>_retention flag is generated per entry in SNAPSHOT_TYPES,
    defaulting to that type's configured retention.  Logging goes to stderr
    at INFO level.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--all", action="store_true",
                        help="Snapshot all mounted filesystems")
    parser.add_argument("filesystem", nargs="*",
                        help="Filesystem to snapshot")

    # Automatically create <type>_retention flags to override the default
    # retention settings at runtime.
    for type_name, settings in SNAPSHOT_TYPES.items():
        parser.add_argument("--%s_retention" % type_name,
                            type=datetime_args.parse_timedelta,
                            default=settings[1],
                            metavar="DURATION",
                            help="Keep %s snapshots for DURATION" % type_name)

    # TODO(tudor): logging configured from command-line flags
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    root_logger.addHandler(logging.StreamHandler(sys.stderr))

    args = parser.parse_args()
    filesystems = zfs_list_filesystems() if args.all else args.filesystem

    # We use the start time as the time in all snapshot names.
    now = datetime.datetime.utcnow()

    # Get the retention values from the command-line options.
    retention_map = {}
    for type_name in _SNAPSHOT_TYPE_NAMES:
        retention_map[type_name] = getattr(args, "%s_retention" % type_name)

    for fs in filesystems:
        process_snapshots(fs, now, retention_map)


if __name__ == "__main__":
    main()