Skip to content

Commit

Permalink
port tootsuite#12568 to monsterfork: Add tootctl media remove-orphans
Browse files Browse the repository at this point in the history
  • Loading branch information
Gargron authored and multiple creatures committed Feb 21, 2020
1 parent 28073bb commit caeca95
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 7 deletions.
7 changes: 0 additions & 7 deletions app/models/media_attachment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,6 @@ def audio_or_video?
audio? || video?
end

# Reports whether media from this attachment's origin is covered by a domain block.
#
# Candidate domains are the owning account's domain and, when a remote URL is
# present, the first host-like token found in that URL. A candidate is blocked
# when a DomainBlock row (taken from DomainBlock.suspend, or having
# reject_media set) has a domain equal to the candidate or ending in
# ".<candidate>".
#
# Returns true if any candidate is blocked, false otherwise (including when
# there are no usable candidates).
def blocked?
  candidates = [account.domain]
  candidates << remote_url[/[\w\-]+\.[\w\-]+(?:\.[\w\-]+)*/] if remote_url.present?

  # A nil candidate would be interpolated into the LIKE pattern as "%." and
  # match any block whose domain merely ends with a dot, so drop nils up front.
  candidates = candidates.compact.uniq
  return false if candidates.empty?

  blocks = DomainBlock.suspend.or(DomainBlock.where(reject_media: true))

  candidates.any? do |domain|
    blocks.where(domain: domain).or(blocks.where('domain LIKE ?', "%.#{domain}")).exists?
  end
end

def variant?(other_file_name)
return true if file_file_name == other_file_name

Expand Down
77 changes: 77 additions & 0 deletions lib/mastodon/media_cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,83 @@ def remove
say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
end

# Command-line declarations for the remove-orphans command defined below.
# start_after: read by remove_orphans as the initial listing cursor in its S3 branch.
option :start_after
# dry_run: when true, remove_orphans still counts and logs orphans but skips the delete calls.
option :dry_run, type: :boolean, default: false
desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
long_desc <<~LONG_DESC
Scans file storage for files that do not belong to existing media attachments. Because this operation
requires iterating over every single file individually, it will be slow.
Please mind that some storage providers charge for the necessary API requests to list objects.
LONG_DESC
# Deletes stored files under media_attachments/files/ that no MediaAttachment
# record accounts for, then prints how many were removed and their total size.
#
# A file is an orphan when the ID parsed from its path has no matching record,
# or when that record's #variant? rejects the file name. With the dry_run
# option set nothing is deleted, but orphans are still counted and logged
# (with a " (DRY RUN)" suffix).
#
# Only the :s3 and :filesystem Paperclip storage settings are handled; :fog
# prints an error and exits with status 1. Any other setting scans nothing.
def remove_orphans
  progress        = create_progress_bar(nil)
  reclaimed_bytes = 0
  removed         = 0
  dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''

  case Paperclip::Attachment.default_options[:storage]
  when :s3
    paperclip_instance = MediaAttachment.new.file
    s3_interface       = paperclip_instance.s3_interface
    bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
    last_key           = options[:start_after]

    loop do
      # Fetch up to 1000 keys per pass, resuming after the last key already seen.
      objects = bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).to_a

      break if objects.empty?

      last_key = objects.last.key

      # Parse every key once. String#to_i stops at the first non-digit, so any
      # non-numeric segment joined after the digits is ignored.
      # Assumes keys look like media_attachments/files/<id partition>/<style>/<filename> - TODO confirm.
      ids_by_key = objects.each_with_object({}) do |object, ids|
        ids[object.key] = object.key.split('/')[2..-2].join.to_i
      end

      # One query per page instead of one per object.
      attachments_map = MediaAttachment.where(id: ids_by_key.values).each_with_object({}) do |attachment, map|
        map[attachment.id] = attachment
      end

      objects.each do |object|
        attachment = attachments_map[ids_by_key[object.key]]
        filename   = object.key.split('/').last

        progress.increment

        next unless attachment.nil? || !attachment.variant?(filename)

        reclaimed_bytes += object.size
        removed += 1
        object.delete unless options[:dry_run]
        progress.log("Found and removed orphan: #{object.key}#{dry_run}")
      end
    end
  when :fog
    say('The fog storage driver is not supported for this operation at this time', :red)
    exit(1)
  when :filesystem
    require 'find'

    # NOTE(review): confirm RAILS_ROOT_PATH is the same variable the Paperclip
    # storage path is configured from; otherwise a custom root is not scanned.
    root_path = ENV.fetch('RAILS_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)

    Find.find(File.join(root_path, 'media_attachments', 'files')) do |path|
      next if File.directory?(path)

      # Strip only the leading root; a repeat of the root string deeper in the
      # path must stay part of the key.
      key           = path.sub("#{root_path}#{File::SEPARATOR}", '')
      segments      = key.split(File::SEPARATOR)
      attachment_id = segments[2..-2].join.to_i
      filename      = segments.last
      attachment    = MediaAttachment.find_by(id: attachment_id)

      progress.increment

      next unless attachment.nil? || !attachment.variant?(filename)

      reclaimed_bytes += File.size(path)
      removed += 1
      File.delete(path) unless options[:dry_run]
      progress.log("Found and removed orphan: #{key}#{dry_run}")
    end
  end

  # The bar was created without a total; pin it to the number of files seen so it can finish.
  progress.total = progress.progress
  progress.finish

  say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
end

option :account, type: :string
option :domain, type: :string
option :status, type: :numeric
Expand Down

0 comments on commit caeca95

Please sign in to comment.