Skip to content

Commit

Permalink
Merge branch 'feature/django_upgrade' of https://github.com/CenterFor…
Browse files Browse the repository at this point in the history
…OpenScience/osf.io into signal-deny-list

* 'feature/django_upgrade' of https://github.com/CenterForOpenScience/osf.io:
  move createcachetable to post migrate signal (CenterForOpenScience#9944)
  remove post-migrate signals from migration stream (CenterForOpenScience#9964)
  Add better logic for routing Dataverse files and their stupid duplicate (CenterForOpenScience#9963)
  [ENG-3872] Fix dataverse 502s and pass version info to FE (CenterForOpenScience#9959)
  Fix File `show_as_unviewed` behavior (CenterForOpenScience#9960)

# Conflicts:
#	osf/apps.py
#	osf/migrations/0137_transfer_preprint_service_permissions.py
#	osf/migrations/__init__.py
  • Loading branch information
John Tordoff committed Jun 27, 2022
2 parents 0245ed1 + 222ce09 commit 578f0ce
Show file tree
Hide file tree
Showing 20 changed files with 455 additions and 104 deletions.
6 changes: 6 additions & 0 deletions addons/base/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@ class InvalidAuthError(AddonError):

class HookError(AddonError):
pass

class QueryError(AddonError):
pass

class DoesNotExist(AddonError):
pass
25 changes: 24 additions & 1 deletion addons/base/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from api.caching.tasks import update_storage_usage_with_size

from addons.base import exceptions as addon_errors
from addons.base.models import BaseStorageAddon
from addons.osfstorage.models import OsfStorageFile
from addons.osfstorage.models import OsfStorageFileNode
Expand Down Expand Up @@ -726,7 +727,29 @@ def addon_view_or_download_file(auth, path, provider, **kwargs):
})

savepoint_id = transaction.savepoint()
file_node = BaseFileNode.resolve_class(provider, BaseFileNode.FILE).get_or_create(target, path)

try:
file_node = BaseFileNode.resolve_class(
provider, BaseFileNode.FILE
).get_or_create(
target, path, **extras
)
except addon_errors.QueryError as e:
raise HTTPError(
http_status.HTTP_400_BAD_REQUEST,
data={
'message_short': 'Bad Request',
'message_long': str(e)
}
)
except addon_errors.DoesNotExist as e:
raise HTTPError(
http_status.HTTP_404_NOT_FOUND,
data={
'message_short': 'Not Found',
'message_long': str(e)
}
)

# Note: Cookie is provided for authentication to waterbutler
# it is overridden to force authentication as the current user
Expand Down
32 changes: 32 additions & 0 deletions addons/dataverse/models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-
from rest_framework import status as http_status

from addons.base import exceptions as addon_errors
from addons.base.models import (BaseOAuthNodeSettings, BaseOAuthUserSettings,
BaseStorageAddon)
from django.contrib.contenttypes.models import ContentType
from django.db import models
from framework.auth.decorators import Auth
from framework.exceptions import HTTPError
Expand All @@ -15,6 +17,36 @@
class DataverseFileNode(BaseFileNode):
_provider = 'dataverse'

@classmethod
def get_or_create(cls, target, path, **query_params):
'''Override get_or_create for Dataverse.
Dataverse is weird and reuses paths, so we need to extract a "version"
query param to determine which file to get. We also don't want to "create"
here, as that might lead to integrity errors.
'''
version = query_params.get('version', None)
if version not in {'latest', 'latest-published'}:
raise addon_errors.QueryError(
'Dataverse requires a "version" query paramater. '
'Acceptable options are "latest" or "latest-published"'
)

content_type = ContentType.objects.get_for_model(target)
try:
obj = cls.objects.get(
target_object_id=target.id,
target_content_type=content_type,
_path='/' + path.lstrip('/'),
_history__0__extra__datasetVersion=version,
)
except cls.DoesNotExist:
raise addon_errors.DoesNotExist(
f'Requested Dataverse file does not exist with version "{version}"'
)

return obj


class DataverseFolder(DataverseFileNode, Folder):
pass
Expand Down
2 changes: 1 addition & 1 deletion addons/osfstorage/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def get(cls, _id, target):
return cls.objects.get(_id=_id, target_object_id=target.id, target_content_type=ContentType.objects.get_for_model(target))

@classmethod
def get_or_create(cls, target, path):
def get_or_create(cls, target, path, **unused_query_params):
"""Override get or create for osfstorage
Path is always the _id of the osfstorage filenode.
Use load here as its way faster than find.
Expand Down
16 changes: 14 additions & 2 deletions api/base/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,11 +638,23 @@ def bulk_get_file_nodes_from_wb_resp(self, files_list):
)

# mirrors BaseFileNode get_or_create
_path = '/' + attrs['path'].lstrip('/')
query = {
'target_object_id': node.id,
'target_content_type': content_type,
'_path': _path,
}

# Dataverse provides us two sets of files with the same path, so we disambiguate the paths, this
# preserves legacy behavior by distingishing them by version (Draft/Published).
if attrs['provider'] == 'dataverse':
query.update({'_history__0__extra__datasetVersion': attrs['extra']['datasetVersion']})

try:
file_obj = base_class.objects.get(target_object_id=node.id, target_content_type=content_type, _path='/' + attrs['path'].lstrip('/'))
file_obj = base_class.objects.get(**query)
except base_class.DoesNotExist:
# create method on BaseFileNode appends provider, bulk_create bypasses this step so it is added here
file_obj = base_class(target=node, _path='/' + attrs['path'].lstrip('/'), provider=base_class._provider)
file_obj = base_class(target=node, _path=_path, provider=base_class._provider)
objs_to_create[base_class].append(file_obj)
else:
file_objs.append(file_obj)
Expand Down
58 changes: 57 additions & 1 deletion api/files/annotations.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from django.db.models import Max, Case, When
from django.db.models import BooleanField, Case, Exists, F, IntegerField, Max, OuterRef, Q, Subquery, Value, When
from django.db.models.functions.base import Cast
from django.contrib.postgres.fields.jsonb import KeyTextTransform
from osf.utils.datetime_aware_jsonfield import DateTimeAwareJSONField
from osf.utils.fields import NonNaiveDateTimeField
from osf.models import FileVersion


# Get date modified for OSF and non-OSF providers
Expand All @@ -28,3 +29,58 @@
), output_field=NonNaiveDateTimeField(),
),
)

def make_show_as_unviewed_annotations(user):
'''Returns the annotations required to set the current_user_has_viewed attribute.
Usage:
OsfStorageFile.objects.annotate(**make_show_as_unviewed_annotations(request.user))
show_as_unviewed is only true if the user has not seen the latest version of a file
but has looked at it previously. Making this determination requires multiple annotations,
which is why this returns a dictionary that must be unpacked into kwargs.
'''
if user.is_anonymous:
return {'show_as_unviewed': Value(False, output_field=BooleanField())}

seen_versions = FileVersion.objects.annotate(
latest_version=Subquery(
FileVersion.objects.filter(
basefilenode=OuterRef('basefilenode'),
).order_by('-created').values('id')[:1],
output_field=IntegerField(),
),
).filter(seen_by=user)

has_seen_latest = Exists(
seen_versions.filter(basefilenode=OuterRef('id')).filter(id=F('latest_version')),
)
has_previously_seen = Exists(
seen_versions.filter(basefilenode=OuterRef('id')).exclude(id=F('latest_version')),
)
show_as_unviewed = Case(
When(Q(has_seen_latest=False) & Q(has_previously_seen=True), then=Value(True)),
default=Value(False),
output_field=BooleanField(),
)

return {
'has_seen_latest': has_seen_latest,
'has_previously_seen': has_previously_seen,
'show_as_unviewed': show_as_unviewed,
}

def check_show_as_unviewed(user, osf_file):
'''A separate function for assigning the show_as_unviewed value to a single instance.
Our logic is not conducive to assigning annotations to a single file, so do it manually
in the DetailView case.
'''
if user.is_anonymous:
return False

latest_version = osf_file.versions.order_by('-created').first()
return (
osf_file.versions.filter(seen_by=user).exists()
and not latest_version.seen_by.filter(id=user.id).exists()
)
25 changes: 16 additions & 9 deletions api/files/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,12 @@ class BaseFileSerializer(JSONAPISerializer):

def absolute_url(self, obj):
if obj.is_file:
return furl.furl(settings.DOMAIN).set(
url = furl.furl(settings.DOMAIN).set(
path=(obj.target._id, 'files', obj.provider, obj.path.lstrip('/')),
).url
)
if obj.provider == 'dataverse':
url.add(query_params={'version': obj.history[-1]['extra']['datasetVersion']})
return url.url

def get_download_link(self, obj):
if obj.is_file:
Expand Down Expand Up @@ -317,6 +320,10 @@ def get_extra(self, obj):
}
if obj.provider == 'osfstorage' and obj.is_file:
extras['downloads'] = obj.get_download_count()

if obj.provider == 'dataverse':
extras.update(obj.history[-1]['extra'])

return extras

def get_current_user_can_comment(self, obj):
Expand Down Expand Up @@ -383,14 +390,14 @@ class FileSerializer(BaseFileSerializer):
min_version='2.0', max_version='2.7',
)
target = TargetField(link_type='related', meta={'type': 'get_target_type'})
current_user_has_viewed = ser.SerializerMethodField(help_text='Whether the current user has already viewed the file')

def get_current_user_has_viewed(self, obj):
version = obj.versions.order_by('created').last()
if version and not self.context['request'].user.is_anonymous: # This is to ensure compatibility with tests, ugh.
return version.seen_by.filter(id=self.context['request'].user.id).exists()
else:
return True
# Assigned via annotation. See api/files/annotations for info
show_as_unviewed = ser.BooleanField(
read_only=True,
required=False,
default=False,
help_text='Whether to mark the file as unviewed for the current user',
)

def get_target_type(self, obj):
if isinstance(obj, Preprint):
Expand Down
4 changes: 4 additions & 0 deletions api/files/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from api.base import permissions as base_permissions
from api.nodes.permissions import ContributorOrPublic
from api.nodes.permissions import ReadOnlyIfRegistration
from api.files import annotations
from api.files.permissions import IsPreprintFile
from api.files.permissions import CheckedOutOrAdmin
from api.files.permissions import FileMetadataRecordPermission
Expand Down Expand Up @@ -110,6 +111,9 @@ def get_object(self):
# We normally would pass this through `get_file` as an annotation, but the `select_for_update` feature prevents
# grouping versions in an annotation
if file.kind == 'file':
file.show_as_unviewed = annotations.check_show_as_unviewed(
user=self.request.user, osf_file=file,
)
if file.provider == 'osfstorage':
file.date_modified = file.versions.aggregate(Max('created'))['created__max']
else:
Expand Down
31 changes: 16 additions & 15 deletions api/nodes/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from distutils.version import StrictVersion
from django.apps import apps
from django.db.models import Q, Subquery, F, Max
from django.db.models import F, Max, Q, Subquery
from django.utils import timezone
from django.contrib.contenttypes.models import ContentType
from rest_framework import generics, permissions as drf_permissions
Expand Down Expand Up @@ -65,7 +65,7 @@
)
from api.draft_registrations.serializers import DraftRegistrationSerializer, DraftRegistrationDetailSerializer
from api.files.serializers import FileSerializer, OsfStorageFileSerializer
from api.files.annotations import DATE_MODIFIED
from api.files import annotations as file_annotations
from api.identifiers.serializers import NodeIdentifierSerializer
from api.identifiers.views import IdentifierList
from api.institutions.serializers import InstitutionSerializer
Expand Down Expand Up @@ -150,7 +150,7 @@
File,
Folder,
)
from addons.osfstorage.models import Region, OsfStorageFileNode
from addons.osfstorage.models import Region
from osf.utils.permissions import ADMIN, WRITE_NODE
from website import mails, settings

Expand Down Expand Up @@ -1178,21 +1178,19 @@ def get_queryset(self):
# query param info when used on a folder gives that folder's metadata instead of the metadata of it's children
if 'info' in self.request.query_params and path.endswith('/'):
resource = self.get_resource()
file_obj = self.get_file_object(resource, path, provider)

if provider == 'osfstorage':
queryset = OsfStorageFileNode.objects.filter(id=file_obj.id)
else:
base_class = BaseFileNode.resolve_class(provider, BaseFileNode.FOLDER)
queryset = base_class.objects.filter(
target_object_id=resource.id,
target_content_type=ContentType.objects.get_for_model(resource),
_path=path,
)
base_class = BaseFileNode.resolve_class(provider, BaseFileNode.FOLDER)
queryset = base_class.objects.filter(
target_object_id=resource.id,
target_content_type=ContentType.objects.get_for_model(resource),
_path=path,
)
else:
queryset = self.get_queryset_from_request()

return queryset.annotate(date_modified=DATE_MODIFIED)
return queryset.annotate(
date_modified=file_annotations.DATE_MODIFIED,
**file_annotations.make_show_as_unviewed_annotations(self.request.user)
)


class NodeFileDetail(JSONAPIBaseView, generics.RetrieveAPIView, WaterButlerMixin, NodeMixin):
Expand Down Expand Up @@ -1223,6 +1221,9 @@ def get_object(self):
# We should not have gotten a folder here
raise NotFound
if fobj.kind == 'file':
fobj.show_as_unviewed = file_annotations.check_show_as_unviewed(
user=self.request.user, osf_file=fobj,
)
if fobj.provider == 'osfstorage':
fobj.date_modified = fobj.versions.aggregate(Max('created'))['created__max']
else:
Expand Down
Loading

0 comments on commit 578f0ce

Please sign in to comment.