-
Notifications
You must be signed in to change notification settings - Fork 14.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: enable ETag header for dashboard GET requests #10963
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,15 +15,15 @@ | |
# specific language governing permissions and limitations | ||
# under the License. | ||
import logging | ||
from datetime import datetime, timedelta | ||
from datetime import datetime, timedelta, timezone | ||
from functools import wraps | ||
from typing import Any, Callable, Iterator | ||
from typing import Any, Callable, Iterator, Optional | ||
|
||
from contextlib2 import contextmanager | ||
from flask import request | ||
from werkzeug.wrappers.etag import ETagResponseMixin | ||
|
||
from superset import app, cache | ||
from superset import app, cache, is_feature_enabled | ||
from superset.stats_logger import BaseStatsLogger | ||
from superset.utils.dates import now_as_float | ||
|
||
|
@@ -34,6 +34,10 @@ | |
logger = logging.getLogger(__name__) | ||
|
||
|
||
def is_dashboard_request(kwargs: Any) -> bool:
    """
    Tell whether a view call targets a dashboard.

    :param kwargs: keyword arguments the decorated view was called with
    :returns: True when a non-None ``dashboard_id_or_slug`` entry is present
    """
    return kwargs.get("dashboard_id_or_slug") is not None
|
||
|
||
@contextmanager | ||
def stats_timing(stats_key: str, stats_logger: BaseStatsLogger) -> Iterator[float]: | ||
"""Provide a transactional scope around a series of operations.""" | ||
|
@@ -46,7 +50,11 @@ def stats_timing(stats_key: str, stats_logger: BaseStatsLogger) -> Iterator[floa | |
stats_logger.timing(stats_key, now_as_float() - start_ts) | ||
|
||
|
||
def etag_cache(max_age: int, check_perms: Callable[..., Any]) -> Callable[..., Any]: | ||
def etag_cache( | ||
max_age: int, | ||
check_perms: Callable[..., Any], | ||
check_latest_changed_on: Optional[Callable[..., Any]] = None, | ||
) -> Callable[..., Any]: | ||
""" | ||
A decorator for caching views and handling etag conditional requests. | ||
|
||
|
@@ -72,6 +80,12 @@ def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: | |
if request.method == "POST": | ||
return f(*args, **kwargs) | ||
|
||
# if it is a dashboard request but the feature is not enabled, | ||
# do not use cache | ||
is_dashboard = is_dashboard_request(kwargs) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of adding a helper function, maybe we can just expand def wrapper(*args: Any, dashboard_id_or_slug: str=None, **kwargs: Any) -> ETagResponseMixin:
# ...
is_dashboard = dashboard_id_or_slug is not None Better yet, we should probably aim for keeping all dashboard specific logics out of @etag_cache(skip=lambda: is_feature_enabled("ENABLE_DASHBOARD_ETAG_HEADER")) def etag_cache(
max_age: int,
check_perms: Callable[..., Any],
skip: Optional[Callable[..., Any]] = None,
) -> Callable[..., Any]:
def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin:
check_perms(*args, **kwargs)
if request.method == "POST" or (skip and skip(*args, **kwargs)):
return f(*args, **kwargs) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks! after introduce this |
||
if is_dashboard and not is_feature_enabled("ENABLE_DASHBOARD_ETAG_HEADER"): | ||
return f(*args, **kwargs) | ||
|
||
response = None | ||
if cache: | ||
try: | ||
|
@@ -89,13 +103,25 @@ def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin: | |
raise | ||
logger.exception("Exception possibly due to cache backend.") | ||
|
||
# is the cached response stale? | ||
if check_latest_changed_on: | ||
latest_changed_on = check_latest_changed_on(*args, **kwargs) | ||
if response and response.last_modified: | ||
latest_record = response.last_modified.replace( | ||
tzinfo=timezone.utc | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this assumes that superset server runs in utc zone, it may be safer to make it as a superset config variable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this convert, |
||
).astimezone(tz=None) | ||
if latest_changed_on.timestamp() > latest_record.timestamp(): | ||
response = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can probably rename if get_latest_changed_on:
latest_changed_on = get_latest_changed_on(*args, **kwargs)
if response and response.last_modified and response.last_modified < latest_changed_on:
response = None
else:
latest_changed_on = datetime.utcnow() There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed. |
||
|
||
# if no response was cached, compute it using the wrapped function | ||
if response is None: | ||
response = f(*args, **kwargs) | ||
|
||
# add headers for caching: Last Modified, Expires and ETag | ||
response.cache_control.public = True | ||
response.last_modified = datetime.utcnow() | ||
response.last_modified = ( | ||
latest_changed_on if is_dashboard else datetime.utcnow() | ||
) | ||
expiration = max_age if max_age != 0 else FAR_FUTURE | ||
response.expires = response.last_modified + timedelta( | ||
seconds=expiration | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,19 +16,27 @@ | |
# under the License. | ||
import logging | ||
from collections import defaultdict | ||
from datetime import date | ||
from datetime import date, datetime | ||
from typing import Any, Callable, DefaultDict, Dict, List, Optional, Set, Tuple, Union | ||
from urllib import parse | ||
|
||
import msgpack | ||
import pyarrow as pa | ||
import simplejson as json | ||
from flask import g, request | ||
from flask import abort, flash, g, redirect, request | ||
from flask_appbuilder.security.sqla import models as ab_models | ||
from flask_appbuilder.security.sqla.models import User | ||
from flask_babel import gettext as __ | ||
|
||
import superset.models.core as models | ||
from superset import app, dataframe, db, is_feature_enabled, result_set | ||
from superset import ( | ||
app, | ||
dataframe, | ||
db, | ||
is_feature_enabled, | ||
result_set, | ||
security_manager, | ||
) | ||
from superset.connectors.connector_registry import ConnectorRegistry | ||
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType | ||
from superset.exceptions import SupersetException, SupersetSecurityException | ||
|
@@ -298,6 +306,46 @@ def get_time_range_endpoints( | |
CONTAINER_TYPES = ["COLUMN", "GRID", "TABS", "TAB", "ROW"] | ||
|
||
|
||
def get_dashboard(dashboard_id_or_slug: str) -> Dashboard:
    """
    Fetch a single dashboard by numeric id or by slug.

    :param dashboard_id_or_slug: a decimal string is matched against the
        dashboard ``id``; anything else is matched against ``slug``
    :returns: the matching dashboard
    :raises: aborts the request with a 404 when no dashboard matches
    """
    session = db.session()
    qry = session.query(Dashboard)
    # ``isdecimal`` rather than ``isdigit``: the latter also accepts characters
    # such as "²" that ``int()`` rejects, which would raise ValueError rather
    # than falling through to the slug lookup.
    if dashboard_id_or_slug.isdecimal():
        qry = qry.filter_by(id=int(dashboard_id_or_slug))
    else:
        qry = qry.filter_by(slug=dashboard_id_or_slug)
    dashboard = qry.one_or_none()

    if not dashboard:
        abort(404)

    return dashboard
|
||
|
||
def get_datasources_from_dashboard( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. looks like a good candidate for the Dashboard class method There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a little confusing: Dashboard class has a get So right now i removed this helper function from utils, and build dict in the dashboard function. But i rename datasource to |
||
dashboard: Dashboard, | ||
) -> DefaultDict[Any, List[Any]]: | ||
datasources = defaultdict(list) | ||
for slc in dashboard.slices: | ||
datasource = slc.datasource | ||
if datasource: | ||
datasources[datasource].append(slc) | ||
return datasources | ||
|
||
|
||
def get_dashboard_latest_changed_on(_self: Any, dashboard_id_or_slug: str) -> datetime: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is _self here? ideally we should avoid Any types There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please see other functions that used by decorator: |
||
""" | ||
Get latest changed datetime for a dashboard. The change could be dashboard | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. s/Get latest changed datetime for a dashboard. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes. I rename it to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we get more specific with _self type ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know what type to use here. do you have suggestion? (Sorry i am not an expert in Python) |
||
metadata change, or any of its slice data change. | ||
|
||
This function takes `self` since it must have the same signature as the | ||
decorated method. | ||
""" | ||
dash = get_dashboard(dashboard_id_or_slug) | ||
dash_changed_on = dash.changed_on | ||
slices_changed_on = max([s.changed_on for s in dash.slices]) | ||
return max(dash_changed_on, slices_changed_on) | ||
|
||
|
||
def get_dashboard_extra_filters( | ||
slice_id: int, dashboard_id: int | ||
) -> List[Dict[str, Any]]: | ||
|
@@ -490,6 +538,26 @@ def check_slice_perms(_self: Any, slice_id: int) -> None: | |
viz_obj.raise_for_access() | ||
|
||
|
||
def check_dashboard_perms(_self: Any, dashboard_id_or_slug: str) -> None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. best practice is to have a unit test for every function, it would be great if you could add some There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, i agree. but this function is refactored out from dashboard function. it is tested in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. a good practice is to incrementally improve the state of the code, however it will be your call here |
||
""" | ||
Check if user can access the datasources used by a dashboard. | ||
|
||
This function takes `self` since it must have the same signature as the | ||
decorated method. | ||
""" | ||
|
||
dash = get_dashboard(dashboard_id_or_slug) | ||
datasources = get_datasources_from_dashboard(dash) | ||
if app.config["ENABLE_ACCESS_REQUEST"]: | ||
for datasource in datasources: | ||
if datasource and not security_manager.can_access_datasource(datasource): | ||
flash( | ||
__(security_manager.get_datasource_access_error_msg(datasource)), | ||
"danger", | ||
) | ||
redirect("superset/request_access/?" f"dashboard_id={dash.id}&") | ||
|
||
|
||
def _deserialize_results_payload( | ||
payload: Union[bytes, str], query: Query, use_msgpack: Optional[bool] = False | ||
) -> Dict[str, Any]: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
doesn't seem robust; is it possible to validate via the URI path or just pass a param?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
after introducing this skip,
dashboard_id_or_slug
is not needed in the decorator function any more. This check is removed.