Skip to content

Commit

Permalink
fix: Remove expensive logs table migration
Browse files Browse the repository at this point in the history
  • Loading branch information
erik_ritter committed Dec 4, 2020
1 parent e0288bf commit 14fd7fc
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 24 deletions.
1 change: 1 addition & 0 deletions UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ assists people when migrating to a new version.

## Next

- [11920](https://github.com/apache/incubator-superset/pull/11920): Undos the DB migration from [11714](https://github.com/apache/incubator-superset/pull/11714) to prevent adding new columns to the logs table. Deploying a sha between these two PRs may result in locking your DB.
- [11704](https://github.com/apache/incubator-superset/pull/11704) Breaking change: Jinja templating for SQL queries has been updated, removing default modules such as `datetime` and `random` and enforcing static template values. To restore or extend functionality, use `JINJA_CONTEXT_ADDONS` and `CUSTOM_TEMPLATE_PROCESSORS` in `superset_config.py`.
- [11714](https://github.com/apache/incubator-superset/pull/11714): Logs
significantly more analytics events (roughly double?), and when
Expand Down
40 changes: 40 additions & 0 deletions superset/migrations/shared/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from alembic import op
from sqlalchemy import engine_from_config
from sqlalchemy.engine import reflection
from sqlalchemy.exc import NoSuchTableError


def table_has_column(table: str, column: str) -> bool:
"""
Checks if a column exists in a given table.
:param table: A table name
:param column: A column name
:returns: True iff the column exists in the table
"""

config = op.get_context().config
engine = engine_from_config(
config.get_section(config.config_ini_section), prefix="sqlalchemy."
)
insp = reflection.Inspector.from_engine(engine)
try:
return any(col["name"] == column for col in insp.get_columns(table))
except NoSuchTableError:
return False
46 changes: 46 additions & 0 deletions superset/migrations/versions/811494c0cc23_remove_path_from_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Remove path, path_no_int, and ref from logs
Revision ID: 811494c0cc23
Revises: 8ee129739cf9
Create Date: 2020-12-03 16:21:06.771684
"""

# revision identifiers, used by Alembic.
revision = "811494c0cc23"
down_revision = "8ee129739cf9"

import sqlalchemy as sa
from alembic import op

from superset.migrations.shared import utils


def upgrade():
with op.batch_alter_table("logs") as batch_op:
if utils.table_has_column("logs", "path"):
batch_op.drop_column("path")
if utils.table_has_column("logs", "path_no_int"):
batch_op.drop_column("path_no_int")
if utils.table_has_column("logs", "ref"):
batch_op.drop_column("ref")


def downgrade():
pass
20 changes: 12 additions & 8 deletions superset/migrations/versions/a8173232b786_add_path_to_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,20 @@
from alembic import op
from sqlalchemy.dialects import mysql

from superset.migrations.shared import utils


def upgrade():
op.add_column("logs", sa.Column("path", sa.String(length=256), nullable=True))
op.add_column(
"logs", sa.Column("path_no_int", sa.String(length=256), nullable=True)
)
op.add_column("logs", sa.Column("ref", sa.String(length=256), nullable=True))
# This migration was modified post hoc to avoid locking the large logs table
# during migrations.
pass


def downgrade():
op.drop_column("logs", "path")
op.drop_column("logs", "path_no_int")
op.drop_column("logs", "ref")
with op.batch_alter_table("logs") as batch_op:
if utils.table_has_column("logs", "path"):
batch_op.drop_column("path")
if utils.table_has_column("logs", "path_no_int"):
batch_op.drop_column("path_no_int")
if utils.table_has_column("logs", "ref"):
batch_op.drop_column("ref")
3 changes: 0 additions & 3 deletions superset/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,9 +715,6 @@ class Log(Model): # pylint: disable=too-few-public-methods
dttm = Column(DateTime, default=datetime.utcnow)
duration_ms = Column(Integer)
referrer = Column(String(1024))
path = Column(String(256))
path_no_int = Column(String(256))
ref = Column(String(256))


class FavStarClassName(str, Enum):
Expand Down
19 changes: 7 additions & 12 deletions superset/utils/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,6 @@ def log( # pylint: disable=too-many-arguments
dashboard_id: Optional[int],
duration_ms: Optional[int],
slice_id: Optional[int],
path: Optional[str],
path_no_int: Optional[str],
ref: Optional[str],
referrer: Optional[str],
*args: Any,
**kwargs: Any,
Expand Down Expand Up @@ -92,6 +89,13 @@ def log_context(
if log_to_statsd:
self.stats_logger.incr(action)

payload.update(
{
"path": request.path,
"path_no_param": strip_int_from_path(request.path),
"ref": ref,
}
)
# bulk insert
try:
explode_by = payload.get("explode")
Expand All @@ -107,9 +111,6 @@ def log_context(
slice_id=slice_id,
duration_ms=round((time.time() - start_time) * 1000),
referrer=referrer,
path=request.path,
path_no_int=strip_int_from_path(request.path),
ref=ref,
)

def _wrapper(
Expand Down Expand Up @@ -210,9 +211,6 @@ def log( # pylint: disable=too-many-arguments,too-many-locals
dashboard_id: Optional[int],
duration_ms: Optional[int],
slice_id: Optional[int],
path: Optional[str],
path_no_int: Optional[str],
ref: Optional[str],
referrer: Optional[str],
*args: Any,
**kwargs: Any,
Expand All @@ -236,9 +234,6 @@ def log( # pylint: disable=too-many-arguments,too-many-locals
duration_ms=duration_ms,
referrer=referrer,
user_id=user_id,
path=path,
path_no_int=path_no_int,
ref=ref,
)
logs.append(log)
try:
Expand Down
5 changes: 4 additions & 1 deletion tests/event_logger_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,8 @@ def test_func(arg1, update_log_payload, karg1=1):
result = test_func(1, karg1=2) # pylint: disable=no-value-for-parameter
self.assertEqual(result, 2)
# should contain only manual payload
self.assertEqual(mock_log.call_args[1]["records"], [{"foo": "bar"}])
self.assertEqual(
mock_log.call_args[1]["records"],
[{"foo": "bar", "path": "/", "path_no_param": "/", "ref": None}],
)
assert mock_log.call_args[1]["duration_ms"] >= 100

0 comments on commit 14fd7fc

Please sign in to comment.