Skip to content

Commit

Permalink
[Data] Configure logging with YAML file (ray-project#44440)
Browse files Browse the repository at this point in the history
Configuring logging with a YAML file confers some advantages over configuring logging with Python:
- It's easier to configure (we don't need to touch source code)
- Currently, configuration is included in the Ray wheel. Later on, however, we can download configuration files at runtime. This'd let us change logging behavior without requiring us to rebuild wheels or images.

---------

Signed-off-by: Balaji Veeramani <[email protected]>
  • Loading branch information
bveeramani authored Apr 10, 2024
1 parent 01d975e commit 173d658
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 70 deletions.
6 changes: 3 additions & 3 deletions python/ray/data/_internal/execution/streaming_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
select_operator_to_run,
update_operator_states,
)
from ray.data._internal.logging import get_log_path
from ray.data._internal.logging import get_log_directory
from ray.data._internal.progress_bar import ProgressBar
from ray.data._internal.stats import DatasetStats, StatsManager
from ray.data.context import DataContext
Expand Down Expand Up @@ -106,9 +106,9 @@ def execute(
context = DataContext.get_current()
if context.print_on_execution_start:
message = "Starting execution of Dataset."
log_path = get_log_path()
log_path = get_log_directory()
if log_path is not None:
message += f" Full log is in {log_path}"
message += f" Full logs are in {log_path}"
logger.info(message)
logger.info(f"Execution plan of Dataset: {dag}")

Expand Down
76 changes: 27 additions & 49 deletions python/ray/data/_internal/logging.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import logging
import logging.config
import os
from typing import Optional

import ray
from ray._private.log import PlainRayHandler
from ray._private.ray_constants import LOGGER_FORMAT
import yaml

_default_file_handler: logging.Handler = None
import ray

DEFAULT_DATASET_LOG_FILENAME = "ray-data.log"
DEFAULT_CONFIG_PATH = os.path.abspath(
os.path.join(os.path.dirname(__file__), "logging.yaml")
)

# To facilitate debugging, Ray Data writes debug logs to a file. However, if Ray Data
# logs every scheduler loop, logging might impact performance. So, we add a "TRACE"
Expand Down Expand Up @@ -53,70 +54,47 @@ def setFormatter(self, fmt: logging.Formatter) -> None:
def _try_create_handler(self):
assert self._handler is None

global_node = ray._private.worker._global_node
if global_node is None:
log_directory = get_log_directory()
if log_directory is None:
return

session_dir = global_node.get_session_dir_path()
self._path = os.path.join(session_dir, "logs", self._filename)
self._path = os.path.join(log_directory, self._filename)
self._handler = logging.FileHandler(self._path)
if self._formatter is not None:
self._handler.setFormatter(self._formatter)

@property
def path(self) -> Optional[str]:
"""Path to the log file or ``None`` if the file hasn't been created yet."""
return self._path


def configure_logging() -> None:
"""Configure the Python logger named 'ray.data'.
This function configures the logger to write Ray Data logs to a file in the Ray
session directory.
Only call this function once. It'll fail if you call it twice.
This function loads the configration YAML specified by the "RAY_DATA_LOGGING_CONFIG"
environment variable. If the variable isn't set, this function loads the
"logging.yaml" file that is adjacent to this module.
"""
# Save the file handler in a global variable so that we can get the log path later.
global _default_file_handler
assert _default_file_handler is None, "Logging already configured."

logger = logging.getLogger("ray.data")
logger.setLevel(logging.DEBUG)
# We don't want to print debug logs to the console, so don't propagate logs to the
# 'ray' parent logger.
logger.propagate = False

formatter = logging.Formatter(fmt=LOGGER_FORMAT)

file_handler = SessionFileHandler(DEFAULT_DATASET_LOG_FILENAME)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

console_handler = PlainRayHandler()
console_handler.setFormatter(formatter)
console_handler.setLevel(logging.INFO)
logger.addHandler(console_handler)

_default_file_handler = file_handler


def is_logging_configured() -> bool:
return _default_file_handler is not None
config_path = os.environ.get("RAY_DATA_LOGGING_CONFIG", DEFAULT_CONFIG_PATH)
with open(config_path) as file:
config = yaml.safe_load(file)
logging.config.dictConfig(config)


def reset_logging() -> None:
"""Reset the logger named 'ray.data' to its initial state.
Used for testing.
"""
global _default_file_handler
_default_file_handler = None

logger = logging.getLogger("ray.data")
logger.handlers.clear()
logger.setLevel(logging.NOTSET)


def get_log_path() -> Optional[str]:
return _default_file_handler.path
def get_log_directory() -> Optional[str]:
"""Return the directory where Ray Data writes log files.
If Ray isn't initialized, this function returns ``None``.
"""
global_node = ray._private.worker._global_node
if global_node is None:
return None

session_dir = global_node.get_session_dir_path()
return os.path.join(session_dir, "logs")
22 changes: 22 additions & 0 deletions python/ray/data/_internal/logging.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
version: 1
disable_existing_loggers: False

formatters:
ray:
format: "%(asctime)s\t%(levelname)s %(filename)s:%(lineno)s -- %(message)s"

handlers:
file:
class: ray.data._internal.logging.SessionFileHandler
formatter: ray
filename: ray-data.log
console:
class: ray._private.log.PlainRayHandler
formatter: ray
level: INFO

loggers:
ray.data:
level: DEBUG
handlers: [file, console]
propagate: False
4 changes: 2 additions & 2 deletions python/ray/data/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from typing import Callable

from ray.data._internal.logging import get_log_path
from ray.data._internal.logging import get_log_directory
from ray.data.context import DataContext
from ray.exceptions import UserCodeException
from ray.util import log_once
Expand Down Expand Up @@ -64,7 +64,7 @@ def handle_trace(*args, **kwargs):
"Exception occurred in user code, with the abbreviated stack "
"trace below. By default, the Ray Data internal stack trace "
"is omitted from stdout, and only written to the Ray Data log "
f"file at {get_log_path()}. To "
f"files at {get_log_directory()}. To "
"output the full stack trace to stdout, set "
"`DataContext.log_internal_stack_trace_to_stdout` to True."
)
Expand Down
71 changes: 56 additions & 15 deletions python/ray/data/tests/test_logging.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,64 @@
import logging
import os
import re
from datetime import datetime

import pytest
import yaml

import ray
from ray.data._internal.logging import (
configure_logging,
get_log_path,
is_logging_configured,
reset_logging,
)
from ray.data._internal.logging import configure_logging, get_log_directory
from ray.tests.conftest import * # noqa


@pytest.fixture
def setup_logging():
if not is_logging_configured():
configure_logging()
@pytest.fixture(name="configure_logging")
def configure_logging_fixture():
from ray.data._internal.logging import configure_logging

configure_logging()
yield


@pytest.fixture(name="reset_logging")
def reset_logging_fixture():
from ray.data._internal.logging import reset_logging

yield
reset_logging()


def test_messages_logged_to_file(setup_logging, shutdown_only):
def test_messages_logged_to_file(configure_logging, reset_logging, shutdown_only):
ray.init()
logger = logging.getLogger("ray.data.spam")

logger.debug("ham")

log_path = get_log_path()
log_path = os.path.join(get_log_directory(), "ray-data.log")
with open(log_path) as file:
log_contents = file.read()
assert "ham" in log_contents


def test_messages_printed_to_console(propagate_logs, capsys, setup_logging):
def test_messages_printed_to_console(
capsys,
configure_logging,
reset_logging,
propagate_logs,
):
logger = logging.getLogger("ray.data.spam")

logger.info("ham")

assert "ham" in capsys.readouterr().err


def test_message_format(setup_logging, shutdown_only):
def test_message_format(configure_logging, reset_logging, shutdown_only):
ray.init()
logger = logging.getLogger("ray.data.spam")

logger.info("ham")

log_path = get_log_path()
log_path = os.path.join(get_log_directory(), "ray-data.log")
with open(log_path, "r") as f:
log_contents = f.read()
(
Expand All @@ -70,6 +80,37 @@ def test_message_format(setup_logging, shutdown_only):
assert logged_msg == "ham"


def test_custom_config(reset_logging, monkeypatch, tmp_path):
config_path = tmp_path / "logging.yaml"
monkeypatch.setenv("RAY_DATA_LOGGING_CONFIG", config_path)

handlers = {
"console": {"class": "logging.StreamHandler", "stream": "ext://sys.stdout"}
}
loggers = {
"ray.data": {
"level": "CRITICAL",
"handlers": ["console"],
},
}
config = {
"version": 1,
"handlers": handlers,
"loggers": loggers,
"disable_existing_loggers": False,
}
with open(config_path, "w") as file:
yaml.dump(config, file)

configure_logging()

logger = logging.getLogger("ray.data")

assert logger.getEffectiveLevel() == logging.CRITICAL
assert len(logger.handlers) == 1
assert isinstance(logger.handlers[0], logging.StreamHandler)


if __name__ == "__main__":
import sys

Expand Down
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,7 +789,7 @@ def has_ext_modules(self):
]
},
package_data={
"ray": ["includes/*.pxd", "*.pxd"],
"ray": ["includes/*.pxd", "*.pxd", "data/_internal/logging.yaml"],
},
include_package_data=True,
exclude_package_data={
Expand Down

0 comments on commit 173d658

Please sign in to comment.