Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cli exporter #59

Merged
merged 11 commits into from
Jun 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/e2e/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ runs:

- name: Run tests
working-directory: ${{ inputs.working_directory }}
run: pytest -n auto -v
run: pytest -v
shell: bash
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## (UNRELEASED) neptune-tensorboard 1.0.0.pre3

### Changes
- Add support for exporting existing TensorBoard logs via the CLI plugin ([#59](https://github.com/neptune-ai/neptune-tensorboard/pull/59/))

## neptune-tensorboard 1.0.0.pre2

### Changes
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ importlib-metadata = { version = "*", python = "<3.8" }
tensorflow = { version = ">=2.0.0", optional = true }
torch = { version = ">=1.9.0", optional = true }
tensorboardX = { version = ">=2.2.0", optional = true }
tbparse = { version = "*", optional = true }

# dev
pre-commit = { version = "*", optional = true }
Expand Down Expand Up @@ -99,3 +100,6 @@ force_grid_wrap = 2
[tool.flake8]
max-line-length = 120
extend-ignore = "E203"

[tool.poetry.plugins."neptune.plugins"]
"tensorboard" = "neptune_tensorboard_plugin:sync"
3 changes: 3 additions & 0 deletions src/neptune_tensorboard/sync/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Public surface of the `sync` package: DataSync exports on-disk TensorBoard
# event files into Neptune runs.
__all__ = ["DataSync"]

from neptune_tensorboard.sync.sync_impl import DataSync
98 changes: 98 additions & 0 deletions src/neptune_tensorboard/sync/sync_impl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import hashlib
import pathlib
import traceback

import click
import neptune
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# `tbparse` is an optional extra; fail fast at import time with an actionable
# install hint instead of a bare ModuleNotFoundError deep inside the export.
try:
    import tbparse
except ModuleNotFoundError:
    raise ModuleNotFoundError("neptune-tensorboard: require `tbparse` for exporting logs (pip install tbparse)")


def compute_md5_hash(path):
    """Return the 32-character hex MD5 digest of *path* (a str).

    Used as a stable `custom_run_id` so the same event file always maps to
    the same Neptune run.
    """
    digest = hashlib.md5()
    digest.update(path.encode())
    return digest.hexdigest()


class DataSync:
    """Export existing TensorBoard event files under a directory to Neptune.

    Each readable ``*tfevents*`` file becomes one Neptune run whose
    ``custom_run_id`` is the MD5 hash of the file path, so a second export of
    the same directory skips files that were already synced.
    """

    def __init__(self, project, api_token, path):
        # project / api_token may be None, in which case the Neptune client
        # falls back to its environment-variable configuration.
        self._project = project
        self._api_token = api_token
        self._path = path

    def run(self):
        """Walk ``self._path`` recursively and export every valid event file."""
        # NOTE: Fetching custom_run_ids is not a trivial operation, so
        # we cache the custom_run_ids here.
        self._existing_custom_run_ids = self._get_existing_neptune_custom_run_ids()
        # Inspect if files correspond to EventFiles.
        for path in pathlib.Path(self._path).glob("**/*tfevents*"):
            try:
                # methods below expect path to be str.
                str_path = str(path)

                # only try export for valid files i.e. files which EventAccumulator
                # can actually read.
                if self._is_valid_tf_event_file(str_path):
                    self._export_to_neptune_run(str_path)
            except Exception as e:
                # Best-effort: report the failure and keep going with the
                # remaining event files.
                click.echo("Cannot load run from file '{}'. ".format(path) + "Error: " + str(e))
                # BUGFIX: `traceback.print_exc()` takes no exception argument
                # (its first parameter is `limit`); the previous
                # `traceback.print_exc(e)` raised a TypeError itself and was
                # silenced by a bare `except`. Inside this handler, calling it
                # with no arguments prints the active exception's traceback.
                traceback.print_exc()

    def _is_valid_tf_event_file(self, path):
        """Return True iff EventAccumulator can read at least one event from *path*."""
        accumulator = EventAccumulator(path)
        accumulator.Reload()
        try:
            accumulator.FirstEventTimestamp()
        except ValueError:
            # No events present -> not a usable event file.
            return False
        return True

    def _get_existing_neptune_custom_run_ids(self):
        """Fetch the set of custom_run_ids already present in the Neptune project."""
        with neptune.init_project(project=self._project, api_token=self._api_token) as project:
            try:
                return set(project.fetch_runs_table().to_pandas()["sys/custom_run_id"].to_list())
            except KeyError:
                # empty project -> no "sys/custom_run_id" column.
                return set()

    def _experiment_exists(self, hash_run_id, run_path):
        """Return True iff a run with this custom_run_id was already exported.

        ``run_path`` is currently unused; kept for signature stability and
        potential path-based disambiguation.
        """
        return hash_run_id in self._existing_custom_run_ids

    def _export_to_neptune_run(self, path):
        """Create (or skip) a Neptune run for the event file at *path*."""
        # custom_run_id supports str with max length of 32 -> MD5 hex digest fits.
        hash_run_id = compute_md5_hash(path)

        # Previously the event-file *path* argument was mistakenly
        # `self._project`; it is unused by the check, but pass the path for
        # clarity.
        if self._experiment_exists(hash_run_id, path):
            click.echo(f"{path} was already synced")
            return

        with neptune.init_run(custom_run_id=hash_run_id, project=self._project, api_token=self._api_token) as run:
            run["tensorboard_path"] = path

            namespace_handler = run["tensorboard"]

            # parse events file
            reader = tbparse.SummaryReader(path)

            # Read scalars
            for scalar in reader.scalars.itertuples():
                namespace_handler["scalar"][scalar.tag].append(scalar.value)

            # Read images (and figures)
            for image in reader.images.itertuples():
                namespace_handler["image"][image.tag].append(neptune.types.File.as_image(image.value))

            # Read text
            for text in reader.text.itertuples():
                namespace_handler["text"][text.tag].append(text.value)

            # Read hparams
            for hparam in reader.hparams.itertuples():
                namespace_handler["hparams"][hparam.tag].append(hparam.value)

            click.echo(f"{path} was exported with run_id: {hash_run_id}")
18 changes: 18 additions & 0 deletions src/neptune_tensorboard_plugin/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os

import click


@click.command("tensorboard")
@click.option("--project", help="Neptune Project name")
@click.option("--api_token", help="Neptune API token")
@click.argument("log_dir", required=True)
def sync(project, api_token, log_dir):
    """Export existing TensorBoard logs under LOG_DIR to Neptune."""
    if os.path.exists(log_dir):
        # Deferred import: keeps the CLI cheap to load when the process is
        # only executed for shell autocompletion purposes.
        from neptune_tensorboard.sync import DataSync

        DataSync(project, api_token, log_dir).run()
    else:
        click.echo("ERROR: Provided `log_dir` path doesn't exist", err=True)
49 changes: 49 additions & 0 deletions tests/test_exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import hashlib
import os
import shutil
import uuid
import time

import neptune
import torch
from tensorboardX.writer import SummaryWriter

from neptune_tensorboard.sync.sync_impl import DataSync


def test_exporter():
    """End-to-end check: write tensorboardX logs, export them, verify in Neptune."""
    # Unique directory so parallel/repeated test runs don't collide.
    log_dir = str(uuid.uuid4())
    summary_writer = SummaryWriter(log_dir=log_dir)

    summary_writer.add_scalar("tensorboardX_scalar", 0.5)
    summary_writer.add_image("zero", torch.zeros(12, 12, 3), dataformats="HWC")
    summary_writer.add_images("zeros", torch.zeros(4, 12, 12, 3), dataformats="NHWC")
    summary_writer.add_text("my_text", "Hello World")
    summary_writer.add_text("my_text", "Hello World 2")

    summary_writer.flush()
    summary_writer.close()

    # project/api_token None -> resolved from the environment by Neptune.
    DataSync(project=None, api_token=None, path=log_dir).run()

    # let the data sync (we can't call run.sync() as we don't have
    # access to run)
    time.sleep(10)

    # DataSync keyed the run on the MD5 of the event-file path; recompute it
    # from the first (and only) file tensorboardX wrote into log_dir.
    first_event_file = os.path.join(log_dir, os.listdir(log_dir)[0])
    hash_run_id = hashlib.md5(first_event_file.encode()).hexdigest()

    with neptune.init_project() as project:
        runs_df = project.fetch_runs_table().to_pandas()
        custom_run_id_map = dict(zip(runs_df["sys/custom_run_id"], runs_df["sys/id"]))
        run_id = custom_run_id_map[hash_run_id]

    with neptune.init_run(with_id=run_id) as run:
        assert run.exists("tensorboard_path")
        assert run.exists("tensorboard/image")
        assert run.exists("tensorboard/scalar")
        assert run.exists("tensorboard/text")

    shutil.rmtree(log_dir)