Skip to content

Commit

Permalink
fix versioned buckets: list, rename, delete (#134)
Browse files Browse the repository at this point in the history
Co-authored-by: Thomas Zanivan <[email protected]>
  • Loading branch information
Zhell1 and Thomas Zanivan authored Nov 29, 2021
1 parent ea8fd5e commit 7b5481f
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 4 deletions.
35 changes: 34 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,22 @@

## Development environment

The following requires poetry 1.2.0a2 or newer.

Create Python env

```shell
make env
source ./.venv/bin/activate
```

If `./.venv/bin/activate` is not available, you can use a conda environment instead:

```shell
conda create --name s3contents
conda activate s3contents
```

## Iteration

Start minio (using docker) in one terminal:
Expand All @@ -32,12 +41,36 @@ c.S3ContentsManager.bucket = "notebooks"

c.ServerApp.open_browser = False
c.ServerApp.tornado_settings = {"debug": True}

# only log s3contents but not boto
import logging
log = logging.getLogger()
log.setLevel(logging.ERROR)
c.log_level = "DEBUG"
c.Application.log_level = "DEBUG"
```

Start Jupyter Notebook in another terminal:

```shell
jupyter lab
jupyter lab --config ~/.jupyter/jupyter_notebook_config.py
```

## Applying changes

`make env` should have poetry install the package in editable mode, so changes to the source are picked up without reinstalling.

If your changes are not picked up, you can re-run the full setup:

```
conda activate s3contents && make env && jupyter lab --config ~/.jupyter/jupyter_notebook_config.py
```

Or use this faster alternative, which copies your changes directly into the installation path:

```bash
rsync -r --exclude '.git' ./s3contents/ $(echo "$(pip show s3contents | grep Location: | cut -d' ' -f2)/s3contents/") && jupyter lab --config ~/.jupyter/jupyter_notebook_config.py

```

## Tests
Expand Down
25 changes: 24 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,30 @@ reset: clean ## Reset Python

minio: ## Run minio server
mkdir -p ${S3DIR}/notebooks
docker run -p 9000:9000 -p 9001:9001 -v ${S3DIR}:/data -e MINIO_ROOT_USER=access-key -e MINIO_ROOT_PASSWORD=secret-key minio/minio:RELEASE.2021-08-05T22-01-19Z server /data --console-address ":9001"
docker run -p 9000:9000 -p 9001:9001 -v ${S3DIR}:/data \
-e MINIO_ROOT_USER=access-key -e MINIO_ROOT_PASSWORD=secret-key \
minio/minio:RELEASE.2021-11-09T03-21-45Z server /data --console-address ":9001"

# from https://docs.min.io/minio/baremetal/installation/deploy-minio-distributed.html?ref=con#deploy-distributed-minio
minio-distributed: ## Run minio server in distributed mode (necessary for versioning)
# Starts a single minio container backed by four local "disk" directories
# under ${S3DIR}/mnt, exposing the S3 API on port 9000 and the console on 9001.
# NOTE: make variable names are case-sensitive; the volume mounts must use
# ${S3DIR} (not ${s3DIR}, which expands to an empty string and would mount
# /mnt/diskN from the host root instead of the directories created below).
	echo "Once running, manually create a versioned 'notebooks' bucket in Minio-console"
	mkdir -p "${S3DIR}/mnt/disk1/notebooks"
	mkdir -p "${S3DIR}/mnt/disk2/notebooks"
	mkdir -p "${S3DIR}/mnt/disk3/notebooks"
	mkdir -p "${S3DIR}/mnt/disk4/notebooks"
	docker run \
		-p 9000:9000 -p 9001:9001 \
		-v "${S3DIR}/mnt/disk1:/data1" \
		-v "${S3DIR}/mnt/disk2:/data2" \
		-v "${S3DIR}/mnt/disk3:/data3" \
		-v "${S3DIR}/mnt/disk4:/data4" \
		-e MINIO_ROOT_USER=access-key -e MINIO_ROOT_PASSWORD=secret-key \
		minio/minio:RELEASE.2021-11-09T03-21-45Z server \
		"/data1" \
		"/data2" \
		"/data3" \
		"/data4" \
		--console-address ":9001"


help: ## Show this help menu
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tool.poetry]
name = "s3content"
name = "s3contents"
version = "0.8.1"
packages = [{ include = "s3contents" }]
homepage = "https://github.com/danielfrg/s3contents"
Expand Down
38 changes: 38 additions & 0 deletions s3contents/genericmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ def guess_type(self, path, allow_directory=True):
----------
obj: s3.Object or string
"""
self.log.debug(
f"guess_type with path={path} and allow_directory={allow_directory}"
)
if path.endswith(".ipynb"):
return "notebook"
elif allow_directory and self.dir_exists(path):
Expand Down Expand Up @@ -202,6 +205,12 @@ def s3_detail_to_model(s3_detail):
model = base_directory_model(path)
if self.fs.isdir(path):
lstat = self.fs.lstat(path)

self.log.debug(
f"s3_detail_to_model={s3_detail_to_model}"
f"dir_s3_detail: path='{path}', lstat={lstat}"
)

if "ST_MTIME" in lstat and lstat["ST_MTIME"]:
model["created"] = model["last_modified"] = lstat["ST_MTIME"]
if content:
Expand All @@ -212,13 +221,34 @@ def s3_detail_to_model(s3_detail):
files_s3_detail = sync(
self.fs.fs.loop, self.fs.fs._lsdir, prefixed_path
)
# filter out .s3keep files
filtered_files_s3_detail = list(
filter(
lambda detail: os.path.basename(detail["Key"])
!= self.fs.dir_keep_file,
files_s3_detail,
)
)

# filter out delete_markers in versioned buckets
def is_delete_marker(detail):
lstat = self.fs.lstat(detail["Key"])
return bool("ST_MTIME" in lstat and lstat["ST_MTIME"])

filtered_files_s3_detail = list(
filter(
lambda detail: is_delete_marker(detail),
filtered_files_s3_detail,
)
)

for file_s3_detail in filtered_files_s3_detail:
self.log.debug(
f"\n file_s3_detail: {file_s3_detail}"
f"lstat={self.fs.lstat(file_s3_detail['Key'])}"
f"is_delete_marker = {is_delete_marker(file_s3_detail)}"
)

model["content"] = list(
map(s3_detail_to_model, filtered_files_s3_detail)
)
Expand All @@ -228,6 +258,9 @@ def _notebook_model_from_path(self, path, content=False, format=None):
"""
Build a notebook model from database record.
"""
self.log.debug(
f"_notebook_model_from_path with path={path}, content={content}, format={format}"
)
model = base_model(path)
model["type"] = "notebook"
if self.fs.isfile(path):
Expand All @@ -251,6 +284,9 @@ def _file_model_from_path(self, path, content=False, format=None):
"""
Build a file model from database record.
"""
self.log.debug(
f"_file_model_from_path with path={path}, content={content}, format={format}"
)
model = base_model(path)
model["type"] = "file"
if self.fs.isfile(path):
Expand All @@ -275,6 +311,8 @@ def _file_model_from_path(self, path, content=False, format=None):
def save(self, model, path):
"""Save a file or directory model to path."""

self.log.debug(f"save with path={path}, model={model}")

# Chunked uploads
# See https://jupyter-notebook.readthedocs.io/en/stable/extending/contents.html#chunked-saving
chunk = model.get("chunk", None)
Expand Down
2 changes: 1 addition & 1 deletion s3contents/s3_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def mv(self, old_path, new_path):
def cp(self, old_path, new_path):
old_path_, new_path_ = self.path(old_path), self.path(new_path)
self.log.debug(
"S3contents.S3FS: Coping `%s` to `%s`", old_path_, new_path_
"S3contents.S3FS: Copying `%s` to `%s`", old_path_, new_path_
)

if self.isdir(old_path):
Expand Down

0 comments on commit 7b5481f

Please sign in to comment.