#!/bin/bash -eu
# CI entry point: installs tox and runs the tox environments from the repository root.
# Intended to work both on CI (where $CI is set) and on a developer machine (where it is not).

# this script lives in .ci/, so step into it and then up to reach the repository root
cd "$(dirname "$0")"
cd ..

# only the exit status matters here, so keep the resolved sudo path out of the logs
if ! command -v sudo >/dev/null; then
    # CI or Docker sometimes don't have it, so useful to have a dummy
    function sudo {
        "$@"
    }
fi

# ${CI:-} instead of $CI: the script runs with 'set -u', so referencing an unset CI
# directly would abort with "unbound variable" when the script is run outside of CI
if [ -n "${CI:-}" ]; then
    # install OS specific stuff here
    if [[ "$OSTYPE" == "darwin"* ]]; then
        # macos
        :
    else
        :
    fi
fi

pip3 install --user tox
tox
"src/stexport/exporthelpers"] + path = src/stexport/exporthelpers + url = https://github.com/karlicoss/exporthelpers.git diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b7112f4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Dmitrii Gerasimov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.org b/README.org new file mode 100644 index 0000000..c0acd6d --- /dev/null +++ b/README.org @@ -0,0 +1,78 @@ +#+begin_src python :dir src :results drawer :exports results +import stexport.export as E; return E.make_parser().prog +#+end_src + +#+RESULTS: +:results: +Export your personal Stackexchange data +:end: + + +* Setting up +1. The easiest way is =pip3 install --user git+https://github.com/karlicoss/stexport=. + + Alternatively, use =git clone --recursive=, or =git pull && git submodule update --init=. After that, you can use =pip3 install --editable=. +2. 
See [[https://meta.stackexchange.com/questions/261829/where-i-can-get-my-access-token-key-for-the-api][this]] for info on getting application =key= and =access_token= +# TODO hmm, do we need user access token at all? not sure +# key is probably needed to have more queries +# TODO I have some notes on getting the token in my private secrets.py file + +* Exporting + +#+begin_src python :dir src :results drawer :exports results +import stexport.export as E; return E.make_parser().epilog +#+end_src + +#+RESULTS: +:results: + +Usage: + +*Recommended*: create =secrets.py= keeping your api parameters, e.g.: + + +: key = "KEY" +: access_token = "ACCESS_TOKEN" +: user_id = "USER_ID" + + +After that, use: + +: python3 -m stexport.export --secrets /path/to/secrets.py + +That way you type less and have control over where you keep your plaintext secrets. + +*Alternatively*, you can pass parameters directly, e.g. + +: python3 -m stexport.export --key --access_token --user_id + +However, this is verbose and prone to leaking your keys/tokens/passwords in shell history. + + + +I *highly* recommend checking exported files at least once just to make sure they contain everything you expect from your export. If not, please feel free to ask or raise an issue! + +:end: + + +* Using data + +#+begin_src python :dir src :results drawer :exports results +import stexport.exporthelpers.dal_helper as D; return D.make_parser().epilog +#+end_src + +#+RESULTS: +:results: + +You can use =stexport.dal= (stands for "Data Access/Abstraction Layer") to access your exported data, even offline. +I elaborate on motivation behind it [[https://beepb00p.xyz/exports.html#dal][here]]. + +- main usecase is to be imported as python module to allow for *programmatic access* to your data. + + You can find some inspiration in [[https://beepb00p.xyz/mypkg.html][=my.=]] package that I'm using as an API to all my personal data. 
+ +- to test it against your export, simply run: ~python3 -m stexport.dal --source /path/to/export~ + +- you can also try it interactively: ~python3 -m stexport.dal --source /path/to/export --interactive~ + +:end: diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..0f8386f --- /dev/null +++ b/mypy.ini @@ -0,0 +1,10 @@ +[mypy] +pretty = True +show_error_context = True +show_error_codes = True +check_untyped_defs = True +namespace_packages = True + +# an example of suppressing +# [mypy-my.config.repos.pdfannots.pdfannots] +# ignore_errors = True diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..c6834ed --- /dev/null +++ b/pytest.ini @@ -0,0 +1,8 @@ +[pytest] +# discover files that don't follow test_ naming. Useful to keep tests along with the source code +python_files = *.py +addopts = + --verbose + + # otherwise it won't discover doctests + --doctest-modules diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..dbf2472 --- /dev/null +++ b/setup.py @@ -0,0 +1,60 @@ +# see https://github.com/karlicoss/pymplate for up-to-date reference + + +from setuptools import setup, find_namespace_packages # type: ignore + + +def main(): + # works with both ordinary and namespace packages + pkgs = find_namespace_packages('src') + pkg = min(pkgs) # lexicographically smallest is the correct one usually? + setup( + name=pkg, + use_scm_version={ + 'version_scheme': 'python-simplified-semver', + 'local_scheme': 'dirty-tag', + }, + setup_requires=['setuptools_scm'], + + zip_safe=False, + + packages=pkgs, + package_dir={'': 'src'}, + # necessary so that package works with mypy + package_data={pkg: ['py.typed']}, + + ## ^^^ this should be mostly automatic and not requiring any changes + + install_requires=[ + 'stackapi', + 'backoff', + # vvv example of git repo dependency + # 'repo @ git+https://github.com/karlicoss/repo.git', + + # vvv example of local file dependency. 
yes, DUMMY is necessary for some reason + # 'repo @ git+file://DUMMY/path/to/repo', + ], + extras_require={ + 'testing': ['pytest'], + 'linting': ['pytest', 'mypy', 'lxml'], # lxml for mypy coverage report + }, + + + # this needs to be set if you're planning to upload to pypi + # url='', + # author='', + # author_email='', + # description='', + + # Rest of the stuff -- classifiers, license, etc, I don't think it matters for pypi + # it's just unnecessary duplication + ) + + +if __name__ == '__main__': + main() + +# TODO +# from setuptools_scm import get_version +# https://github.com/pypa/setuptools_scm#default-versioning-scheme +# get_version(version_scheme='python-simplified-semver', local_scheme='no-local-version') diff --git a/src/stexport/__init__.py b/src/stexport/__init__.py new file mode 100644 index 0000000..dba6ae1 --- /dev/null +++ b/src/stexport/__init__.py @@ -0,0 +1,7 @@ +# NOTE: without __init__.py/__init__.pyi, mypy behaves weird. +# see https://github.com/python/mypy/issues/8584 and the related discussions +# sometimes it's kinda valuable to have namespace package and not have __init__.py though, + +# TLDR: you're better off having dummy pyi, or alternatively you can use 'mypy -p src' (but that's a bit dirty?) + +# todo not sure how it behaves when installed? 
class DAL:
    """Data access layer over exported Stackexchange JSON files.

    Only one of the given files is actually loaded: the one whose path
    compares greatest. The parsed JSON is kept in ``self.data`` and is
    expected to be a mapping keyed by site name.
    """

    def __init__(self, sources: Sequence[Path]) -> None:
        """Load the export with the greatest path among ``sources``.

        Args:
            sources: paths of export files; plain strings are accepted too.

        Raises:
            ValueError: if ``sources`` is empty.
        """
        # coerce so that callers passing plain strings (e.g. glob results) work as well
        candidates = [Path(s) for s in sources]
        if not candidates:
            # fail with an actionable message instead of max()'s "arg is an empty sequence"
            raise ValueError('DAL needs at least one source file, got none')
        # TODO later, reconstruct from chunks?
        # max() alone picks the greatest path, no need to sort first
        self.src = max(candidates)
        # NOTE(review): read_text() uses the locale's default encoding -- confirm it
        # matches the encoding the export was written with
        self.data = json.loads(self.src.read_text())

    def sites(self) -> Sequence[str]:
        """Return the top-level keys of the loaded export, sorted."""
        return sorted(self.data)

    def site_dal(self, site: str) -> 'SiteDAL':
        """Wrap the data stored under ``site`` into a ``SiteDAL``.

        Raises:
            KeyError: if ``site`` is not present in the export.
        """
        return SiteDAL(self.data[site])
def make_parser() -> argparse.ArgumentParser:
    """Build the command line parser for the export script.

    The parser is created via the ``exporthelpers`` ``Parser`` class and
    ``setup_parser`` is asked to handle the ``key``, ``access_token`` and
    ``user_id`` parameters. On top of that, exactly one of ``--all-sites``
    or ``--site`` (repeatable) must be given.
    """
    from .exporthelpers import export_helper

    p = export_helper.Parser('Export your personal Stackexchange data')
    api_params = ['key', 'access_token', 'user_id']
    export_helper.setup_parser(parser=p, params=api_params)

    # the two ways of choosing sites exclude each other, and one of them is mandatory
    site_choice = p.add_mutually_exclusive_group(required=True)
    site_choice.add_argument('--all-sites', action='store_true')
    site_choice.add_argument('--site', action='append')
    return p