foerstner-lab · joelb123 · Dec 8, 2020 · Dec 8, 2020 · Dec 8, 2020 · Dec 9, 2020
diff --git a/.gitignore b/.gitignore
@@ -62,3 +62,9 @@ target/
 
 # pyenv python configuration file
 .python-version
+
+# virtual environment
+venv/
+
+# test data
+tests/data/
diff --git a/.travis.yml b/.travis.yml
@@ -1,26 +1,15 @@
-# Config file for automatic testing at travis-ci.org
-# This file will be regenerated if you run travis_pypi_setup.py
-
+dist: focal
+os: linux
 language: python
+cache:
+    directories:
+        - $HOME/.cache/pip
+        - $TRAVIS_BUILD_DIR/tests/data
 python:
-  - 3.6
-
-# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
+    - "3.8"
 install:
-  - "pip install -r requirements_dev.txt"
-
-# command to run tests
-script: cd tests && pytest && cd ..
+    - pip install -r requirements.txt
+    - pip install -v pytest pytest-cov pytest-datadir-mgr
+script:
+    - pytest --cov=gffpandas -s
 
-# After you create the Github repo and add it to Travis, run the
-# travis_pypi_setup.py script to finish PyPI deployment setup
-# deploy:
-#   provider: pypi
-#   distributions: sdist bdist_wheel
-#   user: konrad
-#   password:
-#     secure: PLEASE_REPLACE_ME
-#   on:
-#     tags: true
-#     repo: konrad/pandasgff
-#     python: 2.7
diff --git a/fixtures b/fixtures
diff --git a/gffpandas/gffpandas.py b/gffpandas/gffpandas.py
@@ -6,14 +6,22 @@ def read_gff3(input_file):
     return Gff3DataFrame(input_file)
 
 
+def _split_atts(atts):
+    """Parse ``;``-separated ``key=value`` items into a dict."""
+    pairs = (kv.partition("=") for kv in atts.split(";") if "=" in kv)
+    return {tag: value for tag, _, value in pairs}
+
+
 class Gff3DataFrame(object):
     """This class contains header information in the header attribute and
     a actual annotation data in the pandas dataframe in the df
     attribute.
 
     """
 
-    def __init__(self, input_gff_file=None, input_df=None, input_header=None) -> None:
+    def __init__(
+        self, input_gff_file=None, input_df=None, input_header=None
+    ) -> None:
         """Create an instance."""
         if input_gff_file is not None:
             self._gff_file = input_gff_file
@@ -146,7 +154,9 @@ def filter_feature_of_type(self, feature_type_list) -> "Gff3DataFrame":
         feature_df = self.df.loc[self.df.type.isin(feature_type_list)]
         return Gff3DataFrame(input_df=feature_df, input_header=self.header)
 
-    def filter_by_length(self, min_length=None, max_length=None) -> "Gff3DataFrame":
+    def filter_by_length(
+        self, min_length=None, max_length=None
+    ) -> "Gff3DataFrame":
         """Filtering the pandas dataframe by the gene_length.
 
         For this method the desired minimal and maximal bp length
@@ -165,7 +175,9 @@ def filter_by_length(self, min_length=None, max_length=None) -> "Gff3DataFrame":
         filtered_by_length = self.df[
             (gene_length >= min_length) & (gene_length <= max_length)
         ]
-        return Gff3DataFrame(input_df=filtered_by_length, input_header=self.header)
+        return Gff3DataFrame(
+            input_df=filtered_by_length, input_header=self.header
+        )
 
     def attributes_to_columns(self) -> pd.DataFrame:
         """Saving each attribute-tag to a single column.
@@ -181,14 +193,7 @@ def attributes_to_columns(self) -> pd.DataFrame:
         """
         attribute_df = self.df.copy()
         df_attributes = attribute_df.loc[:, "seq_id":"attributes"]
-        attribute_df["at_dic"] = attribute_df.attributes.apply(
-            lambda attributes: dict(
-                [
-                    key_value_pair.split(sep="=", maxsplit=1)
-                    for key_value_pair in attributes.split(";")
-                ]
-            )
-        )
+        attribute_df["at_dic"] = attribute_df.attributes.apply(_split_atts)
         attribute_df["at_dic_keys"] = attribute_df["at_dic"].apply(
             lambda at_dic: list(at_dic.keys())
         )
@@ -202,7 +207,9 @@ def attributes_to_columns(self) -> pd.DataFrame:
             )
         return df_attributes
 
-    def get_feature_by_attribute(self, attr_tag, attr_value_list) -> "Gff3DataFrame":
+    def get_feature_by_attribute(
+        self, attr_tag, attr_value_list
+    ) -> "Gff3DataFrame":
         """Filtering the pandas dataframe by a attribute.
 
         The 9th column of a gff3-file contains the list of feature
@@ -226,8 +233,12 @@ def get_feature_by_attribute(self, attr_tag, attr_value_list) -> "Gff3DataFrame"
         """
         df_copy = self.df.copy()
         attribute_df = Gff3DataFrame.attributes_to_columns(self)
-        filtered_by_attr_df = df_copy.loc[attribute_df[attr_tag].isin(attr_value_list)]
-        return Gff3DataFrame(input_df=filtered_by_attr_df, input_header=self.header)
+        filtered_by_attr_df = df_copy.loc[
+            attribute_df[attr_tag].isin(attr_value_list)
+        ]
+        return Gff3DataFrame(
+            input_df=filtered_by_attr_df, input_header=self.header
+        )
 
     def stats_dic(self) -> dict:
         """Gives the following statistics for the data:
@@ -320,7 +331,9 @@ def overlaps_with(
             overlap_df = overlap_df[~condition]
         return Gff3DataFrame(input_df=overlap_df, input_header=self.header)
 
-    def find_duplicated_entries(self, seq_id=None, type=None) -> "Gff3DataFrame":
+    def find_duplicated_entries(
+        self, seq_id=None, type=None
+    ) -> "Gff3DataFrame":
         """Find entries which are redundant.
 
         For this method the chromosom accession number (seq_id) as well as the
@@ -338,5 +351,7 @@ def find_duplicated_entries(self, seq_id=None, type=None) -> "Gff3DataFrame":
         """
         input_df = self.df[self.df.seq_id == seq_id]
         df_feature = input_df[input_df.type == type]
-        duplicate = df_feature.loc[df_feature[["end", "start", "strand"]].duplicated()]
+        duplicate = df_feature.loc[
+            df_feature[["end", "start", "strand"]].duplicated()
+        ]
         return Gff3DataFrame(input_df=duplicate, input_header=self.header)
diff --git a/pytest.ini b/pytest.ini
diff --git a/requirements_dev.txt b/requirements_dev.txt
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -1,3 +1,22 @@
 # -*- coding: utf-8 -*-
-
 """Unit test package for gffpandas."""
+
+# standard library imports
+import functools
+
+
+def print_docstring():
+    """Return a decorator that prints a function's docstring on each call."""
+
+    def announce(target):
+        """Wrap ``target`` so its docstring is printed before it runs."""
+
+        def announced(*args, **kwargs):
+            """Print the wrapped docstring, then delegate to ``target``."""
+            print(target.__doc__)
+            result = target(*args, **kwargs)
+            return result
+
+        return functools.update_wrapper(announced, target)
+
+    return announce
diff --git a/tests/gffpandas b/tests/gffpandas
diff --git a/tests/pytest.ini b/tests/pytest.ini