From ff52377f94b39c46c91ed031c44294a73745c1ae Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 22 Nov 2021 13:29:07 +0200 Subject: [PATCH 01/38] Added Refactored Tap Boilerplate Classes --- tap_mambu/sync.py | 54 ++++++++++++------- .../TapGenerators/__init__.py | 0 .../TapGenerators/generator.py | 27 ++++++++++ .../TapProcessors/__init__.py | 0 .../TapProcessors/processor.py | 3 ++ tap_mambu/tap_mambu_refactor/__init__.py | 19 +++++++ 6 files changed, 83 insertions(+), 20 deletions(-) create mode 100644 tap_mambu/tap_mambu_refactor/TapGenerators/__init__.py create mode 100644 tap_mambu/tap_mambu_refactor/TapGenerators/generator.py create mode 100644 tap_mambu/tap_mambu_refactor/TapProcessors/__init__.py create mode 100644 tap_mambu/tap_mambu_refactor/TapProcessors/processor.py create mode 100644 tap_mambu/tap_mambu_refactor/__init__.py diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index 5f9f3d2..c740dff 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -4,6 +4,8 @@ import singer from singer import Transformer, metadata, metrics, utils from singer.utils import strftime, strptime_to_utc + +from tap_mambu.tap_mambu_refactor import sync_endpoint_refactor from tap_mambu.transform import transform_json, transform_activities LOGGER = singer.get_logger() @@ -894,26 +896,38 @@ def sync(client, config, catalog, state): if sub_type_param: endpoint_config['params']['type'] = sub_type - total_records = sync_endpoint( - client=client, - catalog=catalog, - state=state, - start_date=start_date, - stream_name=stream_name, - path=path, - endpoint_config=endpoint_config, - api_version=endpoint_config.get('api_version', 'v2'), - api_method=endpoint_config.get('api_method', 'GET'), - static_params=endpoint_config.get('params', {}), - sub_type=sub_type, - bookmark_query_field=endpoint_config.get('bookmark_query_field'), - bookmark_field=endpoint_config.get('bookmark_field'), - bookmark_type=endpoint_config.get('bookmark_type'), - data_key=endpoint_config.get('data_key', None), - body=endpoint_config.get('body', None), - id_fields=endpoint_config.get('id_fields'), - apikey_type=endpoint_config.get('apikey_type', None) - ) + if stream_name in ["loan_accounts"]: + total_records = sync_endpoint_refactor( + client=client, + catalog=catalog, + state=state, + start_date=start_date, + stream_name=stream_name, + path=path, + endpoint_config=endpoint_config, + sub_type=sub_type + ) + else: + total_records = sync_endpoint( + client=client, + catalog=catalog, + state=state, + start_date=start_date, + stream_name=stream_name, + path=path, + endpoint_config=endpoint_config, + api_version=endpoint_config.get('api_version', 'v2'), + api_method=endpoint_config.get('api_method', 'GET'), + static_params=endpoint_config.get('params', {}), + sub_type=sub_type, + bookmark_query_field=endpoint_config.get('bookmark_query_field'), + bookmark_field=endpoint_config.get('bookmark_field'), + bookmark_type=endpoint_config.get('bookmark_type'), + data_key=endpoint_config.get('data_key', None), + body=endpoint_config.get('body', None), + id_fields=endpoint_config.get('id_fields'), + apikey_type=endpoint_config.get('apikey_type', None) + ) update_currently_syncing(state, None) LOGGER.info('Synced: {}, total_records: {}'.format( diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/__init__.py b/tap_mambu/tap_mambu_refactor/TapGenerators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py new file mode 100644 index 0000000..492b1d6 --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -0,0 +1,27 @@ +import time +from typing import List + +import requests + + +class TapGenerator: + def __init__(self, client): + self.buffer: List = list() + self.start_date = 0 + + def __iter__(self): + self.buffer = self.fetch_batch() + return self + + def __next__(self): + if not self.buffer: + self.buffer = self.fetch_batch() + if not self.buffer: + raise StopIteration() + return self.buffer.pop(0) + + def fetch_batch(self): + if not self.buffer: + # fetch lm records + pass + return list() diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/__init__.py b/tap_mambu/tap_mambu_refactor/TapProcessors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py new file mode 100644 index 0000000..8605d4c --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -0,0 +1,3 @@ +class TapProcessor: + def __init__(self): + pass diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py new file mode 100644 index 0000000..1d8c770 --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -0,0 +1,19 @@ +from tap_mambu.tap_mambu_refactor.TapGenerators.generator import TapGenerator +from tap_mambu.tap_mambu_refactor.TapProcessors.processor import TapProcessor + +stream_generator_processor_dict = { + "loan_accounts": (TapGenerator, TapProcessor) +} + + +def sync_endpoint_refactor(client, catalog, state, start_date, stream_name, + path, endpoint_config, sub_type): + generator_class, processor_class = stream_generator_processor_dict[stream_name] + generator, processor = generator_class(client), processor_class() + for record in generator: + processor.process(record) + + # for child_stream_name, child_endpoint_config in endpoint_config.get("children"): + # child_generator, child_processor = stream_generator_processor_dict[child_stream_name] + # for child_record in child_generator: + # child_processor.process(child_record) From b738db8a99f7e15f48e5e37d3e2add5112e11834 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 22 Nov 2021 13:31:32 +0200 Subject: [PATCH 02/38] ECDDC-476 Testing jira integration --- tap_mambu/tap_mambu_refactor/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 1d8c770..949d98e 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -12,8 +12,3 @@ def sync_endpoint_refactor(client, catalog, state, start_date, stream_name, generator, processor = generator_class(client), processor_class() for record in generator: processor.process(record) - - # for child_stream_name, child_endpoint_config in endpoint_config.get("children"): - # child_generator, child_processor = stream_generator_processor_dict[child_stream_name] - # for child_record in child_generator: - # child_processor.process(child_record) From efb3ea0e66e960d7538402af4c8dda8468f85594 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Tue, 23 Nov 2021 11:57:54 +0200 Subject: [PATCH 03/38] Implemented some logic, need to rethink inheritance, especially initialization --- .../TapGenerators/generator.py | 41 +++++++++++++++---- .../TapGenerators/loan_accounts_generator.py | 19 +++++++++ tap_mambu/tap_mambu_refactor/__init__.py | 10 ++++- 3 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 492b1d6..18e7588 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -5,9 +5,31 @@ class TapGenerator: - def __init__(self, client): - self.buffer: List = list() - self.start_date = 0 + buffer: List + + def __init__(self, stream_name, client, config, endpoint_config): + self.stream_name = stream_name + self.client = client + self.config = config + self.endpoint_config = endpoint_config + self.__init_buffers() + self.__init_bookmarks() + self.__init_params() + + def __init_buffers(self): + self.buffer = list() + + def __init_bookmarks(self): + self.bookmark_query_field = self.endpoint_config.get('bookmark_query_field') + self.bookmark_type = self.endpoint_config.get('bookmark_type') + self.last_bookmark_value = 0 if self.bookmark_type == "integer" else self.start_date + + def __init_params(self): + self.start_date = self.config.get('start_date') + self.static_params = self.endpoint_config.get('params', {}) + self.offset = 0 + self.limit = self.client.page_size + self.params = self.static_params def __iter__(self): self.buffer = self.fetch_batch() @@ -21,7 +43,12 @@ def __next__(self): return self.buffer.pop(0) def fetch_batch(self): - if not self.buffer: - # fetch lm records - pass - return list() + response = self.client.request( + method=self.endpoint_config.get('api_method', 'GET'), + path=self.endpoint_config.get('path'), + version=self.endpoint_config.get('api_version', 'v2'), + apikey_type=self.endpoint_config.get('apikey_type', None), + params=self.params, + endpoint=self.stream_name, + json=self.endpoint_config.get('body', None)) + return response diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py new file mode 100644 index 0000000..b4c21f6 --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -0,0 +1,19 @@ +import time +import requests + +from typing import List +from .. import TapGenerator + + +class LoanAccountsGenerator(TapGenerator): + def __init__(self, stream_name, client, config, endpoint_config): + super(LoanAccountsGenerator, self).__init__(stream_name, client, config, endpoint_config) + + def __iter__(self): + return super(LoanAccountsGenerator, self).__iter__() + + def __next__(self): + return super(LoanAccountsGenerator, self).__next__() + + def fetch_batch(self): + return super(LoanAccountsGenerator, self).fetch_batch() diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 949d98e..3aeaf14 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -1,4 +1,5 @@ from tap_mambu.tap_mambu_refactor.TapGenerators.generator import TapGenerator +from tap_mambu.tap_mambu_refactor.TapGenerators.loan_accounts_generator import LoanAccountsGenerator from tap_mambu.tap_mambu_refactor.TapProcessors.processor import TapProcessor stream_generator_processor_dict = { @@ -9,6 +10,13 @@ def sync_endpoint_refactor(client, catalog, state, start_date, stream_name, path, endpoint_config, sub_type): generator_class, processor_class = stream_generator_processor_dict[stream_name] - generator, processor = generator_class(client), processor_class() + generator, processor = generator_class(stream_name=stream_name, + client=client, + endpoint_config=endpoint_config), \ + processor_class() for record in generator: processor.process(record) + + +if __name__ == '__main__': + LoanAccountsGenerator(None, None, None, None) From 33459da773d760acc068bec30f2e24bae088077d Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Tue, 23 Nov 2021 12:24:36 +0200 Subject: [PATCH 04/38] Pushing more changes, will test after this --- .../TapGenerators/generator.py | 27 ++++++++++++++++--- .../TapGenerators/loan_accounts_generator.py | 9 ------- .../TapProcessors/processor.py | 6 +++++ tap_mambu/tap_mambu_refactor/__init__.py | 6 +---- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 18e7588..eacd434 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -1,7 +1,10 @@ import time +import requests + from typing import List +from singer import utils -import requests +from tap_mambu.transform import transform_json class TapGenerator: @@ -26,18 +29,21 @@ def __init_bookmarks(self): def __init_params(self): self.start_date = self.config.get('start_date') + self.time_extracted = None self.static_params = self.endpoint_config.get('params', {}) self.offset = 0 self.limit = self.client.page_size self.params = self.static_params def __iter__(self): - self.buffer = self.fetch_batch() + raw_batch = self.fetch_batch() + self.buffer = self.transform_batch(raw_batch) return self def __next__(self): if not self.buffer: - self.buffer = self.fetch_batch() + raw_batch = self.fetch_batch() + self.buffer = self.transform_batch(raw_batch) if not self.buffer: raise StopIteration() return self.buffer.pop(0) @@ -48,7 +54,20 @@ def fetch_batch(self): path=self.endpoint_config.get('path'), version=self.endpoint_config.get('api_version', 'v2'), apikey_type=self.endpoint_config.get('apikey_type', None), - params=self.params, + params='&'.join([f'{key}={value}' for (key, value) in self.params.items()]), endpoint=self.stream_name, json=self.endpoint_config.get('body', None)) + self.time_extracted = utils.now() + if isinstance(response, dict): + return [response] return response + + def transform_batch(self, batch): + data_key = self.endpoint_config.get('data_key', None) + transformed_batch = list() + if data_key is None: + transformed_batch = transform_json(batch, self.stream_name) + elif data_key in batch: + transformed_batch = transform_json(batch, data_key)[data_key] + return transformed_batch + diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index b4c21f6..6e8db72 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -8,12 +8,3 @@ class LoanAccountsGenerator(TapGenerator): def __init__(self, stream_name, client, config, endpoint_config): super(LoanAccountsGenerator, self).__init__(stream_name, client, config, endpoint_config) - - def __iter__(self): - return super(LoanAccountsGenerator, self).__iter__() - - def __next__(self): - return super(LoanAccountsGenerator, self).__next__() - - def fetch_batch(self): - return super(LoanAccountsGenerator, self).fetch_batch() diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index 8605d4c..d266159 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -1,3 +1,9 @@ +import json + + class TapProcessor: def __init__(self): pass + + def process(self, record): + print(json.dumps(record, indent=2)) diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 3aeaf14..7c4024e 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -3,7 +3,7 @@ from tap_mambu.tap_mambu_refactor.TapProcessors.processor import TapProcessor stream_generator_processor_dict = { - "loan_accounts": (TapGenerator, TapProcessor) + "loan_accounts": (LoanAccountsGenerator, TapProcessor) } @@ -16,7 +16,3 @@ def sync_endpoint_refactor(client, catalog, state, start_date, stream_name, processor_class() for record in generator: processor.process(record) - - -if __name__ == '__main__': - LoanAccountsGenerator(None, None, None, None) From 72e3caa1707f9c942aa3a86306538a854b68245b Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Thu, 25 Nov 2021 14:03:05 +0200 Subject: [PATCH 05/38] Intermediary commit --- tap_mambu/sync.py | 5 +- .../tap_mambu_refactor/Helpers/__init__.py | 101 ++++++++++++++++++ .../TapGenerators/generator.py | 53 +++++++-- .../TapGenerators/loan_accounts_generator.py | 10 +- .../TapProcessors/processor.py | 79 +++++++++++++- tap_mambu/tap_mambu_refactor/__init__.py | 26 +++-- 6 files changed, 249 insertions(+), 25 deletions(-) create mode 100644 tap_mambu/tap_mambu_refactor/Helpers/__init__.py diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index c740dff..e4c3345 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -901,11 +901,10 @@ def sync(client, config, catalog, state): client=client, catalog=catalog, state=state, - start_date=start_date, stream_name=stream_name, - path=path, endpoint_config=endpoint_config, - sub_type=sub_type + sub_type=sub_type, + config=config ) else: total_records = sync_endpoint( diff --git a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py new file mode 100644 index 0000000..ba93b4c --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py @@ -0,0 +1,101 @@ +import re +from singer import write_state + + +def write_bookmark(state, stream, sub_type, value): + if 'bookmarks' not in state: + state['bookmarks'] = {} + if stream not in state['bookmarks']: + state['bookmarks'][stream] = {} + if sub_type == 'self': + state['bookmarks'][stream] = value + else: + if sub_type not in state['bookmarks'][stream]: + state['bookmarks'][stream][sub_type] = {} + state['bookmarks'][stream][sub_type] = value + write_state(state) + + +# Convert camelCase to snake_case +def convert(name): + regsub = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', regsub).lower() + + +# Convert keys in json array +def convert_array(arr): + new_arr = [] + for i in arr: + if isinstance(i, list): + new_arr.append(convert_array(i)) + elif isinstance(i, dict): + new_arr.append(convert_json(i)) + else: + new_arr.append(i) + return new_arr + + +# Convert keys in json +def convert_json(this_json): + out = {} + for key in this_json: + new_key = convert(key) + if isinstance(this_json[key], dict): + out[new_key] = convert_json(this_json[key]) + elif isinstance(this_json[key], list): + out[new_key] = convert_array(this_json[key]) + else: + out[new_key] = this_json[key] + return out + + +def remove_custom_nodes(this_json): + if not isinstance(this_json, (dict, list)): + return this_json + if isinstance(this_json, list): + return [remove_custom_nodes(vv) for vv in this_json] + return {kk: remove_custom_nodes(vv) for kk, vv in this_json.items() \ + if not kk[:1] == '_'} + + +def add_cust_field(key, record, cust_field_sets): + for cf_key, cf_value in record.items(): + field = { + 'field_set_id' : key, + 'id' : cf_key, + 'value' : cf_value, + } + cust_field_sets.append(field) + +# Convert custom fields and sets +# Generalize/Abstract custom fields to key/value pairs +def convert_custom_fields(this_json): + for record in this_json: + cust_field_sets = [] + for key, value in record.items(): + if key.startswith('_'): + if isinstance(value, dict): + add_cust_field(key, value, cust_field_sets) + elif isinstance(value, list): + for element in value: + add_cust_field(key, element, cust_field_sets) + record['custom_fields'] = cust_field_sets + return this_json + + +# Run all transforms: denests _embedded, removes _embedded/_links, and +# converst camelCase to snake_case for fieldname keys. +def transform_json(this_json, path): + new_json = remove_custom_nodes(convert_custom_fields(this_json)) + out = {} + out[path] = new_json + transformed_json = convert_json(out) + return transformed_json[path] + + +def transform_activities(this_json): + for record in this_json: + for key, value in record['activity'].items(): + record[key] = value + del record['activity'] + return this_json diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index eacd434..d1fd645 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -2,33 +2,41 @@ import requests from typing import List -from singer import utils +from singer import utils, get_bookmark -from tap_mambu.transform import transform_json +from ..Helpers import write_bookmark, transform_json class TapGenerator: - buffer: List - - def __init__(self, stream_name, client, config, endpoint_config): + def __init__(self, stream_name, client, config, endpoint_config, state, sub_type): self.stream_name = stream_name self.client = client self.config = config self.endpoint_config = endpoint_config + self.state = state + self.sub_type = sub_type + self.__init_config() self.__init_buffers() self.__init_bookmarks() self.__init_params() + def __init_config(self): + self.start_date = self.config.get('start_date') + def __init_buffers(self): - self.buffer = list() + self.buffer: List = list() def __init_bookmarks(self): self.bookmark_query_field = self.endpoint_config.get('bookmark_query_field') self.bookmark_type = self.endpoint_config.get('bookmark_type') - self.last_bookmark_value = 0 if self.bookmark_type == "integer" else self.start_date + self.bookmark_field = self.endpoint_config.get('bookmark_field') + if self.bookmark_type == "integer": + self.last_bookmark_value = get_bookmark(self.state, self.stream_name, self.sub_type, 0) + else: + self.last_bookmark_value = get_bookmark(self.state, self.stream_name, self.sub_type, self.start_date) + self.max_bookmark_value = self.last_bookmark_value def __init_params(self): - self.start_date = self.config.get('start_date') self.time_extracted = None self.static_params = self.endpoint_config.get('params', {}) self.offset = 0 @@ -36,19 +44,46 @@ def __init_params(self): self.params = self.static_params def __iter__(self): + self.prepare_batch() raw_batch = self.fetch_batch() self.buffer = self.transform_batch(raw_batch) + self.last_batch_size = len(self.buffer) return self def __next__(self): if not self.buffer: + if self.last_batch_size < self.limit: + raise StopIteration() + self.offset += self.limit + self.write_bookmark() + self.prepare_batch() raw_batch = self.fetch_batch() self.buffer = self.transform_batch(raw_batch) + self.last_batch_size = len(self.buffer) if not self.buffer: raise StopIteration() return self.buffer.pop(0) - def fetch_batch(self): + def __del__(self): + self.write_bookmark() + + def write_bookmark(self): + if self.bookmark_field: + write_bookmark(self.state, + self.stream_name, + self.sub_type, + self.max_bookmark_value) + + def prepare_batch(self): + self.params = { + "offset": self.offset, + "limit": self.limit, + **self.static_params + } + + def fetch_batch(self): # TODO: Take Bookmark into consideration + if self.bookmark_query_field: + self.params[self.bookmark_query_field] = self.last_bookmark_value response = self.client.request( method=self.endpoint_config.get('api_method', 'GET'), path=self.endpoint_config.get('path'), diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index 6e8db72..fe4f9ae 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -6,5 +6,11 @@ class LoanAccountsGenerator(TapGenerator): - def __init__(self, stream_name, client, config, endpoint_config): - super(LoanAccountsGenerator, self).__init__(stream_name, client, config, endpoint_config) + def __init__(self, *args, **kwargs): + super(LoanAccountsGenerator, self).__init__(*args, **kwargs) + # self.__init_buffers() + + # def __init_buffers(self): + # super(LoanAccountsGenerator, self).__init_buffers() + # self.buffer_ad: List = list() + # self.buffer_lm: List = list() diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index d266159..d88dd16 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -1,9 +1,80 @@ import json +from singer import write_record, Transformer, metadata, write_schema +from singer.utils import strptime_to_utc + +from ..TapGenerators.generator import TapGenerator class TapProcessor: - def __init__(self): - pass + def __init__(self, generator: TapGenerator, catalog, stream_name, endpoint_config): + self.generator = generator + self.catalog = catalog + self.stream_name = stream_name + self.endpoint_config = endpoint_config + self.stream = self.catalog.get_stream(stream_name) + self.schema = self.stream.schema.to_dict() + self.stream_metadata = metadata.to_map(self.stream.metadata) + + def write_schema(self): + stream = self.catalog.get_stream(self.stream_name) + schema = stream.schema.to_dict() + write_schema(self.stream_name, schema, stream.key_properties) + + def process_stream_from_generator(self): + self.write_schema() + for record in self.generator: + self.process_record(record) + + def __is_record_past_bookmark(self, transformed_record): + bookmark_type = self.endpoint_config.get('bookmark_type') + bookmark_field = self.endpoint_config.get('bookmark_field') + if type(bookmark_field) is list: + bookmark_found = False + for bookmark in bookmark_field: + if bookmark and (bookmark in transformed_record): + bookmark_dttm = strptime_to_utc(transformed_record[bookmark]) + if self.generator.max_bookmark_value: + max_bookmark_value_dttm = strptime_to_utc(self.generator.max_bookmark_value) + if bookmark_dttm > max_bookmark_value_dttm: + self.generator.max_bookmark_value = transformed_record[bookmark] + else: + self.generator.max_bookmark_value = transformed_record[bookmark] + + if bookmark and (bookmark in transformed_record): + bookmark_found = True + if bookmark_type == 'integer': + # Keep only records whose bookmark is after the last_integer + if transformed_record[bookmark] >= self.generator.last_bookmark_value: + return True + elif bookmark_type == 'datetime': + with Transformer() as transformer: + last_dttm = transformer._transform_datetime(self.generator.last_bookmark_value) + with Transformer() as transformer: + bookmark_dttm = transformer._transform_datetime(transformed_record[bookmark]) + # Keep only records whose bookmark is after the last_datetime + if bookmark_dttm >= last_dttm: + return True + index = (bookmark_field.index(bookmark)) + 1 + # Check if the rest of the bookmarks have a value higher than the current max_bookmark + for bookmark in bookmark_field[index:]: + if bookmark and (bookmark in transformed_record): + bookmark_dttm = strptime_to_utc(transformed_record[bookmark]) + max_bookmark_value_dttm = strptime_to_utc(self.generator.max_bookmark_value) + if bookmark_dttm > max_bookmark_value_dttm: + self.generator.max_bookmark_value = transformed_record[bookmark] + + break + if not bookmark_found: + return True + return False + + def process_record(self, record): + with Transformer() as transformer: + transformed_record = transformer.transform(record, + self.schema, + self.stream_metadata) - def process(self, record): - print(json.dumps(record, indent=2)) + if self.__is_record_past_bookmark(transformed_record): + write_record(self.stream_name, + transformed_record, + time_extracted=self.generator.time_extracted) diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 7c4024e..6b39062 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -1,18 +1,30 @@ +import singer + from tap_mambu.tap_mambu_refactor.TapGenerators.generator import TapGenerator from tap_mambu.tap_mambu_refactor.TapGenerators.loan_accounts_generator import LoanAccountsGenerator from tap_mambu.tap_mambu_refactor.TapProcessors.processor import TapProcessor + +LOGGER = singer.get_logger() + + stream_generator_processor_dict = { "loan_accounts": (LoanAccountsGenerator, TapProcessor) } -def sync_endpoint_refactor(client, catalog, state, start_date, stream_name, - path, endpoint_config, sub_type): +def sync_endpoint_refactor(client, catalog, state, stream_name, + endpoint_config, sub_type, config): generator_class, processor_class = stream_generator_processor_dict[stream_name] - generator, processor = generator_class(stream_name=stream_name, + generator = generator_class(stream_name=stream_name, client=client, - endpoint_config=endpoint_config), \ - processor_class() - for record in generator: - processor.process(record) + endpoint_config=endpoint_config, + config=config, + state=state, + sub_type=sub_type) + processor = processor_class(generator=generator, + catalog=catalog, + stream_name=stream_name, + endpoint_config=endpoint_config) + + processor.process_stream_from_generator() From 906b3ccca6808bec5e158d51605fcef4d8e07680 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Thu, 25 Nov 2021 15:20:35 +0200 Subject: [PATCH 06/38] Added last_account_appraisal_date to loan_accounts.json --- tap_mambu/schemas/loan_accounts.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tap_mambu/schemas/loan_accounts.json b/tap_mambu/schemas/loan_accounts.json index 27ed1f6..947a804 100644 --- a/tap_mambu/schemas/loan_accounts.json +++ b/tap_mambu/schemas/loan_accounts.json @@ -883,6 +883,13 @@ ], "format": "date-time" }, + "last_account_appraisal_date": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, "penalty_settings": { "type": [ "null", From 30cbdfe3d2c59cdff192f6e3e077c7f94405abb4 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Fri, 26 Nov 2021 17:17:23 +0200 Subject: [PATCH 07/38] Implemented bookmark resume and ABC for Generators, with Loan Accounts specific fixes and classes (WIP) --- tap_mambu/sync.py | 2 +- .../tap_mambu_refactor/Helpers/__init__.py | 8 +- .../TapGenerators/generator.py | 28 +++-- .../TapGenerators/loan_accounts_generator.py | 71 ++++++++++-- .../TapProcessors/processor.py | 103 ++++++++++-------- tap_mambu/tap_mambu_refactor/__init__.py | 21 ++-- 6 files changed, 153 insertions(+), 80 deletions(-) diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index e4c3345..1aa5f15 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -639,7 +639,7 @@ def sync(client, config, catalog, state): }, "filterCriteria": [ { - "field": "lastModifiedDate", + "field": "lastAccountAppraisalDate", "operator": "AFTER", "value": loan_accounts_dt_str } diff --git a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py index ba93b4c..06e09b9 100644 --- a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py +++ b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py @@ -1,5 +1,11 @@ import re -from singer import write_state +from singer import write_state, Transformer + + +def transform_datetime(this_dttm): + with Transformer() as transformer: + new_dttm = transformer._transform_datetime(this_dttm) + return new_dttm def write_bookmark(state, stream, sub_type, value): diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index d1fd645..1f39401 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -1,4 +1,6 @@ import time +from abc import ABC + import requests from typing import List @@ -7,26 +9,29 @@ from ..Helpers import write_bookmark, transform_json -class TapGenerator: - def __init__(self, stream_name, client, config, endpoint_config, state, sub_type): +class TapGenerator(ABC): + def __init__(self, stream_name, client, config, state, sub_type): self.stream_name = stream_name self.client = client self.config = config - self.endpoint_config = endpoint_config self.state = state self.sub_type = sub_type - self.__init_config() - self.__init_buffers() - self.__init_bookmarks() - self.__init_params() + self._init_config() + self._init_endpoint_config() + self._init_buffers() + self._init_bookmarks() + self._init_params() - def __init_config(self): + def _init_config(self): self.start_date = self.config.get('start_date') - def __init_buffers(self): + def _init_endpoint_config(self): + self.endpoint_config = {} + + def _init_buffers(self): self.buffer: List = list() - def __init_bookmarks(self): + def _init_bookmarks(self): self.bookmark_query_field = self.endpoint_config.get('bookmark_query_field') self.bookmark_type = self.endpoint_config.get('bookmark_type') self.bookmark_field = self.endpoint_config.get('bookmark_field') @@ -36,7 +41,7 @@ def __init_bookmarks(self): self.last_bookmark_value = get_bookmark(self.state, self.stream_name, self.sub_type, self.start_date) self.max_bookmark_value = self.last_bookmark_value - def __init_params(self): + def _init_params(self): self.time_extracted = None self.static_params = self.endpoint_config.get('params', {}) self.offset = 0 @@ -105,4 +110,3 @@ def transform_batch(self, batch): elif data_key in batch: transformed_batch = transform_json(batch, data_key)[data_key] return transformed_batch - diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index fe4f9ae..29cb17a 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -1,16 +1,71 @@ +import abc import time +from operator import itemgetter + import requests from typing import List from .. import TapGenerator +from ..Helpers import transform_datetime +from singer import get_bookmark class LoanAccountsGenerator(TapGenerator): - def __init__(self, *args, **kwargs): - super(LoanAccountsGenerator, self).__init__(*args, **kwargs) - # self.__init_buffers() - - # def __init_buffers(self): - # super(LoanAccountsGenerator, self).__init_buffers() - # self.buffer_ad: List = list() - # self.buffer_lm: List = list() + @abc.abstractmethod + def _init_endpoint_config(self): + self.endpoint_config = { + 'path': 'loans:search', + 'api_version': 'v2', + 'api_method': 'POST', + 'params': { + 'detailsLevel': 'FULL', + 'paginationDetails': 'ON' + }, + 'body': { + "sortingCriteria": { + "field": "lastModifiedDate", + "order": "ASC" + }, + "filterCriteria": [ + { + "field": "", + "operator": "AFTER", + "value": transform_datetime( + get_bookmark(self.state, 'deposit_accounts', 'self', self.start_date))[:10] + } + ] + }, + 'bookmark_field': '', + 'bookmark_type': 'datetime', + 'id_fields': ['id'], + 'children': { + 'loan_repayments': { + 'path': 'loans/{}/repayments', + 'api_version': 'v1', + 'api_method': 'GET', + 'params': { + 'detailsLevel': 'FULL', + 'paginationDetails': 'ON' + }, + 'id_fields': ['encoded_key'], + 'parent': 'loan_accounts' + } + } + } + +class LoanAccountsLMGenerator(LoanAccountsGenerator): + def _init_endpoint_config(self): + super()._init_endpoint_config() + self.endpoint_config["body"]["filterCriteria"][0]["field"] = "lastModifiedDate" + self.endpoint_config["bookmark_field"] = "lastModifiedDate" + + +class LoanAccountsADGenerator(LoanAccountsGenerator): + def _init_endpoint_config(self): + super()._init_endpoint_config() + self.endpoint_config["body"]["filterCriteria"][0]["field"] = "lastAccountAppraisalDate" + self.endpoint_config["bookmark_field"] = "lastAccountAppraisalDate" + + def transform_batch(self, batch): + transformed_batch = super().transform_batch(batch) + return sorted(transformed_batch, key=itemgetter("last_account_appraisal_date")) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index d88dd16..08621d3 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -1,16 +1,19 @@ import json +from typing import List + from singer import write_record, Transformer, metadata, write_schema from singer.utils import strptime_to_utc +from ..Helpers import transform_datetime, convert from ..TapGenerators.generator import TapGenerator class TapProcessor: - def __init__(self, generator: TapGenerator, catalog, stream_name, endpoint_config): - self.generator = generator + def __init__(self, generators: List[TapGenerator], catalog, stream_name): + self.generators = generators + self.generator_values = dict() self.catalog = catalog self.stream_name = stream_name - self.endpoint_config = endpoint_config self.stream = self.catalog.get_stream(stream_name) self.schema = self.stream.schema.to_dict() self.stream_metadata = metadata.to_map(self.stream.metadata) @@ -20,55 +23,61 @@ def write_schema(self): schema = stream.schema.to_dict() write_schema(self.stream_name, schema, stream.key_properties) - def process_stream_from_generator(self): + def process_streams_from_generators(self): self.write_schema() - for record in self.generator: - self.process_record(record) + for generator in self.generators: + self.generator_values[iter(generator)] = None + while True: + for iterator in list(self.generator_values.keys()): + if self.generator_values[iterator] is None: + self.generator_values[iterator] = next(iterator, None) + if self.generator_values[iterator] is None: + self.generator_values.pop(iterator) + if not self.generator_values: + break + + min_record_key: TapGenerator = None + min_record_value = None + for iterator in self.generator_values.keys(): + if min_record_value is None \ + or min_record_value > self.generator_values[iterator][convert(iterator.bookmark_field)]: + min_record_key = iterator + min_record_value = self.generator_values[iterator][convert(iterator.bookmark_field)] + self.process_record(self.generator_values[min_record_key], min_record_key.time_extracted) + self.generator_values[min_record_key] = None def __is_record_past_bookmark(self, transformed_record): - bookmark_type = self.endpoint_config.get('bookmark_type') - bookmark_field = self.endpoint_config.get('bookmark_field') - if type(bookmark_field) is list: - bookmark_found = False - for bookmark in bookmark_field: - if bookmark and (bookmark in transformed_record): - bookmark_dttm = strptime_to_utc(transformed_record[bookmark]) - if self.generator.max_bookmark_value: - max_bookmark_value_dttm = strptime_to_utc(self.generator.max_bookmark_value) - if bookmark_dttm > max_bookmark_value_dttm: - self.generator.max_bookmark_value = transformed_record[bookmark] - else: - self.generator.max_bookmark_value = transformed_record[bookmark] + is_record_past_bookmark = False + bookmark_type = self.generators[0].endpoint_config.get('bookmark_type') + bookmark_field = self.generators[0].endpoint_config.get('bookmark_field') + + # Reset max_bookmark_value to new value if higher + if bookmark_field and (bookmark_field in transformed_record): + bookmark_dttm = strptime_to_utc(transformed_record[bookmark_field]) + if self.generators[0].max_bookmark_value: + max_bookmark_value_dttm = strptime_to_utc(self.generators[0].max_bookmark_value) + if bookmark_dttm > max_bookmark_value_dttm: + self.generators[0].max_bookmark_value = transformed_record[bookmark_field] + else: + self.generators[0].max_bookmark_value = transformed_record[bookmark_field] - if bookmark and (bookmark in transformed_record): - bookmark_found = True - if bookmark_type == 'integer': - # Keep only records whose bookmark is after the last_integer - if transformed_record[bookmark] >= self.generator.last_bookmark_value: - return True - elif bookmark_type == 'datetime': - with Transformer() as transformer: - last_dttm = transformer._transform_datetime(self.generator.last_bookmark_value) - with Transformer() as transformer: - bookmark_dttm = transformer._transform_datetime(transformed_record[bookmark]) - # Keep only records whose bookmark is after the last_datetime - if bookmark_dttm >= last_dttm: - return True - index = (bookmark_field.index(bookmark)) + 1 - # Check if the rest of the bookmarks have a value higher than the current max_bookmark - for bookmark in bookmark_field[index:]: - if bookmark and (bookmark in transformed_record): - bookmark_dttm = strptime_to_utc(transformed_record[bookmark]) - max_bookmark_value_dttm = strptime_to_utc(self.generator.max_bookmark_value) - if bookmark_dttm > max_bookmark_value_dttm: - self.generator.max_bookmark_value = transformed_record[bookmark] + if bookmark_field and (bookmark_field in transformed_record): + if bookmark_type == 'integer': + # Keep only records whose bookmark is after the last_integer + if transformed_record[bookmark_field] >= self.generators[0].last_bookmark_value: + is_record_past_bookmark = True + elif bookmark_type == 'datetime': + last_dttm = transform_datetime(self.generator.last_bookmark_value) + bookmark_dttm = transform_datetime(transformed_record[bookmark_field]) + # Keep only records whose bookmark is after the last_datetime + if bookmark_dttm >= last_dttm: + is_record_past_bookmark = True + else: + is_record_past_bookmark = True - break - if not bookmark_found: - return True - return False + return is_record_past_bookmark - def process_record(self, record): + def process_record(self, record, time_extracted): with Transformer() as transformer: transformed_record = transformer.transform(record, self.schema, @@ -77,4 +86,4 @@ def process_record(self, record): if self.__is_record_past_bookmark(transformed_record): write_record(self.stream_name, transformed_record, - time_extracted=self.generator.time_extracted) + time_extracted=time_extracted) diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 6b39062..33f2828 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -1,30 +1,29 @@ import singer -from tap_mambu.tap_mambu_refactor.TapGenerators.generator import TapGenerator -from tap_mambu.tap_mambu_refactor.TapGenerators.loan_accounts_generator import LoanAccountsGenerator -from tap_mambu.tap_mambu_refactor.TapProcessors.processor import TapProcessor +from .TapGenerators.generator import TapGenerator +from .TapGenerators.loan_accounts_generator import LoanAccountsADGenerator, LoanAccountsLMGenerator +from .TapProcessors.processor import TapProcessor LOGGER = singer.get_logger() stream_generator_processor_dict = { - "loan_accounts": (LoanAccountsGenerator, TapProcessor) + "loan_accounts": ((LoanAccountsADGenerator, LoanAccountsLMGenerator), TapProcessor) } def sync_endpoint_refactor(client, catalog, state, stream_name, endpoint_config, sub_type, config): - generator_class, processor_class = stream_generator_processor_dict[stream_name] - generator = generator_class(stream_name=stream_name, + generator_classes, processor_class = stream_generator_processor_dict[stream_name] + generators = [generator_class(stream_name=stream_name, client=client, - endpoint_config=endpoint_config, config=config, state=state, sub_type=sub_type) - processor = processor_class(generator=generator, + for generator_class in generator_classes] + processor = processor_class(generators=generators, catalog=catalog, - stream_name=stream_name, - endpoint_config=endpoint_config) + stream_name=stream_name) - processor.process_stream_from_generator() + processor.process_streams_from_generators() From c45132527112020b2f162592ddbd9881bf4b8c89 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 29 Nov 2021 15:30:30 +0100 Subject: [PATCH 08/38] Commit before fixing Appraisal Order Bug --- .../tap_mambu_refactor/TapProcessors/processor.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index 08621d3..bc3d3d2 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -39,12 +39,19 @@ def process_streams_from_generators(self): min_record_key: TapGenerator = None min_record_value = None for iterator in self.generator_values.keys(): + new_bookmark_field = convert(iterator.bookmark_field) if min_record_value is None \ - or min_record_value > self.generator_values[iterator][convert(iterator.bookmark_field)]: + or transform_datetime(min_record_value) > transform_datetime(self.generator_values[iterator][new_bookmark_field]): min_record_key = iterator - min_record_value = self.generator_values[iterator][convert(iterator.bookmark_field)] - self.process_record(self.generator_values[min_record_key], min_record_key.time_extracted) - self.generator_values[min_record_key] = None + min_record_value = self.generator_values[iterator][new_bookmark_field] + + record = self.generator_values[min_record_key] + record["generator"] = list(self.generator_values.keys()).index(min_record_key) + self.process_record(record, min_record_key.time_extracted) + + for iterator in self.generator_values.keys(): + if self.generator_values[min_record_key] == self.generator_values[iterator]: + self.generator_values[iterator] = None def __is_record_past_bookmark(self, transformed_record): is_record_past_bookmark = False From 284f56a4303a259a07497874255b65daebcafd08 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 29 Nov 2021 16:20:23 +0100 Subject: [PATCH 09/38] Finished implementation of Loan Accounts Generator/Processor --- .../TapGenerators/generator.py | 5 +--- .../TapGenerators/loan_accounts_generator.py | 7 ++--- .../TapProcessors/loan_accounts_processor.py | 5 ++++ .../TapProcessors/processor.py | 28 +++++++++++-------- tap_mambu/tap_mambu_refactor/__init__.py | 3 +- 5 files changed, 27 insertions(+), 21 deletions(-) create mode 100644 tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 1f39401..1f7e0cc 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -60,7 +60,7 @@ def __next__(self): if self.last_batch_size < self.limit: raise StopIteration() self.offset += self.limit - self.write_bookmark() + # self.write_bookmark() self.prepare_batch() raw_batch = self.fetch_batch() self.buffer = self.transform_batch(raw_batch) @@ -69,9 +69,6 @@ def __next__(self): raise StopIteration() return self.buffer.pop(0) - def __del__(self): - self.write_bookmark() - def write_bookmark(self): if self.bookmark_field: write_bookmark(self.state, diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index 29cb17a..afc8b45 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -23,7 +23,7 @@ def _init_endpoint_config(self): }, 'body': { "sortingCriteria": { - "field": "lastModifiedDate", + "field": "encodedKey", "order": "ASC" }, "filterCriteria": [ @@ -53,6 +53,7 @@ def _init_endpoint_config(self): } } + class LoanAccountsLMGenerator(LoanAccountsGenerator): def _init_endpoint_config(self): super()._init_endpoint_config() @@ -65,7 +66,3 @@ def _init_endpoint_config(self): super()._init_endpoint_config() self.endpoint_config["body"]["filterCriteria"][0]["field"] = "lastAccountAppraisalDate" self.endpoint_config["bookmark_field"] = "lastAccountAppraisalDate" - - def transform_batch(self, batch): - transformed_batch = super().transform_batch(batch) - return sorted(transformed_batch, key=itemgetter("last_account_appraisal_date")) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py new file mode 100644 index 0000000..15e51fd --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py @@ -0,0 +1,5 @@ +from .processor import TapProcessor + + +class LoanAccountsProcessor(TapProcessor): + pass diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index bc3d3d2..88b85c3 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -1,19 +1,21 @@ import json +from abc import ABC from typing import List from singer import write_record, Transformer, metadata, write_schema from singer.utils import strptime_to_utc -from ..Helpers import transform_datetime, convert +from ..Helpers import transform_datetime, convert, write_bookmark from ..TapGenerators.generator import TapGenerator -class TapProcessor: +class TapProcessor(ABC): def __init__(self, generators: List[TapGenerator], catalog, stream_name): self.generators = generators self.generator_values = dict() self.catalog = catalog self.stream_name = stream_name + self.deduplication_key = "encoded_key" self.stream = self.catalog.get_stream(stream_name) self.schema = self.stream.schema.to_dict() self.stream_metadata = metadata.to_map(self.stream.metadata) @@ -28,6 +30,7 @@ def process_streams_from_generators(self): for generator in self.generators: self.generator_values[iter(generator)] = None while True: + # Populate list of values from generators (if any were removed) for iterator in list(self.generator_values.keys()): if self.generator_values[iterator] is None: self.generator_values[iterator] = next(iterator, None) @@ -35,33 +38,36 @@ def process_streams_from_generators(self): self.generator_values.pop(iterator) if not self.generator_values: break - + # Find lowest value in the list min_record_key: TapGenerator = None min_record_value = None for iterator in self.generator_values.keys(): - new_bookmark_field = convert(iterator.bookmark_field) if min_record_value is None \ - or transform_datetime(min_record_value) > transform_datetime(self.generator_values[iterator][new_bookmark_field]): + or min_record_value > self.generator_values[iterator][self.deduplication_key]: min_record_key = iterator - min_record_value = self.generator_values[iterator][new_bookmark_field] + min_record_value = self.generator_values[iterator][self.deduplication_key] + # Process the record record = self.generator_values[min_record_key] - record["generator"] = list(self.generator_values.keys()).index(min_record_key) self.process_record(record, min_record_key.time_extracted) + # Remove any record with the same deduplication_key from the list + # (so we don't process the same record twice) for iterator in self.generator_values.keys(): - if self.generator_values[min_record_key] == self.generator_values[iterator]: + if min_record_value == self.generator_values[iterator][self.deduplication_key]: self.generator_values[iterator] = None + self.generators[0].write_bookmark() + def __is_record_past_bookmark(self, transformed_record): is_record_past_bookmark = False bookmark_type = self.generators[0].endpoint_config.get('bookmark_type') - bookmark_field = self.generators[0].endpoint_config.get('bookmark_field') + bookmark_field = convert(self.generators[0].endpoint_config.get('bookmark_field')) # Reset max_bookmark_value to new value if higher if bookmark_field and (bookmark_field in transformed_record): bookmark_dttm = strptime_to_utc(transformed_record[bookmark_field]) - if self.generators[0].max_bookmark_value: + if hasattr(self.generators[0], "max_bookmark_value"): max_bookmark_value_dttm = strptime_to_utc(self.generators[0].max_bookmark_value) if bookmark_dttm > max_bookmark_value_dttm: self.generators[0].max_bookmark_value = transformed_record[bookmark_field] @@ -74,7 +80,7 @@ def __is_record_past_bookmark(self, transformed_record): if transformed_record[bookmark_field] >= self.generators[0].last_bookmark_value: is_record_past_bookmark = True elif bookmark_type == 'datetime': - last_dttm = transform_datetime(self.generator.last_bookmark_value) + last_dttm = transform_datetime(self.generators[0].last_bookmark_value) bookmark_dttm = transform_datetime(transformed_record[bookmark_field]) # Keep only records whose bookmark is after the last_datetime if bookmark_dttm >= last_dttm: diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 33f2828..da8b4ca 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -2,6 +2,7 @@ from .TapGenerators.generator import TapGenerator from .TapGenerators.loan_accounts_generator import LoanAccountsADGenerator, LoanAccountsLMGenerator +from .TapProcessors.loan_accounts_processor import LoanAccountsProcessor from .TapProcessors.processor import TapProcessor @@ -9,7 +10,7 @@ stream_generator_processor_dict = { - "loan_accounts": ((LoanAccountsADGenerator, LoanAccountsLMGenerator), TapProcessor) + "loan_accounts": ((LoanAccountsLMGenerator, LoanAccountsADGenerator), LoanAccountsProcessor) } From 0039dd6090995dd52ffefc71e1b10731dd06b068 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 6 Dec 2021 15:59:30 +0200 Subject: [PATCH 10/38] Small Bugfixes --- tap_mambu/sync.py | 2 +- tap_mambu/tap_mambu_refactor/Helpers/__init__.py | 10 ++++++++++ .../tap_mambu_refactor/TapGenerators/generator.py | 4 ++-- .../TapGenerators/loan_accounts_generator.py | 5 ++--- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index 1aa5f15..e4c3345 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -639,7 +639,7 @@ def sync(client, config, catalog, state): }, "filterCriteria": [ { - "field": "lastAccountAppraisalDate", + "field": "lastModifiedDate", "operator": "AFTER", "value": loan_accounts_dt_str } diff --git a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py index 06e09b9..e582a91 100644 --- a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py +++ b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py @@ -2,6 +2,16 @@ from singer import write_state, Transformer +def get_bookmark(state, stream, sub_type, default): + if (state is None) or ('bookmarks' not in state): + return default + + if sub_type == 'self': + return state.get('bookmarks', {}).get(stream, default) + else: + return state.get('bookmarks', {}).get(stream, {}).get(sub_type, default) + + def transform_datetime(this_dttm): with Transformer() as transformer: new_dttm = transformer._transform_datetime(this_dttm) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 1f7e0cc..ad16027 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -4,9 +4,9 @@ import requests from typing import List -from singer import utils, get_bookmark +from singer import utils -from ..Helpers import write_bookmark, transform_json +from ..Helpers import write_bookmark, transform_json, get_bookmark class TapGenerator(ABC): diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index afc8b45..232aa30 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -6,8 +6,7 @@ from typing import List from .. import TapGenerator -from ..Helpers import transform_datetime -from singer import get_bookmark +from ..Helpers import transform_datetime, get_bookmark class LoanAccountsGenerator(TapGenerator): @@ -31,7 +30,7 @@ def _init_endpoint_config(self): "field": "", "operator": "AFTER", "value": transform_datetime( - get_bookmark(self.state, 'deposit_accounts', 'self', self.start_date))[:10] + get_bookmark(self.state, 'loan_accounts', 'self', self.start_date))[:10] } ] }, From 19800f3e19ba3bf1d2139a898c1e23a5c5c0cbb3 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 6 Dec 2021 18:43:06 +0200 Subject: [PATCH 11/38] Small fixes suggested by Alex via PR Comments --- tap_mambu/sync.py | 1 - .../tap_mambu_refactor/TapGenerators/generator.py | 10 +++------- .../TapGenerators/loan_accounts_generator.py | 5 ----- .../tap_mambu_refactor/TapProcessors/processor.py | 15 +++++++-------- tap_mambu/tap_mambu_refactor/__init__.py | 9 ++++----- 5 files changed, 14 insertions(+), 26 deletions(-) diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index b0eb320..2e3d741 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -907,7 +907,6 @@ def sync(client, config, catalog, state): catalog=catalog, state=state, stream_name=stream_name, - endpoint_config=endpoint_config, sub_type=sub_type, config=config ) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index ad16027..650bba6 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -1,8 +1,4 @@ -import time from abc import ABC - -import requests - from typing import List from singer import utils @@ -65,8 +61,8 @@ def __next__(self): raw_batch = self.fetch_batch() self.buffer = self.transform_batch(raw_batch) self.last_batch_size = len(self.buffer) - if not self.buffer: - raise StopIteration() + if not self.buffer: + raise StopIteration() return self.buffer.pop(0) def write_bookmark(self): @@ -83,7 +79,7 @@ def prepare_batch(self): **self.static_params } - def fetch_batch(self): # TODO: Take Bookmark into consideration + def fetch_batch(self): if self.bookmark_query_field: self.params[self.bookmark_query_field] = self.last_bookmark_value response = self.client.request( diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index 232aa30..8abae82 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -1,10 +1,5 @@ import abc -import time -from operator import itemgetter -import requests - -from typing import List from .. import TapGenerator from ..Helpers import transform_datetime, get_bookmark diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index 88b85c3..d1bf358 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -1,17 +1,15 @@ -import json from abc import ABC -from typing import List from singer import write_record, Transformer, metadata, write_schema from singer.utils import strptime_to_utc -from ..Helpers import transform_datetime, convert, write_bookmark +from ..Helpers import transform_datetime, convert from ..TapGenerators.generator import TapGenerator class TapProcessor(ABC): - def __init__(self, generators: List[TapGenerator], catalog, stream_name): - self.generators = generators + def __init__(self, catalog, stream_name): + self.generators = list() self.generator_values = dict() self.catalog = catalog self.stream_name = stream_name @@ -25,13 +23,14 @@ def write_schema(self): schema = stream.schema.to_dict() write_schema(self.stream_name, schema, stream.key_properties) - def process_streams_from_generators(self): + def process_streams_from_generators(self, generators): + self.generators = generators self.write_schema() for generator in self.generators: self.generator_values[iter(generator)] = None while True: # Populate list of values from generators (if any were removed) - for iterator in list(self.generator_values.keys()): + for iterator in list(self.generator_values): if self.generator_values[iterator] is None: self.generator_values[iterator] = next(iterator, None) if self.generator_values[iterator] is None: @@ -41,7 +40,7 @@ def process_streams_from_generators(self): # Find lowest value in the list min_record_key: TapGenerator = None min_record_value = None - for iterator in self.generator_values.keys(): + for iterator in self.generator_values: if min_record_value is None \ or min_record_value > self.generator_values[iterator][self.deduplication_key]: min_record_key = iterator diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index da8b4ca..193b998 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -14,8 +14,8 @@ } -def sync_endpoint_refactor(client, catalog, state, stream_name, - endpoint_config, sub_type, config): +def sync_endpoint_refactor(client, catalog, state, + stream_name, sub_type, config): generator_classes, processor_class = stream_generator_processor_dict[stream_name] generators = [generator_class(stream_name=stream_name, client=client, @@ -23,8 +23,7 @@ def sync_endpoint_refactor(client, catalog, state, stream_name, state=state, sub_type=sub_type) for generator_class in generator_classes] - processor = processor_class(generators=generators, - catalog=catalog, + processor = processor_class(catalog=catalog, stream_name=stream_name) - processor.process_streams_from_generators() + processor.process_streams_from_generators(generators=generators) From f8617c0c24e8bff65c50a5e36380507fbd91e24e Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 6 Dec 2021 18:52:09 +0200 Subject: [PATCH 12/38] Solved __iter__ method and __next__ method duplicated code according to PR comment --- .../tap_mambu_refactor/TapGenerators/generator.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 650bba6..99ff325 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -44,11 +44,14 @@ def _init_params(self): self.limit = self.client.page_size self.params = self.static_params - def __iter__(self): + def __all_fetch_batch_steps(self): self.prepare_batch() raw_batch = self.fetch_batch() self.buffer = self.transform_batch(raw_batch) self.last_batch_size = len(self.buffer) + + def __iter__(self): + self.__all_fetch_batch_steps() return self def __next__(self): @@ -57,10 +60,7 @@ def __next__(self): raise StopIteration() self.offset += self.limit # self.write_bookmark() - self.prepare_batch() - raw_batch = self.fetch_batch() - self.buffer = self.transform_batch(raw_batch) - self.last_batch_size = len(self.buffer) + self.__all_fetch_batch_steps() if not self.buffer: raise StopIteration() return self.buffer.pop(0) From f7c4d20343bc53ba253adbc918964fbd1c2b2dce Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Tue, 7 Dec 2021 11:53:51 +0200 Subject: [PATCH 13/38] Added a comment so we don't forget about the deduplication_key in the future --- tap_mambu/tap_mambu_refactor/TapProcessors/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index d1bf358..388e2a6 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -13,7 +13,7 @@ def __init__(self, catalog, stream_name): self.generator_values = dict() self.catalog = catalog self.stream_name = stream_name - self.deduplication_key = "encoded_key" + self.deduplication_key = "encoded_key" # To be replaced with 'id_fields' from endpoint config self.stream = self.catalog.get_stream(stream_name) self.schema = self.stream.schema.to_dict() self.stream_metadata = metadata.to_map(self.stream.metadata) From 6868cbda53cba691f247b7d079a702a9eca38fcd Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Tue, 7 Dec 2021 13:09:49 +0200 Subject: [PATCH 14/38] Also added record_count --- tap_mambu/tap_mambu_refactor/TapProcessors/processor.py | 3 +++ tap_mambu/tap_mambu_refactor/__init__.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index 388e2a6..f4511b8 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -26,6 +26,7 @@ def write_schema(self): def process_streams_from_generators(self, generators): self.generators = generators self.write_schema() + record_count = 0 for generator in self.generators: self.generator_values[iter(generator)] = None while True: @@ -49,6 +50,7 @@ def process_streams_from_generators(self, generators): # Process the record record = self.generator_values[min_record_key] self.process_record(record, min_record_key.time_extracted) + record_count += 1 # Remove any record with the same deduplication_key from the list # (so we don't process the same record twice) @@ -57,6 +59,7 @@ def process_streams_from_generators(self, generators): self.generator_values[iterator] = None self.generators[0].write_bookmark() + return record_count def __is_record_past_bookmark(self, transformed_record): is_record_past_bookmark = False diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 193b998..ca49828 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -26,4 +26,4 @@ def sync_endpoint_refactor(client, catalog, state, processor = processor_class(catalog=catalog, stream_name=stream_name) - processor.process_streams_from_generators(generators=generators) + return processor.process_streams_from_generators(generators=generators) From 7bc95dff6f8e25aaa8e9038a3c1cce66ac7898a3 Mon Sep 17 00:00:00 2001 From: DownstreamDataTeam Date: Thu, 16 Dec 2021 12:37:05 +0200 Subject: [PATCH 15/38] adjusted the last_datetime var from sync_endpoint func to use the lookback_window for the loan_transactions stream --- tap_mambu/sync.py | 30 +++++++++++------------------- tap_mambu/transform.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index 2e3d741..b6951be 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -1,16 +1,14 @@ -from _datetime import timedelta -from datetime import datetime - import singer from singer import Transformer, metadata, metrics, utils from singer.utils import strftime, strptime_to_utc from tap_mambu.tap_mambu_refactor import sync_endpoint_refactor -from tap_mambu.transform import transform_json, transform_activities +from tap_mambu.transform import transform_json, transform_activities, transform_datetime, check_loan_transaction_date LOGGER = singer.get_logger() LOOKBACK_DEFAULT = 14 + def write_schema(catalog, stream_name): stream = catalog.get_stream(stream_name) schema = stream.schema.to_dict() @@ -54,12 +52,6 @@ def write_bookmark(state, stream, sub_type, value): singer.write_state(state) -def transform_datetime(this_dttm): - with Transformer() as transformer: - new_dttm = transformer._transform_datetime(this_dttm) - return new_dttm - - def process_records(catalog, #pylint: disable=too-many-branches stream_name, records, @@ -137,7 +129,8 @@ def sync_endpoint(client, #pylint: disable=too-many-branches id_fields=None, parent=None, parent_id=None, - apikey_type=None): + apikey_type=None, + **kwargs): # Get the latest bookmark for the stream and set the last_integer/datetime @@ -152,6 +145,9 @@ def sync_endpoint(client, #pylint: disable=too-many-branches audit_trail_bookmark = get_bookmark(state, stream_name, sub_type, [start_date, 0]) last_datetime, number_last_occurrence = audit_trail_bookmark if type(audit_trail_bookmark) == list \ else (audit_trail_bookmark, 0) + elif stream_name == 'loan_transactions': + last_datetime = check_loan_transaction_date(kwargs.get('lookback_window', LOOKBACK_DEFAULT) or LOOKBACK_DEFAULT, + get_bookmark(state, stream_name, sub_type, start_date)) else: last_datetime = get_bookmark(state, stream_name, sub_type, start_date) max_bookmark_value = last_datetime @@ -400,9 +396,8 @@ def sync(client, config, catalog, state): # LOGGER.info('deposit_transactions bookmark_date = {}'.format(deposit_transactions_dt_str)) loan_transactions_dttm_str = get_bookmark(state, 'loan_transactions', 'self', start_date) - loan_transactions_dt_str = transform_datetime(loan_transactions_dttm_str)[:10] - loan_transactions_dttm = strptime_to_utc(loan_transactions_dt_str) - + loan_transactions_dt_str = check_loan_transaction_date(int(config.get('lookback_window', LOOKBACK_DEFAULT)), + loan_transactions_dttm_str)[:10] clients_dttm_str = get_bookmark(state, 'clients', 'self', start_date) clients_dt_str = transform_datetime(clients_dttm_str)[:10] @@ -415,10 +410,6 @@ def sync(client, config, catalog, state): deposit_accounts_dttm_str = get_bookmark(state, 'deposit_accounts', 'self', start_date) deposit_accounts_dt_str = transform_datetime(deposit_accounts_dttm_str)[:10] - lookback_days = int(config.get('lookback_window', LOOKBACK_DEFAULT)) - lookback_date = utils.now() - timedelta(lookback_days) - if loan_transactions_dttm > lookback_date: - loan_transactions_dt_str = transform_datetime(strftime(lookback_date))[:10] # LOGGER.info('loan_transactions bookmark_date = {}'.format(loan_transactions_dt_str)) # endpoints: API URL endpoints to be called @@ -929,7 +920,8 @@ def sync(client, config, catalog, state): data_key=endpoint_config.get('data_key', None), body=endpoint_config.get('body', None), id_fields=endpoint_config.get('id_fields'), - apikey_type=endpoint_config.get('apikey_type', None) + apikey_type=endpoint_config.get('apikey_type', None), + lookback_window=config.get('lookback_window', None) ) update_currently_syncing(state, None) diff --git a/tap_mambu/transform.py b/tap_mambu/transform.py index b2939a0..201fdc4 100644 --- a/tap_mambu/transform.py +++ b/tap_mambu/transform.py @@ -1,4 +1,8 @@ import re +from _datetime import timedelta +from singer import Transformer +from singer.utils import strftime, strptime_to_utc, now + # Convert camelCase to snake_case def convert(name): @@ -83,3 +87,17 @@ def transform_activities(this_json): record[key] = value del record['activity'] return this_json + + +def transform_datetime(this_dttm): + with Transformer() as transformer: + new_dttm = transformer._transform_datetime(this_dttm) + return new_dttm + + +def check_loan_transaction_date(lookback_window, dttm_str): + dttm_str = transform_datetime(dttm_str) + lookback_dt = now() - timedelta(lookback_window) + if strptime_to_utc(dttm_str) > lookback_dt: + return transform_datetime(strftime(lookback_dt)[:10]) + return dttm_str From 062e19d83209747e95de2774ee0a0729b68ef3f6 Mon Sep 17 00:00:00 2001 From: Alexandru Rosca Date: Mon, 20 Dec 2021 11:52:36 +0000 Subject: [PATCH 16/38] Revert "Merge branch 'feature/ECDCC-500_lookback_window_bugfix' into 'release/32'" This reverts merge request !10 --- tap_mambu/sync.py | 30 +++++++++++++++++++----------- tap_mambu/transform.py | 18 ------------------ 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index b6951be..2e3d741 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -1,14 +1,16 @@ +from _datetime import timedelta +from datetime import datetime + import singer from singer import Transformer, metadata, metrics, utils from singer.utils import strftime, strptime_to_utc from tap_mambu.tap_mambu_refactor import sync_endpoint_refactor -from tap_mambu.transform import transform_json, transform_activities, transform_datetime, check_loan_transaction_date +from tap_mambu.transform import transform_json, transform_activities LOGGER = singer.get_logger() LOOKBACK_DEFAULT = 14 - def write_schema(catalog, stream_name): stream = catalog.get_stream(stream_name) schema = stream.schema.to_dict() @@ -52,6 +54,12 @@ def write_bookmark(state, stream, sub_type, value): singer.write_state(state) +def transform_datetime(this_dttm): + with Transformer() as transformer: + new_dttm = transformer._transform_datetime(this_dttm) + return new_dttm + + def process_records(catalog, #pylint: disable=too-many-branches stream_name, records, @@ -129,8 +137,7 @@ def sync_endpoint(client, #pylint: disable=too-many-branches id_fields=None, parent=None, parent_id=None, - apikey_type=None, - **kwargs): + apikey_type=None): # Get the latest bookmark for the stream and set the last_integer/datetime @@ -145,9 +152,6 @@ def sync_endpoint(client, #pylint: disable=too-many-branches audit_trail_bookmark = get_bookmark(state, stream_name, sub_type, [start_date, 0]) last_datetime, number_last_occurrence = audit_trail_bookmark if type(audit_trail_bookmark) == list \ else (audit_trail_bookmark, 0) - elif stream_name == 'loan_transactions': - last_datetime = check_loan_transaction_date(kwargs.get('lookback_window', LOOKBACK_DEFAULT) or LOOKBACK_DEFAULT, - get_bookmark(state, stream_name, sub_type, start_date)) else: last_datetime = get_bookmark(state, stream_name, sub_type, start_date) max_bookmark_value = last_datetime @@ -396,8 +400,9 @@ def sync(client, config, catalog, state): # LOGGER.info('deposit_transactions bookmark_date = {}'.format(deposit_transactions_dt_str)) loan_transactions_dttm_str = get_bookmark(state, 'loan_transactions', 'self', start_date) - loan_transactions_dt_str = check_loan_transaction_date(int(config.get('lookback_window', LOOKBACK_DEFAULT)), - loan_transactions_dttm_str)[:10] + loan_transactions_dt_str = transform_datetime(loan_transactions_dttm_str)[:10] + loan_transactions_dttm = strptime_to_utc(loan_transactions_dt_str) + clients_dttm_str = get_bookmark(state, 'clients', 'self', start_date) clients_dt_str = transform_datetime(clients_dttm_str)[:10] @@ -410,6 +415,10 @@ def sync(client, config, catalog, state): deposit_accounts_dttm_str = get_bookmark(state, 'deposit_accounts', 'self', start_date) deposit_accounts_dt_str = transform_datetime(deposit_accounts_dttm_str)[:10] + lookback_days = int(config.get('lookback_window', LOOKBACK_DEFAULT)) + lookback_date = utils.now() - timedelta(lookback_days) + if loan_transactions_dttm > lookback_date: + loan_transactions_dt_str = transform_datetime(strftime(lookback_date))[:10] # LOGGER.info('loan_transactions bookmark_date = {}'.format(loan_transactions_dt_str)) # endpoints: API URL endpoints to be called @@ -920,8 +929,7 @@ def sync(client, config, catalog, state): data_key=endpoint_config.get('data_key', None), body=endpoint_config.get('body', None), id_fields=endpoint_config.get('id_fields'), - apikey_type=endpoint_config.get('apikey_type', None), - lookback_window=config.get('lookback_window', None) + apikey_type=endpoint_config.get('apikey_type', None) ) update_currently_syncing(state, None) diff --git a/tap_mambu/transform.py b/tap_mambu/transform.py index 201fdc4..b2939a0 100644 --- a/tap_mambu/transform.py +++ b/tap_mambu/transform.py @@ -1,8 +1,4 @@ import re -from _datetime import timedelta -from singer import Transformer -from singer.utils import strftime, strptime_to_utc, now - # Convert camelCase to snake_case def convert(name): @@ -87,17 +83,3 @@ def transform_activities(this_json): record[key] = value del record['activity'] return this_json - - -def transform_datetime(this_dttm): - with Transformer() as transformer: - new_dttm = transformer._transform_datetime(this_dttm) - return new_dttm - - -def check_loan_transaction_date(lookback_window, dttm_str): - dttm_str = transform_datetime(dttm_str) - lookback_dt = now() - timedelta(lookback_window) - if strptime_to_utc(dttm_str) > lookback_dt: - return transform_datetime(strftime(lookback_dt)[:10]) - return dttm_str From fa82719f0c7b272a13264ed4aa5a9174fa867b52 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 22 Nov 2021 13:29:07 +0200 Subject: [PATCH 17/38] Added Refactored Tap Boilerplate Classes --- tap_mambu/sync.py | 54 ++++++++++++------- .../TapGenerators/__init__.py | 0 .../TapGenerators/generator.py | 27 ++++++++++ .../TapProcessors/__init__.py | 0 .../TapProcessors/processor.py | 3 ++ tap_mambu/tap_mambu_refactor/__init__.py | 19 +++++++ 6 files changed, 83 insertions(+), 20 deletions(-) create mode 100644 tap_mambu/tap_mambu_refactor/TapGenerators/__init__.py create mode 100644 tap_mambu/tap_mambu_refactor/TapGenerators/generator.py create mode 100644 tap_mambu/tap_mambu_refactor/TapProcessors/__init__.py create mode 100644 tap_mambu/tap_mambu_refactor/TapProcessors/processor.py create mode 100644 tap_mambu/tap_mambu_refactor/__init__.py diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index e23d1b1..1a876ac 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -4,6 +4,8 @@ import singer from singer import Transformer, metadata, metrics, utils from singer.utils import strftime, strptime_to_utc + +from tap_mambu.tap_mambu_refactor import sync_endpoint_refactor from tap_mambu.transform import transform_json, transform_activities LOGGER = singer.get_logger() @@ -899,26 +901,38 @@ def sync(client, config, catalog, state): if sub_type_param: endpoint_config['params']['type'] = sub_type - total_records = sync_endpoint( - client=client, - catalog=catalog, - state=state, - start_date=start_date, - stream_name=stream_name, - path=path, - endpoint_config=endpoint_config, - api_version=endpoint_config.get('api_version', 'v2'), - api_method=endpoint_config.get('api_method', 'GET'), - static_params=endpoint_config.get('params', {}), - sub_type=sub_type, - bookmark_query_field=endpoint_config.get('bookmark_query_field'), - bookmark_field=endpoint_config.get('bookmark_field'), - bookmark_type=endpoint_config.get('bookmark_type'), - data_key=endpoint_config.get('data_key', None), - body=endpoint_config.get('body', None), - id_fields=endpoint_config.get('id_fields'), - apikey_type=endpoint_config.get('apikey_type', None) - ) + if stream_name in ["loan_accounts"]: + total_records = sync_endpoint_refactor( + client=client, + catalog=catalog, + state=state, + start_date=start_date, + stream_name=stream_name, + path=path, + endpoint_config=endpoint_config, + sub_type=sub_type + ) + else: + total_records = sync_endpoint( + client=client, + catalog=catalog, + state=state, + start_date=start_date, + stream_name=stream_name, + path=path, + endpoint_config=endpoint_config, + api_version=endpoint_config.get('api_version', 'v2'), + api_method=endpoint_config.get('api_method', 'GET'), + static_params=endpoint_config.get('params', {}), + sub_type=sub_type, + bookmark_query_field=endpoint_config.get('bookmark_query_field'), + bookmark_field=endpoint_config.get('bookmark_field'), + bookmark_type=endpoint_config.get('bookmark_type'), + data_key=endpoint_config.get('data_key', None), + body=endpoint_config.get('body', None), + id_fields=endpoint_config.get('id_fields'), + apikey_type=endpoint_config.get('apikey_type', None) + ) update_currently_syncing(state, None) LOGGER.info('Synced: {}, total_records: {}'.format( diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/__init__.py b/tap_mambu/tap_mambu_refactor/TapGenerators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py new file mode 100644 index 0000000..492b1d6 --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -0,0 +1,27 @@ +import time +from typing import List + +import requests + + +class TapGenerator: + def __init__(self, client): + self.buffer: List = list() + self.start_date = 0 + + def __iter__(self): + self.buffer = self.fetch_batch() + return self + + def __next__(self): + if not self.buffer: + self.buffer = self.fetch_batch() + if not self.buffer: + raise StopIteration() + return self.buffer.pop(0) + + def fetch_batch(self): + if not self.buffer: + # fetch lm records + pass + return list() diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/__init__.py b/tap_mambu/tap_mambu_refactor/TapProcessors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py new file mode 100644 index 0000000..8605d4c --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -0,0 +1,3 @@ +class TapProcessor: + def __init__(self): + pass diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py new file mode 100644 index 0000000..1d8c770 --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -0,0 +1,19 @@ +from tap_mambu.tap_mambu_refactor.TapGenerators.generator import TapGenerator +from tap_mambu.tap_mambu_refactor.TapProcessors.processor import TapProcessor + +stream_generator_processor_dict = { + "loan_accounts": (TapGenerator, TapProcessor) +} + + +def sync_endpoint_refactor(client, catalog, state, start_date, stream_name, + path, endpoint_config, sub_type): + generator_class, processor_class = stream_generator_processor_dict[stream_name] + generator, processor = generator_class(client), processor_class() + for record in generator: + processor.process(record) + + # for child_stream_name, child_endpoint_config in endpoint_config.get("children"): + # child_generator, child_processor = stream_generator_processor_dict[child_stream_name] + # for child_record in child_generator: + # child_processor.process(child_record) From 7b11dd8524c489d1b5ac181d7ee8532eade8e231 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 22 Nov 2021 13:31:32 +0200 Subject: [PATCH 18/38] ECDDC-476 Testing jira integration --- tap_mambu/tap_mambu_refactor/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 1d8c770..949d98e 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -12,8 +12,3 @@ def sync_endpoint_refactor(client, catalog, state, start_date, stream_name, generator, processor = generator_class(client), processor_class() for record in generator: processor.process(record) - - # for child_stream_name, child_endpoint_config in endpoint_config.get("children"): - # child_generator, child_processor = stream_generator_processor_dict[child_stream_name] - # for child_record in child_generator: - # child_processor.process(child_record) From 952efdc291ffb9c7b31a0ac8e6cf23dbd49f981b Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Tue, 23 Nov 2021 11:57:54 +0200 Subject: [PATCH 19/38] Implemented some logic, need to rethink inheritance, especially initialization --- .../TapGenerators/generator.py | 41 +++++++++++++++---- .../TapGenerators/loan_accounts_generator.py | 19 +++++++++ tap_mambu/tap_mambu_refactor/__init__.py | 10 ++++- 3 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 492b1d6..18e7588 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -5,9 +5,31 @@ class TapGenerator: - def __init__(self, client): - self.buffer: List = list() - self.start_date = 0 + buffer: List + + def __init__(self, stream_name, client, config, endpoint_config): + self.stream_name = stream_name + self.client = client + self.config = config + self.endpoint_config = endpoint_config + self.__init_buffers() + self.__init_bookmarks() + self.__init_params() + + def __init_buffers(self): + self.buffer = list() + + def __init_bookmarks(self): + self.bookmark_query_field = self.endpoint_config.get('bookmark_query_field') + self.bookmark_type = self.endpoint_config.get('bookmark_type') + self.last_bookmark_value = 0 if self.bookmark_type == "integer" else self.start_date + + def __init_params(self): + self.start_date = self.config.get('start_date') + self.static_params = self.endpoint_config.get('params', {}) + self.offset = 0 + self.limit = self.client.page_size + self.params = self.static_params def __iter__(self): self.buffer = self.fetch_batch() @@ -21,7 +43,12 @@ def __next__(self): return self.buffer.pop(0) def fetch_batch(self): - if not self.buffer: - # fetch lm records - pass - return list() + response = self.client.request( + method=self.endpoint_config.get('api_method', 'GET'), + path=self.endpoint_config.get('path'), + version=self.endpoint_config.get('api_version', 'v2'), + apikey_type=self.endpoint_config.get('apikey_type', None), + params=self.params, + endpoint=self.stream_name, + json=self.endpoint_config.get('body', None)) + return response diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py new file mode 100644 index 0000000..b4c21f6 --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -0,0 +1,19 @@ +import time +import requests + +from typing import List +from .. import TapGenerator + + +class LoanAccountsGenerator(TapGenerator): + def __init__(self, stream_name, client, config, endpoint_config): + super(LoanAccountsGenerator, self).__init__(stream_name, client, config, endpoint_config) + + def __iter__(self): + return super(LoanAccountsGenerator, self).__iter__() + + def __next__(self): + return super(LoanAccountsGenerator, self).__next__() + + def fetch_batch(self): + return super(LoanAccountsGenerator, self).fetch_batch() diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 949d98e..3aeaf14 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -1,4 +1,5 @@ from tap_mambu.tap_mambu_refactor.TapGenerators.generator import TapGenerator +from tap_mambu.tap_mambu_refactor.TapGenerators.loan_accounts_generator import LoanAccountsGenerator from tap_mambu.tap_mambu_refactor.TapProcessors.processor import TapProcessor stream_generator_processor_dict = { @@ -9,6 +10,13 @@ def sync_endpoint_refactor(client, catalog, state, start_date, stream_name, path, endpoint_config, sub_type): generator_class, processor_class = stream_generator_processor_dict[stream_name] - generator, processor = generator_class(client), processor_class() + generator, processor = generator_class(stream_name=stream_name, + client=client, + endpoint_config=endpoint_config), \ + processor_class() for record in generator: processor.process(record) + + +if __name__ == '__main__': + LoanAccountsGenerator(None, None, None, None) From 12d36285406cc379addb8d5dbac3dc480dcb4efc Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Tue, 23 Nov 2021 12:24:36 +0200 Subject: [PATCH 20/38] Pushing more changes, will test after this --- .../TapGenerators/generator.py | 27 ++++++++++++++++--- .../TapGenerators/loan_accounts_generator.py | 9 ------- .../TapProcessors/processor.py | 6 +++++ tap_mambu/tap_mambu_refactor/__init__.py | 6 +---- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 18e7588..eacd434 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -1,7 +1,10 @@ import time +import requests + from typing import List +from singer import utils -import requests +from tap_mambu.transform import transform_json class TapGenerator: @@ -26,18 +29,21 @@ def __init_bookmarks(self): def __init_params(self): self.start_date = self.config.get('start_date') + self.time_extracted = None self.static_params = self.endpoint_config.get('params', {}) self.offset = 0 self.limit = self.client.page_size self.params = self.static_params def __iter__(self): - self.buffer = self.fetch_batch() + raw_batch = self.fetch_batch() + self.buffer = self.transform_batch(raw_batch) return self def __next__(self): if not self.buffer: - self.buffer = self.fetch_batch() + raw_batch = self.fetch_batch() + self.buffer = self.transform_batch(raw_batch) if not self.buffer: raise StopIteration() return self.buffer.pop(0) @@ -48,7 +54,20 @@ def fetch_batch(self): path=self.endpoint_config.get('path'), version=self.endpoint_config.get('api_version', 'v2'), apikey_type=self.endpoint_config.get('apikey_type', None), - params=self.params, + params='&'.join([f'{key}={value}' for (key, value) in self.params.items()]), endpoint=self.stream_name, json=self.endpoint_config.get('body', None)) + self.time_extracted = utils.now() + if isinstance(response, dict): + return [response] return response + + def transform_batch(self, batch): + data_key = self.endpoint_config.get('data_key', None) + transformed_batch = list() + if data_key is None: + transformed_batch = transform_json(batch, self.stream_name) + elif data_key in batch: + transformed_batch = transform_json(batch, data_key)[data_key] + return transformed_batch + diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index b4c21f6..6e8db72 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -8,12 +8,3 @@ class LoanAccountsGenerator(TapGenerator): def __init__(self, stream_name, client, config, endpoint_config): super(LoanAccountsGenerator, self).__init__(stream_name, client, config, endpoint_config) - - def __iter__(self): - return super(LoanAccountsGenerator, self).__iter__() - - def __next__(self): - return super(LoanAccountsGenerator, self).__next__() - - def fetch_batch(self): - return super(LoanAccountsGenerator, self).fetch_batch() diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index 8605d4c..d266159 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -1,3 +1,9 @@ +import json + + class TapProcessor: def __init__(self): pass + + def process(self, record): + print(json.dumps(record, indent=2)) diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 3aeaf14..7c4024e 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -3,7 +3,7 @@ from tap_mambu.tap_mambu_refactor.TapProcessors.processor import TapProcessor stream_generator_processor_dict = { - "loan_accounts": (TapGenerator, TapProcessor) + "loan_accounts": (LoanAccountsGenerator, TapProcessor) } @@ -16,7 +16,3 @@ def sync_endpoint_refactor(client, catalog, state, start_date, stream_name, processor_class() for record in generator: processor.process(record) - - -if __name__ == '__main__': - LoanAccountsGenerator(None, None, None, None) From b7ee002ba0a43afcc2b2d85dae61e81fcd9d70e9 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Thu, 25 Nov 2021 14:03:05 +0200 Subject: [PATCH 21/38] Intermediary commit --- tap_mambu/sync.py | 5 +- .../tap_mambu_refactor/Helpers/__init__.py | 101 ++++++++++++++++++ .../TapGenerators/generator.py | 53 +++++++-- .../TapGenerators/loan_accounts_generator.py | 10 +- .../TapProcessors/processor.py | 79 +++++++++++++- tap_mambu/tap_mambu_refactor/__init__.py | 26 +++-- 6 files changed, 249 insertions(+), 25 deletions(-) create mode 100644 tap_mambu/tap_mambu_refactor/Helpers/__init__.py diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index 1a876ac..b0eb320 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -906,11 +906,10 @@ def sync(client, config, catalog, state): client=client, catalog=catalog, state=state, - start_date=start_date, stream_name=stream_name, - path=path, endpoint_config=endpoint_config, - sub_type=sub_type + sub_type=sub_type, + config=config ) else: total_records = sync_endpoint( diff --git a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py new file mode 100644 index 0000000..ba93b4c --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py @@ -0,0 +1,101 @@ +import re +from singer import write_state + + +def write_bookmark(state, stream, sub_type, value): + if 'bookmarks' not in state: + state['bookmarks'] = {} + if stream not in state['bookmarks']: + state['bookmarks'][stream] = {} + if sub_type == 'self': + state['bookmarks'][stream] = value + else: + if sub_type not in state['bookmarks'][stream]: + state['bookmarks'][stream][sub_type] = {} + state['bookmarks'][stream][sub_type] = value + write_state(state) + + +# Convert camelCase to snake_case +def convert(name): + regsub = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', regsub).lower() + + +# Convert keys in json array +def convert_array(arr): + new_arr = [] + for i in arr: + if isinstance(i, list): + new_arr.append(convert_array(i)) + elif isinstance(i, dict): + new_arr.append(convert_json(i)) + else: + new_arr.append(i) + return new_arr + + +# Convert keys in json +def convert_json(this_json): + out = {} + for key in this_json: + new_key = convert(key) + if isinstance(this_json[key], dict): + out[new_key] = convert_json(this_json[key]) + elif isinstance(this_json[key], list): + out[new_key] = convert_array(this_json[key]) + else: + out[new_key] = this_json[key] + return out + + +def remove_custom_nodes(this_json): + if not isinstance(this_json, (dict, list)): + return this_json + if isinstance(this_json, list): + return [remove_custom_nodes(vv) for vv in this_json] + return {kk: remove_custom_nodes(vv) for kk, vv in this_json.items() \ + if not kk[:1] == '_'} + + +def add_cust_field(key, record, cust_field_sets): + for cf_key, cf_value in record.items(): + field = { + 'field_set_id' : key, + 'id' : cf_key, + 'value' : cf_value, + } + cust_field_sets.append(field) + +# Convert custom fields and sets +# Generalize/Abstract custom fields to key/value pairs +def convert_custom_fields(this_json): + for record in this_json: + cust_field_sets = [] + for key, value in record.items(): + if key.startswith('_'): + if isinstance(value, dict): + add_cust_field(key, value, cust_field_sets) + elif isinstance(value, list): + for element in value: + add_cust_field(key, element, cust_field_sets) + record['custom_fields'] = cust_field_sets + return this_json + + +# Run all transforms: denests _embedded, removes _embedded/_links, and +# converst camelCase to snake_case for fieldname keys. +def transform_json(this_json, path): + new_json = remove_custom_nodes(convert_custom_fields(this_json)) + out = {} + out[path] = new_json + transformed_json = convert_json(out) + return transformed_json[path] + + +def transform_activities(this_json): + for record in this_json: + for key, value in record['activity'].items(): + record[key] = value + del record['activity'] + return this_json diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index eacd434..d1fd645 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -2,33 +2,41 @@ import requests from typing import List -from singer import utils +from singer import utils, get_bookmark -from tap_mambu.transform import transform_json +from ..Helpers import write_bookmark, transform_json class TapGenerator: - buffer: List - - def __init__(self, stream_name, client, config, endpoint_config): + def __init__(self, stream_name, client, config, endpoint_config, state, sub_type): self.stream_name = stream_name self.client = client self.config = config self.endpoint_config = endpoint_config + self.state = state + self.sub_type = sub_type + self.__init_config() self.__init_buffers() self.__init_bookmarks() self.__init_params() + def __init_config(self): + self.start_date = self.config.get('start_date') + def __init_buffers(self): - self.buffer = list() + self.buffer: List = list() def __init_bookmarks(self): self.bookmark_query_field = self.endpoint_config.get('bookmark_query_field') self.bookmark_type = self.endpoint_config.get('bookmark_type') - self.last_bookmark_value = 0 if self.bookmark_type == "integer" else self.start_date + self.bookmark_field = self.endpoint_config.get('bookmark_field') + if self.bookmark_type == "integer": + self.last_bookmark_value = get_bookmark(self.state, self.stream_name, self.sub_type, 0) + else: + self.last_bookmark_value = get_bookmark(self.state, self.stream_name, self.sub_type, self.start_date) + self.max_bookmark_value = self.last_bookmark_value def __init_params(self): - self.start_date = self.config.get('start_date') self.time_extracted = None self.static_params = self.endpoint_config.get('params', {}) self.offset = 0 @@ -36,19 +44,46 @@ def __init_params(self): self.params = self.static_params def __iter__(self): + self.prepare_batch() raw_batch = self.fetch_batch() self.buffer = self.transform_batch(raw_batch) + self.last_batch_size = len(self.buffer) return self def __next__(self): if not self.buffer: + if self.last_batch_size < self.limit: + raise StopIteration() + self.offset += self.limit + self.write_bookmark() + self.prepare_batch() raw_batch = self.fetch_batch() self.buffer = self.transform_batch(raw_batch) + self.last_batch_size = len(self.buffer) if not self.buffer: raise StopIteration() return self.buffer.pop(0) - def fetch_batch(self): + def __del__(self): + self.write_bookmark() + + def write_bookmark(self): + if self.bookmark_field: + write_bookmark(self.state, + self.stream_name, + self.sub_type, + self.max_bookmark_value) + + def prepare_batch(self): + self.params = { + "offset": self.offset, + "limit": self.limit, + **self.static_params + } + + def fetch_batch(self): # TODO: Take Bookmark into consideration + if self.bookmark_query_field: + self.params[self.bookmark_query_field] = self.last_bookmark_value response = self.client.request( method=self.endpoint_config.get('api_method', 'GET'), path=self.endpoint_config.get('path'), diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index 6e8db72..fe4f9ae 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -6,5 +6,11 @@ class LoanAccountsGenerator(TapGenerator): - def __init__(self, stream_name, client, config, endpoint_config): - super(LoanAccountsGenerator, self).__init__(stream_name, client, config, endpoint_config) + def __init__(self, *args, **kwargs): + super(LoanAccountsGenerator, self).__init__(*args, **kwargs) + # self.__init_buffers() + + # def __init_buffers(self): + # super(LoanAccountsGenerator, self).__init_buffers() + # self.buffer_ad: List = list() + # self.buffer_lm: List = list() diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index d266159..d88dd16 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -1,9 +1,80 @@ import json +from singer import write_record, Transformer, metadata, write_schema +from singer.utils import strptime_to_utc + +from ..TapGenerators.generator import TapGenerator class TapProcessor: - def __init__(self): - pass + def __init__(self, generator: TapGenerator, catalog, stream_name, endpoint_config): + self.generator = generator + self.catalog = catalog + self.stream_name = stream_name + self.endpoint_config = endpoint_config + self.stream = self.catalog.get_stream(stream_name) + self.schema = self.stream.schema.to_dict() + self.stream_metadata = metadata.to_map(self.stream.metadata) + + def write_schema(self): + stream = self.catalog.get_stream(self.stream_name) + schema = stream.schema.to_dict() + write_schema(self.stream_name, schema, stream.key_properties) + + def process_stream_from_generator(self): + self.write_schema() + for record in self.generator: + self.process_record(record) + + def __is_record_past_bookmark(self, transformed_record): + bookmark_type = self.endpoint_config.get('bookmark_type') + bookmark_field = self.endpoint_config.get('bookmark_field') + if type(bookmark_field) is list: + bookmark_found = False + for bookmark in bookmark_field: + if bookmark and (bookmark in transformed_record): + bookmark_dttm = strptime_to_utc(transformed_record[bookmark]) + if self.generator.max_bookmark_value: + max_bookmark_value_dttm = strptime_to_utc(self.generator.max_bookmark_value) + if bookmark_dttm > max_bookmark_value_dttm: + self.generator.max_bookmark_value = transformed_record[bookmark] + else: + self.generator.max_bookmark_value = transformed_record[bookmark] + + if bookmark and (bookmark in transformed_record): + bookmark_found = True + if bookmark_type == 'integer': + # Keep only records whose bookmark is after the last_integer + if transformed_record[bookmark] >= self.generator.last_bookmark_value: + return True + elif bookmark_type == 'datetime': + with Transformer() as transformer: + last_dttm = transformer._transform_datetime(self.generator.last_bookmark_value) + with Transformer() as transformer: + bookmark_dttm = transformer._transform_datetime(transformed_record[bookmark]) + # Keep only records whose bookmark is after the last_datetime + if bookmark_dttm >= last_dttm: + return True + index = (bookmark_field.index(bookmark)) + 1 + # Check if the rest of the bookmarks have a value higher than the current max_bookmark + for bookmark in bookmark_field[index:]: + if bookmark and (bookmark in transformed_record): + bookmark_dttm = strptime_to_utc(transformed_record[bookmark]) + max_bookmark_value_dttm = strptime_to_utc(self.generator.max_bookmark_value) + if bookmark_dttm > max_bookmark_value_dttm: + self.generator.max_bookmark_value = transformed_record[bookmark] + + break + if not bookmark_found: + return True + return False + + def process_record(self, record): + with Transformer() as transformer: + transformed_record = transformer.transform(record, + self.schema, + self.stream_metadata) - def process(self, record): - print(json.dumps(record, indent=2)) + if self.__is_record_past_bookmark(transformed_record): + write_record(self.stream_name, + transformed_record, + time_extracted=self.generator.time_extracted) diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 7c4024e..6b39062 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -1,18 +1,30 @@ +import singer + from tap_mambu.tap_mambu_refactor.TapGenerators.generator import TapGenerator from tap_mambu.tap_mambu_refactor.TapGenerators.loan_accounts_generator import LoanAccountsGenerator from tap_mambu.tap_mambu_refactor.TapProcessors.processor import TapProcessor + +LOGGER = singer.get_logger() + + stream_generator_processor_dict = { "loan_accounts": (LoanAccountsGenerator, TapProcessor) } -def sync_endpoint_refactor(client, catalog, state, start_date, stream_name, - path, endpoint_config, sub_type): +def sync_endpoint_refactor(client, catalog, state, stream_name, + endpoint_config, sub_type, config): generator_class, processor_class = stream_generator_processor_dict[stream_name] - generator, processor = generator_class(stream_name=stream_name, + generator = generator_class(stream_name=stream_name, client=client, - endpoint_config=endpoint_config), \ - processor_class() - for record in generator: - processor.process(record) + endpoint_config=endpoint_config, + config=config, + state=state, + sub_type=sub_type) + processor = processor_class(generator=generator, + catalog=catalog, + stream_name=stream_name, + endpoint_config=endpoint_config) + + processor.process_stream_from_generator() From 4d106f4aa71331968fd8bb4edc1364cd29f90f9d Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Thu, 25 Nov 2021 15:20:35 +0200 Subject: [PATCH 22/38] Added last_account_appraisal_date to loan_accounts.json --- tap_mambu/schemas/loan_accounts.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tap_mambu/schemas/loan_accounts.json b/tap_mambu/schemas/loan_accounts.json index 69cd67d..1e89405 100644 --- a/tap_mambu/schemas/loan_accounts.json +++ b/tap_mambu/schemas/loan_accounts.json @@ -883,6 +883,13 @@ ], "format": "date-time" }, + "last_account_appraisal_date": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, "penalty_settings": { "type": [ "null", From c4de88f34fb0c2e45dbafce9301aecf03fecab49 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Fri, 26 Nov 2021 17:17:23 +0200 Subject: [PATCH 23/38] Implemented bookmark resume and ABC for Generators, with Loan Accounts specific fixes and classes (WIP) --- tap_mambu/sync.py | 2 +- .../tap_mambu_refactor/Helpers/__init__.py | 8 +- .../TapGenerators/generator.py | 28 +++-- .../TapGenerators/loan_accounts_generator.py | 71 ++++++++++-- .../TapProcessors/processor.py | 103 ++++++++++-------- tap_mambu/tap_mambu_refactor/__init__.py | 21 ++-- 6 files changed, 153 insertions(+), 80 deletions(-) diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index b0eb320..5ceeae6 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -643,7 +643,7 @@ def sync(client, config, catalog, state): }, "filterCriteria": [ { - "field": "lastModifiedDate", + "field": "lastAccountAppraisalDate", "operator": "AFTER", "value": loan_accounts_dt_str } diff --git a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py index ba93b4c..06e09b9 100644 --- a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py +++ b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py @@ -1,5 +1,11 @@ import re -from singer import write_state +from singer import write_state, Transformer + + +def transform_datetime(this_dttm): + with Transformer() as transformer: + new_dttm = transformer._transform_datetime(this_dttm) + return new_dttm def write_bookmark(state, stream, sub_type, value): diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index d1fd645..1f39401 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -1,4 +1,6 @@ import time +from abc import ABC + import requests from typing import List @@ -7,26 +9,29 @@ from ..Helpers import write_bookmark, transform_json -class TapGenerator: - def __init__(self, stream_name, client, config, endpoint_config, state, sub_type): +class TapGenerator(ABC): + def __init__(self, stream_name, client, config, state, sub_type): self.stream_name = stream_name self.client = client self.config = config - self.endpoint_config = endpoint_config self.state = state self.sub_type = sub_type - self.__init_config() - self.__init_buffers() - self.__init_bookmarks() - self.__init_params() + self._init_config() + self._init_endpoint_config() + self._init_buffers() + self._init_bookmarks() + self._init_params() - def __init_config(self): + def _init_config(self): self.start_date = self.config.get('start_date') - def __init_buffers(self): + def _init_endpoint_config(self): + self.endpoint_config = {} + + def _init_buffers(self): self.buffer: List = list() - def __init_bookmarks(self): + def _init_bookmarks(self): self.bookmark_query_field = self.endpoint_config.get('bookmark_query_field') self.bookmark_type = self.endpoint_config.get('bookmark_type') self.bookmark_field = self.endpoint_config.get('bookmark_field') @@ -36,7 +41,7 @@ def __init_bookmarks(self): self.last_bookmark_value = get_bookmark(self.state, self.stream_name, self.sub_type, self.start_date) self.max_bookmark_value = self.last_bookmark_value - def __init_params(self): + def _init_params(self): self.time_extracted = None self.static_params = self.endpoint_config.get('params', {}) self.offset = 0 @@ -105,4 +110,3 @@ def transform_batch(self, batch): elif data_key in batch: transformed_batch = transform_json(batch, data_key)[data_key] return transformed_batch - diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index fe4f9ae..29cb17a 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -1,16 +1,71 @@ +import abc import time +from operator import itemgetter + import requests from typing import List from .. import TapGenerator +from ..Helpers import transform_datetime +from singer import get_bookmark class LoanAccountsGenerator(TapGenerator): - def __init__(self, *args, **kwargs): - super(LoanAccountsGenerator, self).__init__(*args, **kwargs) - # self.__init_buffers() - - # def __init_buffers(self): - # super(LoanAccountsGenerator, self).__init_buffers() - # self.buffer_ad: List = list() - # self.buffer_lm: List = list() + @abc.abstractmethod + def _init_endpoint_config(self): + self.endpoint_config = { + 'path': 'loans:search', + 'api_version': 'v2', + 'api_method': 'POST', + 'params': { + 'detailsLevel': 'FULL', + 'paginationDetails': 'ON' + }, + 'body': { + "sortingCriteria": { + "field": "lastModifiedDate", + "order": "ASC" + }, + "filterCriteria": [ + { + "field": "", + "operator": "AFTER", + "value": transform_datetime( + get_bookmark(self.state, 'deposit_accounts', 'self', self.start_date))[:10] + } + ] + }, + 'bookmark_field': '', + 'bookmark_type': 'datetime', + 'id_fields': ['id'], + 'children': { + 'loan_repayments': { + 'path': 'loans/{}/repayments', + 'api_version': 'v1', + 'api_method': 'GET', + 'params': { + 'detailsLevel': 'FULL', + 'paginationDetails': 'ON' + }, + 'id_fields': ['encoded_key'], + 'parent': 'loan_accounts' + } + } + } + +class LoanAccountsLMGenerator(LoanAccountsGenerator): + def _init_endpoint_config(self): + super()._init_endpoint_config() + self.endpoint_config["body"]["filterCriteria"][0]["field"] = "lastModifiedDate" + self.endpoint_config["bookmark_field"] = "lastModifiedDate" + + +class LoanAccountsADGenerator(LoanAccountsGenerator): + def _init_endpoint_config(self): + super()._init_endpoint_config() + self.endpoint_config["body"]["filterCriteria"][0]["field"] = "lastAccountAppraisalDate" + self.endpoint_config["bookmark_field"] = "lastAccountAppraisalDate" + + def transform_batch(self, batch): + transformed_batch = super().transform_batch(batch) + return sorted(transformed_batch, key=itemgetter("last_account_appraisal_date")) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index d88dd16..08621d3 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -1,16 +1,19 @@ import json +from typing import List + from singer import write_record, Transformer, metadata, write_schema from singer.utils import strptime_to_utc +from ..Helpers import transform_datetime, convert from ..TapGenerators.generator import TapGenerator class TapProcessor: - def __init__(self, generator: TapGenerator, catalog, stream_name, endpoint_config): - self.generator = generator + def __init__(self, generators: List[TapGenerator], catalog, stream_name): + self.generators = generators + self.generator_values = dict() self.catalog = catalog self.stream_name = stream_name - self.endpoint_config = endpoint_config self.stream = self.catalog.get_stream(stream_name) self.schema = self.stream.schema.to_dict() self.stream_metadata = metadata.to_map(self.stream.metadata) @@ -20,55 +23,61 @@ def write_schema(self): schema = stream.schema.to_dict() write_schema(self.stream_name, schema, stream.key_properties) - def process_stream_from_generator(self): + def process_streams_from_generators(self): self.write_schema() - for record in self.generator: - self.process_record(record) + for generator in self.generators: + self.generator_values[iter(generator)] = None + while True: + for iterator in list(self.generator_values.keys()): + if self.generator_values[iterator] is None: + self.generator_values[iterator] = next(iterator, None) + if self.generator_values[iterator] is None: + self.generator_values.pop(iterator) + if not self.generator_values: + break + + min_record_key: TapGenerator = None + min_record_value = None + for iterator in self.generator_values.keys(): + if min_record_value is None \ + or min_record_value > self.generator_values[iterator][convert(iterator.bookmark_field)]: + min_record_key = iterator + min_record_value = self.generator_values[iterator][convert(iterator.bookmark_field)] + self.process_record(self.generator_values[min_record_key], min_record_key.time_extracted) + self.generator_values[min_record_key] = None def __is_record_past_bookmark(self, transformed_record): - bookmark_type = self.endpoint_config.get('bookmark_type') - bookmark_field = self.endpoint_config.get('bookmark_field') - if type(bookmark_field) is list: - bookmark_found = False - for bookmark in bookmark_field: - if bookmark and (bookmark in transformed_record): - bookmark_dttm = strptime_to_utc(transformed_record[bookmark]) - if self.generator.max_bookmark_value: - max_bookmark_value_dttm = strptime_to_utc(self.generator.max_bookmark_value) - if bookmark_dttm > max_bookmark_value_dttm: - self.generator.max_bookmark_value = transformed_record[bookmark] - else: - self.generator.max_bookmark_value = transformed_record[bookmark] + is_record_past_bookmark = False + bookmark_type = self.generators[0].endpoint_config.get('bookmark_type') + bookmark_field = self.generators[0].endpoint_config.get('bookmark_field') + + # Reset max_bookmark_value to new value if higher + if bookmark_field and (bookmark_field in transformed_record): + bookmark_dttm = strptime_to_utc(transformed_record[bookmark_field]) + if self.generators[0].max_bookmark_value: + max_bookmark_value_dttm = strptime_to_utc(self.generators[0].max_bookmark_value) + if bookmark_dttm > max_bookmark_value_dttm: + self.generators[0].max_bookmark_value = transformed_record[bookmark_field] + else: + self.generators[0].max_bookmark_value = transformed_record[bookmark_field] - if bookmark and (bookmark in transformed_record): - bookmark_found = True - if bookmark_type == 'integer': - # Keep only records whose bookmark is after the last_integer - if transformed_record[bookmark] >= self.generator.last_bookmark_value: - return True - elif bookmark_type == 'datetime': - with Transformer() as transformer: - last_dttm = transformer._transform_datetime(self.generator.last_bookmark_value) - with Transformer() as transformer: - bookmark_dttm = transformer._transform_datetime(transformed_record[bookmark]) - # Keep only records whose bookmark is after the last_datetime - if bookmark_dttm >= last_dttm: - return True - index = (bookmark_field.index(bookmark)) + 1 - # Check if the rest of the bookmarks have a value higher than the current max_bookmark - for bookmark in bookmark_field[index:]: - if bookmark and (bookmark in transformed_record): - bookmark_dttm = strptime_to_utc(transformed_record[bookmark]) - max_bookmark_value_dttm = strptime_to_utc(self.generator.max_bookmark_value) - if bookmark_dttm > max_bookmark_value_dttm: - self.generator.max_bookmark_value = transformed_record[bookmark] + if bookmark_field and (bookmark_field in transformed_record): + if bookmark_type == 'integer': + # Keep only records whose bookmark is after the last_integer + if transformed_record[bookmark_field] >= self.generators[0].last_bookmark_value: + is_record_past_bookmark = True + elif bookmark_type == 'datetime': + last_dttm = transform_datetime(self.generator.last_bookmark_value) + bookmark_dttm = transform_datetime(transformed_record[bookmark_field]) + # Keep only records whose bookmark is after the last_datetime + if bookmark_dttm >= last_dttm: + is_record_past_bookmark = True + else: + is_record_past_bookmark = True - break - if not bookmark_found: - return True - return False + return is_record_past_bookmark - def process_record(self, record): + def process_record(self, record, time_extracted): with Transformer() as transformer: transformed_record = transformer.transform(record, self.schema, @@ -77,4 +86,4 @@ def process_record(self, record): if self.__is_record_past_bookmark(transformed_record): write_record(self.stream_name, transformed_record, - time_extracted=self.generator.time_extracted) + time_extracted=time_extracted) diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 6b39062..33f2828 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -1,30 +1,29 @@ import singer -from tap_mambu.tap_mambu_refactor.TapGenerators.generator import TapGenerator -from tap_mambu.tap_mambu_refactor.TapGenerators.loan_accounts_generator import LoanAccountsGenerator -from tap_mambu.tap_mambu_refactor.TapProcessors.processor import TapProcessor +from .TapGenerators.generator import TapGenerator +from .TapGenerators.loan_accounts_generator import LoanAccountsADGenerator, LoanAccountsLMGenerator +from .TapProcessors.processor import TapProcessor LOGGER = singer.get_logger() stream_generator_processor_dict = { - "loan_accounts": (LoanAccountsGenerator, TapProcessor) + "loan_accounts": ((LoanAccountsADGenerator, LoanAccountsLMGenerator), TapProcessor) } def sync_endpoint_refactor(client, catalog, state, stream_name, endpoint_config, sub_type, config): - generator_class, processor_class = stream_generator_processor_dict[stream_name] - generator = generator_class(stream_name=stream_name, + generator_classes, processor_class = stream_generator_processor_dict[stream_name] + generators = [generator_class(stream_name=stream_name, client=client, - endpoint_config=endpoint_config, config=config, state=state, sub_type=sub_type) - processor = processor_class(generator=generator, + for generator_class in generator_classes] + processor = processor_class(generators=generators, catalog=catalog, - stream_name=stream_name, - endpoint_config=endpoint_config) + stream_name=stream_name) - processor.process_stream_from_generator() + processor.process_streams_from_generators() From 139a7a79c55d96884a30dbf8793667cff686fbb4 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 29 Nov 2021 15:30:30 +0100 Subject: [PATCH 24/38] Commit before fixing Appraisal Order Bug --- .../tap_mambu_refactor/TapProcessors/processor.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index 08621d3..bc3d3d2 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -39,12 +39,19 @@ def process_streams_from_generators(self): min_record_key: TapGenerator = None min_record_value = None for iterator in self.generator_values.keys(): + new_bookmark_field = convert(iterator.bookmark_field) if min_record_value is None \ - or min_record_value > self.generator_values[iterator][convert(iterator.bookmark_field)]: + or transform_datetime(min_record_value) > transform_datetime(self.generator_values[iterator][new_bookmark_field]): min_record_key = iterator - min_record_value = self.generator_values[iterator][convert(iterator.bookmark_field)] - self.process_record(self.generator_values[min_record_key], min_record_key.time_extracted) - self.generator_values[min_record_key] = None + min_record_value = self.generator_values[iterator][new_bookmark_field] + + record = self.generator_values[min_record_key] + record["generator"] = list(self.generator_values.keys()).index(min_record_key) + self.process_record(record, min_record_key.time_extracted) + + for iterator in self.generator_values.keys(): + if self.generator_values[min_record_key] == self.generator_values[iterator]: + self.generator_values[iterator] = None def __is_record_past_bookmark(self, transformed_record): is_record_past_bookmark = False From 8531705125bbeb5c5ecb7f82b820ac9cf162ba32 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 29 Nov 2021 16:20:23 +0100 Subject: [PATCH 25/38] Finished implementation of Loan Accounts Generator/Processor --- .../TapGenerators/generator.py | 5 +--- .../TapGenerators/loan_accounts_generator.py | 7 ++--- .../TapProcessors/loan_accounts_processor.py | 5 ++++ .../TapProcessors/processor.py | 28 +++++++++++-------- tap_mambu/tap_mambu_refactor/__init__.py | 3 +- 5 files changed, 27 insertions(+), 21 deletions(-) create mode 100644 tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 1f39401..1f7e0cc 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -60,7 +60,7 @@ def __next__(self): if self.last_batch_size < self.limit: raise StopIteration() self.offset += self.limit - self.write_bookmark() + # self.write_bookmark() self.prepare_batch() raw_batch = self.fetch_batch() self.buffer = self.transform_batch(raw_batch) @@ -69,9 +69,6 @@ def __next__(self): raise StopIteration() return self.buffer.pop(0) - def __del__(self): - self.write_bookmark() - def write_bookmark(self): if self.bookmark_field: write_bookmark(self.state, diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index 29cb17a..afc8b45 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -23,7 +23,7 @@ def _init_endpoint_config(self): }, 'body': { "sortingCriteria": { - "field": "lastModifiedDate", + "field": "encodedKey", "order": "ASC" }, "filterCriteria": [ @@ -53,6 +53,7 @@ def _init_endpoint_config(self): } } + class LoanAccountsLMGenerator(LoanAccountsGenerator): def _init_endpoint_config(self): super()._init_endpoint_config() @@ -65,7 +66,3 @@ def _init_endpoint_config(self): super()._init_endpoint_config() self.endpoint_config["body"]["filterCriteria"][0]["field"] = "lastAccountAppraisalDate" self.endpoint_config["bookmark_field"] = "lastAccountAppraisalDate" - - def transform_batch(self, batch): - transformed_batch = super().transform_batch(batch) - return sorted(transformed_batch, key=itemgetter("last_account_appraisal_date")) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py new file mode 100644 index 0000000..15e51fd --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py @@ -0,0 +1,5 @@ +from .processor import TapProcessor + + +class LoanAccountsProcessor(TapProcessor): + pass diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index bc3d3d2..88b85c3 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -1,19 +1,21 @@ import json +from abc import ABC from typing import List from singer import write_record, Transformer, metadata, write_schema from singer.utils import strptime_to_utc -from ..Helpers import transform_datetime, convert +from ..Helpers import transform_datetime, convert, write_bookmark from ..TapGenerators.generator import TapGenerator -class TapProcessor: +class TapProcessor(ABC): def __init__(self, generators: List[TapGenerator], catalog, stream_name): self.generators = generators self.generator_values = dict() self.catalog = catalog self.stream_name = stream_name + self.deduplication_key = "encoded_key" self.stream = self.catalog.get_stream(stream_name) self.schema = self.stream.schema.to_dict() self.stream_metadata = metadata.to_map(self.stream.metadata) @@ -28,6 +30,7 @@ def process_streams_from_generators(self): for generator in self.generators: self.generator_values[iter(generator)] = None while True: + # Populate list of values from generators (if any were removed) for iterator in list(self.generator_values.keys()): if self.generator_values[iterator] is None: self.generator_values[iterator] = next(iterator, None) @@ -35,33 +38,36 @@ def process_streams_from_generators(self): self.generator_values.pop(iterator) if not self.generator_values: break - + # Find lowest value in the list min_record_key: TapGenerator = None min_record_value = None for iterator in self.generator_values.keys(): - new_bookmark_field = convert(iterator.bookmark_field) if min_record_value is None \ - or transform_datetime(min_record_value) > transform_datetime(self.generator_values[iterator][new_bookmark_field]): + or min_record_value > self.generator_values[iterator][self.deduplication_key]: min_record_key = iterator - min_record_value = self.generator_values[iterator][new_bookmark_field] + min_record_value = self.generator_values[iterator][self.deduplication_key] + # Process the record record = self.generator_values[min_record_key] - record["generator"] = list(self.generator_values.keys()).index(min_record_key) self.process_record(record, min_record_key.time_extracted) + # Remove any record with the same deduplication_key from the list + # (so we don't process the same record twice) for iterator in self.generator_values.keys(): - if self.generator_values[min_record_key] == self.generator_values[iterator]: + if min_record_value == self.generator_values[iterator][self.deduplication_key]: self.generator_values[iterator] = None + self.generators[0].write_bookmark() + def __is_record_past_bookmark(self, transformed_record): is_record_past_bookmark = False bookmark_type = self.generators[0].endpoint_config.get('bookmark_type') - bookmark_field = self.generators[0].endpoint_config.get('bookmark_field') + bookmark_field = convert(self.generators[0].endpoint_config.get('bookmark_field')) # Reset max_bookmark_value to new value if higher if bookmark_field and (bookmark_field in transformed_record): bookmark_dttm = strptime_to_utc(transformed_record[bookmark_field]) - if self.generators[0].max_bookmark_value: + if hasattr(self.generators[0], "max_bookmark_value"): max_bookmark_value_dttm = strptime_to_utc(self.generators[0].max_bookmark_value) if bookmark_dttm > max_bookmark_value_dttm: self.generators[0].max_bookmark_value = transformed_record[bookmark_field] @@ -74,7 +80,7 @@ def __is_record_past_bookmark(self, transformed_record): if transformed_record[bookmark_field] >= self.generators[0].last_bookmark_value: is_record_past_bookmark = True elif bookmark_type == 'datetime': - last_dttm = transform_datetime(self.generator.last_bookmark_value) + last_dttm = transform_datetime(self.generators[0].last_bookmark_value) bookmark_dttm = transform_datetime(transformed_record[bookmark_field]) # Keep only records whose bookmark is after the last_datetime if bookmark_dttm >= last_dttm: diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 33f2828..da8b4ca 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -2,6 +2,7 @@ from .TapGenerators.generator import TapGenerator from .TapGenerators.loan_accounts_generator import LoanAccountsADGenerator, LoanAccountsLMGenerator +from .TapProcessors.loan_accounts_processor import LoanAccountsProcessor from .TapProcessors.processor import TapProcessor @@ -9,7 +10,7 @@ stream_generator_processor_dict = { - "loan_accounts": ((LoanAccountsADGenerator, LoanAccountsLMGenerator), TapProcessor) + "loan_accounts": ((LoanAccountsLMGenerator, LoanAccountsADGenerator), LoanAccountsProcessor) } From 3c39c6f959ac009ffe812a7c52d8a096a87e309e Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 6 Dec 2021 15:59:30 +0200 Subject: [PATCH 26/38] Small Bugfixes --- tap_mambu/sync.py | 2 +- tap_mambu/tap_mambu_refactor/Helpers/__init__.py | 10 ++++++++++ .../tap_mambu_refactor/TapGenerators/generator.py | 4 ++-- .../TapGenerators/loan_accounts_generator.py | 5 ++--- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index 5ceeae6..b0eb320 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -643,7 +643,7 @@ def sync(client, config, catalog, state): }, "filterCriteria": [ { - "field": "lastAccountAppraisalDate", + "field": "lastModifiedDate", "operator": "AFTER", "value": loan_accounts_dt_str } diff --git a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py index 06e09b9..e582a91 100644 --- a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py +++ b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py @@ -2,6 +2,16 @@ from singer import write_state, Transformer +def get_bookmark(state, stream, sub_type, default): + if (state is None) or ('bookmarks' not in state): + return default + + if sub_type == 'self': + return state.get('bookmarks', {}).get(stream, default) + else: + return state.get('bookmarks', {}).get(stream, {}).get(sub_type, default) + + def transform_datetime(this_dttm): with Transformer() as transformer: new_dttm = transformer._transform_datetime(this_dttm) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 1f7e0cc..ad16027 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -4,9 +4,9 @@ import requests from typing import List -from singer import utils, get_bookmark +from singer import utils -from ..Helpers import write_bookmark, transform_json +from ..Helpers import write_bookmark, transform_json, get_bookmark class TapGenerator(ABC): diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index afc8b45..232aa30 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -6,8 +6,7 @@ from typing import List from .. import TapGenerator -from ..Helpers import transform_datetime -from singer import get_bookmark +from ..Helpers import transform_datetime, get_bookmark class LoanAccountsGenerator(TapGenerator): @@ -31,7 +30,7 @@ def _init_endpoint_config(self): "field": "", "operator": "AFTER", "value": transform_datetime( - get_bookmark(self.state, 'deposit_accounts', 'self', self.start_date))[:10] + get_bookmark(self.state, 'loan_accounts', 'self', self.start_date))[:10] } ] }, From 80ab4f6929fc6dac620303aecc9aad2e34730789 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 6 Dec 2021 18:43:06 +0200 Subject: [PATCH 27/38] Small fixes suggested by Alex via PR Comments --- tap_mambu/sync.py | 1 - .../tap_mambu_refactor/TapGenerators/generator.py | 10 +++------- .../TapGenerators/loan_accounts_generator.py | 5 ----- .../tap_mambu_refactor/TapProcessors/processor.py | 15 +++++++-------- tap_mambu/tap_mambu_refactor/__init__.py | 9 ++++----- 5 files changed, 14 insertions(+), 26 deletions(-) diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index b0eb320..2e3d741 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -907,7 +907,6 @@ def sync(client, config, catalog, state): catalog=catalog, state=state, stream_name=stream_name, - endpoint_config=endpoint_config, sub_type=sub_type, config=config ) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index ad16027..650bba6 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -1,8 +1,4 @@ -import time from abc import ABC - -import requests - from typing import List from singer import utils @@ -65,8 +61,8 @@ def __next__(self): raw_batch = self.fetch_batch() self.buffer = self.transform_batch(raw_batch) self.last_batch_size = len(self.buffer) - if not self.buffer: - raise StopIteration() + if not self.buffer: + raise StopIteration() return self.buffer.pop(0) def write_bookmark(self): @@ -83,7 +79,7 @@ def prepare_batch(self): **self.static_params } - def fetch_batch(self): # TODO: Take Bookmark into consideration + def fetch_batch(self): if self.bookmark_query_field: self.params[self.bookmark_query_field] = self.last_bookmark_value response = self.client.request( diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index 232aa30..8abae82 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -1,10 +1,5 @@ import abc -import time -from operator import itemgetter -import requests - -from typing import List from .. import TapGenerator from ..Helpers import transform_datetime, get_bookmark diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index 88b85c3..d1bf358 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -1,17 +1,15 @@ -import json from abc import ABC -from typing import List from singer import write_record, Transformer, metadata, write_schema from singer.utils import strptime_to_utc -from ..Helpers import transform_datetime, convert, write_bookmark +from ..Helpers import transform_datetime, convert from ..TapGenerators.generator import TapGenerator class TapProcessor(ABC): - def __init__(self, generators: List[TapGenerator], catalog, stream_name): - self.generators = generators + def __init__(self, catalog, stream_name): + self.generators = list() self.generator_values = dict() self.catalog = catalog self.stream_name = stream_name @@ -25,13 +23,14 @@ def write_schema(self): schema = stream.schema.to_dict() write_schema(self.stream_name, schema, stream.key_properties) - def process_streams_from_generators(self): + def process_streams_from_generators(self, generators): + self.generators = generators self.write_schema() for generator in self.generators: self.generator_values[iter(generator)] = None while True: # Populate list of values from generators (if any were removed) - for iterator in list(self.generator_values.keys()): + for iterator in list(self.generator_values): if self.generator_values[iterator] is None: self.generator_values[iterator] = next(iterator, None) if self.generator_values[iterator] is None: @@ -41,7 +40,7 @@ def process_streams_from_generators(self): # Find lowest value in the list min_record_key: TapGenerator = None min_record_value = None - for iterator in self.generator_values.keys(): + for iterator in self.generator_values: if min_record_value is None \ or min_record_value > self.generator_values[iterator][self.deduplication_key]: min_record_key = iterator diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index da8b4ca..193b998 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -14,8 +14,8 @@ } -def sync_endpoint_refactor(client, catalog, state, stream_name, - endpoint_config, sub_type, config): +def sync_endpoint_refactor(client, catalog, state, + stream_name, sub_type, config): generator_classes, processor_class = stream_generator_processor_dict[stream_name] generators = [generator_class(stream_name=stream_name, client=client, @@ -23,8 +23,7 @@ def sync_endpoint_refactor(client, catalog, state, stream_name, state=state, sub_type=sub_type) for generator_class in generator_classes] - processor = processor_class(generators=generators, - catalog=catalog, + processor = processor_class(catalog=catalog, stream_name=stream_name) - processor.process_streams_from_generators() + processor.process_streams_from_generators(generators=generators) From c9afcbea3aa72676ae5f653889113d9f160df06e Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Mon, 6 Dec 2021 18:52:09 +0200 Subject: [PATCH 28/38] Solved __iter__ method and __next__ method duplicated code according to PR comment --- .../tap_mambu_refactor/TapGenerators/generator.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 650bba6..99ff325 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -44,11 +44,14 @@ def _init_params(self): self.limit = self.client.page_size self.params = self.static_params - def __iter__(self): + def __all_fetch_batch_steps(self): self.prepare_batch() raw_batch = self.fetch_batch() self.buffer = self.transform_batch(raw_batch) self.last_batch_size = len(self.buffer) + + def __iter__(self): + self.__all_fetch_batch_steps() return self def __next__(self): @@ -57,10 +60,7 @@ def __next__(self): raise StopIteration() self.offset += self.limit # self.write_bookmark() - self.prepare_batch() - raw_batch = self.fetch_batch() - self.buffer = self.transform_batch(raw_batch) - self.last_batch_size = len(self.buffer) + self.__all_fetch_batch_steps() if not self.buffer: raise StopIteration() return self.buffer.pop(0) From 0bbfd3accda6f9ebe36f49e1cf2335583cc327fb Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Tue, 7 Dec 2021 11:53:51 +0200 Subject: [PATCH 29/38] Added a comment so we don't forget about the deduplication_key in the future --- tap_mambu/tap_mambu_refactor/TapProcessors/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index d1bf358..388e2a6 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -13,7 +13,7 @@ def __init__(self, catalog, stream_name): self.generator_values = dict() self.catalog = catalog self.stream_name = stream_name - self.deduplication_key = "encoded_key" + self.deduplication_key = "encoded_key" # To be replaced with 'id_fields' from endpoint config self.stream = self.catalog.get_stream(stream_name) self.schema = self.stream.schema.to_dict() self.stream_metadata = metadata.to_map(self.stream.metadata) From 2a5e45b5afd1fab362b23141a17c15ed5be829bb Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Tue, 7 Dec 2021 13:09:49 +0200 Subject: [PATCH 30/38] Also added record_count --- tap_mambu/tap_mambu_refactor/TapProcessors/processor.py | 3 +++ tap_mambu/tap_mambu_refactor/__init__.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index 388e2a6..f4511b8 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -26,6 +26,7 @@ def write_schema(self): def process_streams_from_generators(self, generators): self.generators = generators self.write_schema() + record_count = 0 for generator in self.generators: self.generator_values[iter(generator)] = None while True: @@ -49,6 +50,7 @@ def process_streams_from_generators(self, generators): # Process the record record = self.generator_values[min_record_key] self.process_record(record, min_record_key.time_extracted) + record_count += 1 # Remove any record with the same deduplication_key from the list # (so we don't process the same record twice) @@ -57,6 +59,7 @@ def process_streams_from_generators(self, generators): self.generator_values[iterator] = None self.generators[0].write_bookmark() + return record_count def __is_record_past_bookmark(self, transformed_record): is_record_past_bookmark = False diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 193b998..ca49828 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -26,4 +26,4 @@ def sync_endpoint_refactor(client, catalog, state, processor = processor_class(catalog=catalog, stream_name=stream_name) - processor.process_streams_from_generators(generators=generators) + return processor.process_streams_from_generators(generators=generators) From 532e55b638b99243a603bcca482b97347238fda9 Mon Sep 17 00:00:00 2001 From: DownstreamDataTeam Date: Thu, 16 Dec 2021 12:37:05 +0200 Subject: [PATCH 31/38] adjusted the last_datetime var from sync_endpoint func to use the lookback_window for the loan_transactions stream --- tap_mambu/sync.py | 30 +++++++++++------------------- tap_mambu/transform.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index 2e3d741..b6951be 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -1,16 +1,14 @@ -from _datetime import timedelta -from datetime import datetime - import singer from singer import Transformer, metadata, metrics, utils from singer.utils import strftime, strptime_to_utc from tap_mambu.tap_mambu_refactor import sync_endpoint_refactor -from tap_mambu.transform import transform_json, transform_activities +from tap_mambu.transform import transform_json, transform_activities, transform_datetime, check_loan_transaction_date LOGGER = singer.get_logger() LOOKBACK_DEFAULT = 14 + def write_schema(catalog, stream_name): stream = catalog.get_stream(stream_name) schema = stream.schema.to_dict() @@ -54,12 +52,6 @@ def write_bookmark(state, stream, sub_type, value): singer.write_state(state) -def transform_datetime(this_dttm): - with Transformer() as transformer: - new_dttm = transformer._transform_datetime(this_dttm) - return new_dttm - - def process_records(catalog, #pylint: disable=too-many-branches stream_name, records, @@ -137,7 +129,8 @@ def sync_endpoint(client, #pylint: disable=too-many-branches id_fields=None, parent=None, parent_id=None, - apikey_type=None): + apikey_type=None, + **kwargs): # Get the latest bookmark for the stream and set the last_integer/datetime @@ -152,6 +145,9 @@ def sync_endpoint(client, #pylint: disable=too-many-branches audit_trail_bookmark = get_bookmark(state, stream_name, sub_type, [start_date, 0]) last_datetime, number_last_occurrence = audit_trail_bookmark if type(audit_trail_bookmark) == list \ else (audit_trail_bookmark, 0) + elif stream_name == 'loan_transactions': + last_datetime = check_loan_transaction_date(kwargs.get('lookback_window', LOOKBACK_DEFAULT) or LOOKBACK_DEFAULT, + get_bookmark(state, stream_name, sub_type, start_date)) else: last_datetime = get_bookmark(state, stream_name, sub_type, start_date) max_bookmark_value = last_datetime @@ -400,9 +396,8 @@ def sync(client, config, catalog, state): # LOGGER.info('deposit_transactions bookmark_date = {}'.format(deposit_transactions_dt_str)) loan_transactions_dttm_str = get_bookmark(state, 'loan_transactions', 'self', start_date) - loan_transactions_dt_str = transform_datetime(loan_transactions_dttm_str)[:10] - loan_transactions_dttm = strptime_to_utc(loan_transactions_dt_str) - + loan_transactions_dt_str = check_loan_transaction_date(int(config.get('lookback_window', LOOKBACK_DEFAULT)), + loan_transactions_dttm_str)[:10] clients_dttm_str = get_bookmark(state, 'clients', 'self', start_date) clients_dt_str = transform_datetime(clients_dttm_str)[:10] @@ -415,10 +410,6 @@ def sync(client, config, catalog, state): deposit_accounts_dttm_str = get_bookmark(state, 'deposit_accounts', 'self', start_date) deposit_accounts_dt_str = transform_datetime(deposit_accounts_dttm_str)[:10] - lookback_days = int(config.get('lookback_window', LOOKBACK_DEFAULT)) - lookback_date = utils.now() - timedelta(lookback_days) - if loan_transactions_dttm > lookback_date: - loan_transactions_dt_str = transform_datetime(strftime(lookback_date))[:10] # LOGGER.info('loan_transactions bookmark_date = {}'.format(loan_transactions_dt_str)) # endpoints: API URL endpoints to be called @@ -929,7 +920,8 @@ def sync(client, config, catalog, state): data_key=endpoint_config.get('data_key', None), body=endpoint_config.get('body', None), id_fields=endpoint_config.get('id_fields'), - apikey_type=endpoint_config.get('apikey_type', None) + apikey_type=endpoint_config.get('apikey_type', None), + lookback_window=config.get('lookback_window', None) ) update_currently_syncing(state, None) diff --git a/tap_mambu/transform.py b/tap_mambu/transform.py index b2939a0..201fdc4 100644 --- a/tap_mambu/transform.py +++ b/tap_mambu/transform.py @@ -1,4 +1,8 @@ import re +from _datetime import timedelta +from singer import Transformer +from singer.utils import strftime, strptime_to_utc, now + # Convert camelCase to snake_case def convert(name): @@ -83,3 +87,17 @@ def transform_activities(this_json): record[key] = value del record['activity'] return this_json + + +def transform_datetime(this_dttm): + with Transformer() as transformer: + new_dttm = transformer._transform_datetime(this_dttm) + return new_dttm + + +def check_loan_transaction_date(lookback_window, dttm_str): + dttm_str = transform_datetime(dttm_str) + lookback_dt = now() - timedelta(lookback_window) + if strptime_to_utc(dttm_str) > lookback_dt: + return transform_datetime(strftime(lookback_dt)[:10]) + return dttm_str From e7e36530fa1260c4ca54e5d819d31f3c72989aae Mon Sep 17 00:00:00 2001 From: Alexandru Rosca Date: Mon, 20 Dec 2021 11:52:36 +0000 Subject: [PATCH 32/38] Revert "Merge branch 'feature/ECDCC-500_lookback_window_bugfix' into 'release/32'" This reverts merge request !10 --- tap_mambu/sync.py | 30 +++++++++++++++++++----------- tap_mambu/transform.py | 18 ------------------ 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/tap_mambu/sync.py b/tap_mambu/sync.py index b6951be..2e3d741 100644 --- a/tap_mambu/sync.py +++ b/tap_mambu/sync.py @@ -1,14 +1,16 @@ +from _datetime import timedelta +from datetime import datetime + import singer from singer import Transformer, metadata, metrics, utils from singer.utils import strftime, strptime_to_utc from tap_mambu.tap_mambu_refactor import sync_endpoint_refactor -from tap_mambu.transform import transform_json, transform_activities, transform_datetime, check_loan_transaction_date +from tap_mambu.transform import transform_json, transform_activities LOGGER = singer.get_logger() LOOKBACK_DEFAULT = 14 - def write_schema(catalog, stream_name): stream = catalog.get_stream(stream_name) schema = stream.schema.to_dict() @@ -52,6 +54,12 @@ def write_bookmark(state, stream, sub_type, value): singer.write_state(state) +def transform_datetime(this_dttm): + with Transformer() as transformer: + new_dttm = transformer._transform_datetime(this_dttm) + return new_dttm + + def process_records(catalog, #pylint: disable=too-many-branches stream_name, records, @@ -129,8 +137,7 @@ def sync_endpoint(client, #pylint: disable=too-many-branches id_fields=None, parent=None, parent_id=None, - apikey_type=None, - **kwargs): + apikey_type=None): # Get the latest bookmark for the stream and set the last_integer/datetime @@ -145,9 +152,6 @@ def sync_endpoint(client, #pylint: disable=too-many-branches audit_trail_bookmark = get_bookmark(state, stream_name, sub_type, [start_date, 0]) last_datetime, number_last_occurrence = audit_trail_bookmark if type(audit_trail_bookmark) == list \ else (audit_trail_bookmark, 0) - elif stream_name == 'loan_transactions': - last_datetime = check_loan_transaction_date(kwargs.get('lookback_window', LOOKBACK_DEFAULT) or LOOKBACK_DEFAULT, - get_bookmark(state, stream_name, sub_type, start_date)) else: last_datetime = get_bookmark(state, stream_name, sub_type, start_date) max_bookmark_value = last_datetime @@ -396,8 +400,9 @@ def sync(client, config, catalog, state): # LOGGER.info('deposit_transactions bookmark_date = {}'.format(deposit_transactions_dt_str)) loan_transactions_dttm_str = get_bookmark(state, 'loan_transactions', 'self', start_date) - loan_transactions_dt_str = check_loan_transaction_date(int(config.get('lookback_window', LOOKBACK_DEFAULT)), - loan_transactions_dttm_str)[:10] + loan_transactions_dt_str = transform_datetime(loan_transactions_dttm_str)[:10] + loan_transactions_dttm = strptime_to_utc(loan_transactions_dt_str) + clients_dttm_str = get_bookmark(state, 'clients', 'self', start_date) clients_dt_str = transform_datetime(clients_dttm_str)[:10] @@ -410,6 +415,10 @@ def sync(client, config, catalog, state): deposit_accounts_dttm_str = get_bookmark(state, 'deposit_accounts', 'self', start_date) deposit_accounts_dt_str = transform_datetime(deposit_accounts_dttm_str)[:10] + lookback_days = int(config.get('lookback_window', LOOKBACK_DEFAULT)) + lookback_date = utils.now() - timedelta(lookback_days) + if loan_transactions_dttm > lookback_date: + loan_transactions_dt_str = transform_datetime(strftime(lookback_date))[:10] # LOGGER.info('loan_transactions bookmark_date = {}'.format(loan_transactions_dt_str)) # endpoints: API URL endpoints to be called @@ -920,8 +929,7 @@ def sync(client, config, catalog, state): data_key=endpoint_config.get('data_key', None), body=endpoint_config.get('body', None), id_fields=endpoint_config.get('id_fields'), - apikey_type=endpoint_config.get('apikey_type', None), - lookback_window=config.get('lookback_window', None) + apikey_type=endpoint_config.get('apikey_type', None) ) update_currently_syncing(state, None) diff --git a/tap_mambu/transform.py b/tap_mambu/transform.py index 201fdc4..b2939a0 100644 --- a/tap_mambu/transform.py +++ b/tap_mambu/transform.py @@ -1,8 +1,4 @@ import re -from _datetime import timedelta -from singer import Transformer -from singer.utils import strftime, strptime_to_utc, now - # Convert camelCase to snake_case def convert(name): @@ -87,17 +83,3 @@ def transform_activities(this_json): record[key] = value del record['activity'] return this_json - - -def transform_datetime(this_dttm): - with Transformer() as transformer: - new_dttm = transformer._transform_datetime(this_dttm) - return new_dttm - - -def check_loan_transaction_date(lookback_window, dttm_str): - dttm_str = transform_datetime(dttm_str) - lookback_dt = now() - timedelta(lookback_window) - if strptime_to_utc(dttm_str) > lookback_dt: - return transform_datetime(strftime(lookback_dt)[:10]) - return dttm_str From b4dec658fab24f9421e01e63ab1571aee22ab316 Mon Sep 17 00:00:00 2001 From: DownstreamDataTeam Date: Wed, 5 Jan 2022 13:42:26 +0200 Subject: [PATCH 33/38] Started the loan_repayments refactor --- .../TapGenerators/generator.py | 6 ++++-- .../TapProcessors/loan_accounts_processor.py | 17 ++++++++++++++++- .../TapProcessors/processor.py | 4 ++++ tap_mambu/tap_mambu_refactor/__init__.py | 18 ++++++++++-------- 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py index 99ff325..e2504b9 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/generator.py @@ -6,12 +6,13 @@ class TapGenerator(ABC): - def __init__(self, stream_name, client, config, state, sub_type): + def __init__(self, stream_name, client, config, state, sub_type, endpoint_config=None): self.stream_name = stream_name self.client = client self.config = config self.state = state self.sub_type = sub_type + self.endpoint_config = endpoint_config self._init_config() self._init_endpoint_config() self._init_buffers() @@ -22,7 +23,8 @@ def _init_config(self): self.start_date = self.config.get('start_date') def _init_endpoint_config(self): - self.endpoint_config = {} + if self.endpoint_config is None: + self.endpoint_config = {} def _init_buffers(self): self.buffer: List = list() diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py index 15e51fd..590c0de 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py @@ -2,4 +2,19 @@ class LoanAccountsProcessor(TapProcessor): - pass + def __process_child_records(self, record): + super().__process_child_records(self, record) + + from .. import sync_endpoint_refactor + children_configs = self.generators[0].endpoint_config.get('children', {}) + for child_config_key in children_configs: + child_config = children_configs[child_config_key] + child_config = dict(child_config) + child_config.get('path').format(record['id']) + sync_endpoint_refactor(client=self.generators[0].client, + catalog=self.catalog, + state=self.generators[0].state, + stream_name=child_config_key, + sub_type='', + config=self.generators[0].config, + endpoint_config=child_config) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index f4511b8..c20dfcf 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -51,6 +51,7 @@ def process_streams_from_generators(self, generators): record = self.generator_values[min_record_key] self.process_record(record, min_record_key.time_extracted) record_count += 1 + record_count += self.__process_child_records(record) # Remove any record with the same deduplication_key from the list # (so we don't process the same record twice) @@ -61,6 +62,9 @@ def process_streams_from_generators(self, generators): self.generators[0].write_bookmark() return record_count + def __process_child_records(self, record): + return 0 + def __is_record_past_bookmark(self, transformed_record): is_record_past_bookmark = False bookmark_type = self.generators[0].endpoint_config.get('bookmark_type') diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index ca49828..4e136e4 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -2,26 +2,28 @@ from .TapGenerators.generator import TapGenerator from .TapGenerators.loan_accounts_generator import LoanAccountsADGenerator, LoanAccountsLMGenerator +from .TapGenerators.loan_repayments_generator import LoanRepaymentsGenerator from .TapProcessors.loan_accounts_processor import LoanAccountsProcessor +from .TapProcessors.loan_repayments_processor import LoanRepaymentsProcessor from .TapProcessors.processor import TapProcessor - LOGGER = singer.get_logger() - stream_generator_processor_dict = { - "loan_accounts": ((LoanAccountsLMGenerator, LoanAccountsADGenerator), LoanAccountsProcessor) + "loan_accounts": ((LoanAccountsLMGenerator, LoanAccountsADGenerator), LoanAccountsProcessor), + "loan_repayments": ((LoanRepaymentsGenerator,), LoanRepaymentsProcessor) } def sync_endpoint_refactor(client, catalog, state, - stream_name, sub_type, config): + stream_name, sub_type, config, endpoint_config=None): generator_classes, processor_class = stream_generator_processor_dict[stream_name] generators = [generator_class(stream_name=stream_name, - client=client, - config=config, - state=state, - sub_type=sub_type) + client=client, + config=config, + state=state, + sub_type=sub_type, + endpoint_config=endpoint_config) for generator_class in generator_classes] processor = processor_class(catalog=catalog, stream_name=stream_name) From a9a4717cca6da0d19170899865cc1f0c4315132a Mon Sep 17 00:00:00 2001 From: DownstreamDataTeam Date: Wed, 5 Jan 2022 14:55:17 +0200 Subject: [PATCH 34/38] Finished the refactor --- .../TapGenerators/loan_accounts_generator.py | 2 +- .../TapProcessors/loan_accounts_processor.py | 23 ++++++++++--------- .../TapProcessors/processor.py | 9 ++++---- tap_mambu/tap_mambu_refactor/__init__.py | 2 -- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index 8abae82..44e8ccc 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -1,6 +1,6 @@ import abc -from .. import TapGenerator +from .generator import TapGenerator from ..Helpers import transform_datetime, get_bookmark diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py index 590c0de..132fd8f 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py @@ -2,19 +2,20 @@ class LoanAccountsProcessor(TapProcessor): - def __process_child_records(self, record): - super().__process_child_records(self, record) - + def _process_child_records(self, record): from .. import sync_endpoint_refactor + + total_records = super()._process_child_records(record) children_configs = self.generators[0].endpoint_config.get('children', {}) for child_config_key in children_configs: child_config = children_configs[child_config_key] child_config = dict(child_config) - child_config.get('path').format(record['id']) - sync_endpoint_refactor(client=self.generators[0].client, - catalog=self.catalog, - state=self.generators[0].state, - stream_name=child_config_key, - sub_type='', - config=self.generators[0].config, - endpoint_config=child_config) + child_config['path'] = child_config['path'].format(str(record['id'])) + total_records += sync_endpoint_refactor(client=self.generators[0].client, + catalog=self.catalog, + state=self.generators[0].state, + stream_name=child_config_key, + sub_type='', + config=self.generators[0].config, + endpoint_config=child_config) + return total_records diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index c20dfcf..c3bf8d7 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -4,7 +4,6 @@ from singer.utils import strptime_to_utc from ..Helpers import transform_datetime, convert -from ..TapGenerators.generator import TapGenerator class TapProcessor(ABC): @@ -39,7 +38,7 @@ def process_streams_from_generators(self, generators): if not self.generator_values: break # Find lowest value in the list - min_record_key: TapGenerator = None + min_record_key = None min_record_value = None for iterator in self.generator_values: if min_record_value is None \ @@ -51,7 +50,7 @@ def process_streams_from_generators(self, generators): record = self.generator_values[min_record_key] self.process_record(record, min_record_key.time_extracted) record_count += 1 - record_count += self.__process_child_records(record) + record_count += self._process_child_records(record) # Remove any record with the same deduplication_key from the list # (so we don't process the same record twice) @@ -62,13 +61,13 @@ def process_streams_from_generators(self, generators): self.generators[0].write_bookmark() return record_count - def __process_child_records(self, record): + def _process_child_records(self, record): return 0 def __is_record_past_bookmark(self, transformed_record): is_record_past_bookmark = False bookmark_type = self.generators[0].endpoint_config.get('bookmark_type') - bookmark_field = convert(self.generators[0].endpoint_config.get('bookmark_field')) + bookmark_field = convert(self.generators[0].endpoint_config.get('bookmark_field', '')) # Reset max_bookmark_value to new value if higher if bookmark_field and (bookmark_field in transformed_record): diff --git a/tap_mambu/tap_mambu_refactor/__init__.py b/tap_mambu/tap_mambu_refactor/__init__.py index 4e136e4..915a221 100644 --- a/tap_mambu/tap_mambu_refactor/__init__.py +++ b/tap_mambu/tap_mambu_refactor/__init__.py @@ -1,11 +1,9 @@ import singer -from .TapGenerators.generator import TapGenerator from .TapGenerators.loan_accounts_generator import LoanAccountsADGenerator, LoanAccountsLMGenerator from .TapGenerators.loan_repayments_generator import LoanRepaymentsGenerator from .TapProcessors.loan_accounts_processor import LoanAccountsProcessor from .TapProcessors.loan_repayments_processor import LoanRepaymentsProcessor -from .TapProcessors.processor import TapProcessor LOGGER = singer.get_logger() From 776292157712fa300859eb8773f55ee3ba250e74 Mon Sep 17 00:00:00 2001 From: DownstreamDataTeam Date: Tue, 11 Jan 2022 17:55:02 +0200 Subject: [PATCH 35/38] Fixed missing deduplication_key error --- .../TapProcessors/loan_accounts_processor.py | 2 +- tap_mambu/tap_mambu_refactor/TapProcessors/processor.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py index 132fd8f..aaa0b41 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py @@ -15,7 +15,7 @@ def _process_child_records(self, record): catalog=self.catalog, state=self.generators[0].state, stream_name=child_config_key, - sub_type='', + sub_type='self', config=self.generators[0].config, endpoint_config=child_config) return total_records diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index c3bf8d7..2fcab15 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -12,7 +12,6 @@ def __init__(self, catalog, stream_name): self.generator_values = dict() self.catalog = catalog self.stream_name = stream_name - self.deduplication_key = "encoded_key" # To be replaced with 'id_fields' from endpoint config self.stream = self.catalog.get_stream(stream_name) self.schema = self.stream.schema.to_dict() self.stream_metadata = metadata.to_map(self.stream.metadata) @@ -25,6 +24,8 @@ def write_schema(self): def process_streams_from_generators(self, generators): self.generators = generators self.write_schema() + id_fields = self.generators[0].endpoint_config['id_fields'] + deduplication_key = 'id' if 'id' in id_fields else id_fields[0] record_count = 0 for generator in self.generators: self.generator_values[iter(generator)] = None @@ -42,9 +43,9 @@ def process_streams_from_generators(self, generators): min_record_value = None for iterator in self.generator_values: if min_record_value is None \ - or min_record_value > self.generator_values[iterator][self.deduplication_key]: + or min_record_value > self.generator_values[iterator][deduplication_key]: min_record_key = iterator - min_record_value = self.generator_values[iterator][self.deduplication_key] + min_record_value = self.generator_values[iterator][deduplication_key] # Process the record record = self.generator_values[min_record_key] @@ -55,7 +56,7 @@ def process_streams_from_generators(self, generators): # Remove any record with the same deduplication_key from the list # (so we don't process the same record twice) for iterator in self.generator_values.keys(): - if min_record_value == self.generator_values[iterator][self.deduplication_key]: + if min_record_value == self.generator_values[iterator][deduplication_key]: self.generator_values[iterator] = None self.generators[0].write_bookmark() From cd8d18a410e747c3832f0e0dff923606e6713187 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Fri, 14 Jan 2022 11:09:33 +0200 Subject: [PATCH 36/38] Fixed up loan repayments refactor --- .../tap_mambu_refactor/Helpers/__init__.py | 13 ++++++++++- .../TapGenerators/loan_accounts_generator.py | 2 +- .../loan_repayments_generator.py | 5 ++++ .../TapProcessors/loan_accounts_processor.py | 22 ++++++++++-------- .../loan_repayments_processor.py | 5 ++++ .../TapProcessors/processor.py | 23 ++++++++++++------- 6 files changed, 50 insertions(+), 20 deletions(-) create mode 100644 tap_mambu/tap_mambu_refactor/TapGenerators/loan_repayments_generator.py create mode 100644 tap_mambu/tap_mambu_refactor/TapProcessors/loan_repayments_processor.py diff --git a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py index e582a91..842a356 100644 --- a/tap_mambu/tap_mambu_refactor/Helpers/__init__.py +++ b/tap_mambu/tap_mambu_refactor/Helpers/__init__.py @@ -1,5 +1,5 @@ import re -from singer import write_state, Transformer +from singer import write_state, Transformer, metadata def get_bookmark(state, stream, sub_type, default): @@ -115,3 +115,14 @@ def transform_activities(this_json): record[key] = value del record['activity'] return this_json + + +# Review catalog and make a list of selected streams +def get_selected_streams(catalog): + selected_streams = set() + for stream in catalog.streams: + mdata = metadata.to_map(stream.metadata) + root_metadata = mdata.get(()) + if root_metadata and root_metadata.get('selected') is True: + selected_streams.add(stream.tap_stream_id) + return list(selected_streams) diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py index 44e8ccc..2af80ad 100644 --- a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_accounts_generator.py @@ -17,7 +17,7 @@ def _init_endpoint_config(self): }, 'body': { "sortingCriteria": { - "field": "encodedKey", + "field": "id", "order": "ASC" }, "filterCriteria": [ diff --git a/tap_mambu/tap_mambu_refactor/TapGenerators/loan_repayments_generator.py b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_repayments_generator.py new file mode 100644 index 0000000..38b78fd --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/TapGenerators/loan_repayments_generator.py @@ -0,0 +1,5 @@ +from .generator import TapGenerator + + +class LoanRepaymentsGenerator(TapGenerator): + pass diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py index aaa0b41..ee8a210 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_accounts_processor.py @@ -1,4 +1,5 @@ from .processor import TapProcessor +from ..Helpers import get_selected_streams class LoanAccountsProcessor(TapProcessor): @@ -8,14 +9,15 @@ def _process_child_records(self, record): total_records = super()._process_child_records(record) children_configs = self.generators[0].endpoint_config.get('children', {}) for child_config_key in children_configs: - child_config = children_configs[child_config_key] - child_config = dict(child_config) - child_config['path'] = child_config['path'].format(str(record['id'])) - total_records += sync_endpoint_refactor(client=self.generators[0].client, - catalog=self.catalog, - state=self.generators[0].state, - stream_name=child_config_key, - sub_type='self', - config=self.generators[0].config, - endpoint_config=child_config) + if child_config_key in get_selected_streams(self.catalog): + child_config = children_configs[child_config_key] + child_config = dict(child_config) + child_config['path'] = child_config['path'].format(str(record['id'])) + total_records += sync_endpoint_refactor(client=self.generators[0].client, + catalog=self.catalog, + state=self.generators[0].state, + stream_name=child_config_key, + sub_type='self', + config=self.generators[0].config, + endpoint_config=child_config) return total_records diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/loan_repayments_processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_repayments_processor.py new file mode 100644 index 0000000..19be22b --- /dev/null +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/loan_repayments_processor.py @@ -0,0 +1,5 @@ +from .processor import TapProcessor + + +class LoanRepaymentsProcessor(TapProcessor): + pass diff --git a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py index 2fcab15..b46395d 100644 --- a/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py +++ b/tap_mambu/tap_mambu_refactor/TapProcessors/processor.py @@ -15,17 +15,22 @@ def __init__(self, catalog, stream_name): self.stream = self.catalog.get_stream(stream_name) self.schema = self.stream.schema.to_dict() self.stream_metadata = metadata.to_map(self.stream.metadata) + self.deduplication_key = "id" def write_schema(self): stream = self.catalog.get_stream(self.stream_name) schema = stream.schema.to_dict() write_schema(self.stream_name, schema, stream.key_properties) + def configure_processor_for_generators(self): + id_fields = self.generators[0].endpoint_config['id_fields'] + if "id" not in id_fields: + self.deduplication_key = id_fields[0] + def process_streams_from_generators(self, generators): self.generators = generators + self.configure_processor_for_generators() self.write_schema() - id_fields = self.generators[0].endpoint_config['id_fields'] - deduplication_key = 'id' if 'id' in id_fields else id_fields[0] record_count = 0 for generator in self.generators: self.generator_values[iter(generator)] = None @@ -43,20 +48,20 @@ def process_streams_from_generators(self, generators): min_record_value = None for iterator in self.generator_values: if min_record_value is None \ - or min_record_value > self.generator_values[iterator][deduplication_key]: + or min_record_value > self.generator_values[iterator][self.deduplication_key]: min_record_key = iterator - min_record_value = self.generator_values[iterator][deduplication_key] + min_record_value = self.generator_values[iterator][self.deduplication_key] # Process the record record = self.generator_values[min_record_key] - self.process_record(record, min_record_key.time_extracted) - record_count += 1 - record_count += self._process_child_records(record) + if self.process_record(record, min_record_key.time_extracted): + record_count += 1 + record_count += self._process_child_records(record) # Remove any record with the same deduplication_key from the list # (so we don't process the same record twice) for iterator in self.generator_values.keys(): - if min_record_value == self.generator_values[iterator][deduplication_key]: + if min_record_value == self.generator_values[iterator][self.deduplication_key]: self.generator_values[iterator] = None self.generators[0].write_bookmark() @@ -106,3 +111,5 @@ def process_record(self, record, time_extracted): write_record(self.stream_name, transformed_record, time_extracted=time_extracted) + return True + return False From 96475ee86512c0861b764291f18fa454307425ee Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Tue, 18 Jan 2022 12:27:08 +0200 Subject: [PATCH 37/38] Added gitlab-ci config --- .gitlab-ci.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..d1d70b5 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,33 @@ +include: + - template: Code-Quality.gitlab-ci.yml + +image: + python:3.10-bullseye + +stages: + - test + +workflow: + rules: + - if: $CI_PIPELINE_SOURCE == 'merge_request_event' + +unit-test-job: + stage: test + before_script: + - python3 -m pip install --upgrade -r requirements.txt + - python3 -m pip install --upgrade -r requirements_ci.txt + script: + - echo "Running unit tests..." + - coverage run --source=tap_mambu/tap_mambu_refactor -m pytest tests/refactor_unittests + - coverage json + - python3 -c "import json; coverage_json = json.loads(open('coverage.json', 'r').read()); print('Coverage is', round(coverage_json['totals']['percent_covered'], 2))" + +lint-test-job: + stage: test + before_script: + - python3 -m pip install --upgrade -r requirements.txt + - python3 -m pip install --upgrade -r requirements_ci.txt + script: + - echo "Linting code..." + - pylint tap_mambu/tap_mambu_refactor --exit-zero + - pylint tap_mambu/tap_mambu_refactor -d C -d R -d W || true From 20632f38b67db1cf3f4a739a1c4a6e9038ca72e4 Mon Sep 17 00:00:00 2001 From: Radu Marinoiu Date: Tue, 18 Jan 2022 12:30:44 +0200 Subject: [PATCH 38/38] Revert "Added gitlab-ci config" This reverts commit 96475ee86512c0861b764291f18fa454307425ee. --- .gitlab-ci.yml | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index d1d70b5..0000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,33 +0,0 @@ -include: - - template: Code-Quality.gitlab-ci.yml - -image: - python:3.10-bullseye - -stages: - - test - -workflow: - rules: - - if: $CI_PIPELINE_SOURCE == 'merge_request_event' - -unit-test-job: - stage: test - before_script: - - python3 -m pip install --upgrade -r requirements.txt - - python3 -m pip install --upgrade -r requirements_ci.txt - script: - - echo "Running unit tests..." - - coverage run --source=tap_mambu/tap_mambu_refactor -m pytest tests/refactor_unittests - - coverage json - - python3 -c "import json; coverage_json = json.loads(open('coverage.json', 'r').read()); print('Coverage is', round(coverage_json['totals']['percent_covered'], 2))" - -lint-test-job: - stage: test - before_script: - - python3 -m pip install --upgrade -r requirements.txt - - python3 -m pip install --upgrade -r requirements_ci.txt - script: - - echo "Linting code..." - - pylint tap_mambu/tap_mambu_refactor --exit-zero - - pylint tap_mambu/tap_mambu_refactor -d C -d R -d W || true