-
Notifications
You must be signed in to change notification settings - Fork 301
/
task.py
87 lines (75 loc) · 3.02 KB
/
task.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from dataclasses import dataclass
from typing import Dict, Optional, Type
from flytekit.configuration import SerializationSettings
from flytekit.extend import SQLTask
from flytekit.models import task as _task_model
from flytekit.types.schema import FlyteSchema
# Keys of the mapping returned by SnowflakeTask.get_config; each key carries
# the value of the same-named SnowflakeConfig field.
_ACCOUNT_FIELD = "account"
_DATABASE_FIELD = "database"
_SCHEMA_FIELD = "schema"
_WAREHOUSE_FIELD = "warehouse"
@dataclass
class SnowflakeConfig:
    """
    SnowflakeConfig should be used to configure a Snowflake Task.

    Every field is optional and defaults to ``None``. Fields are declared in
    the order account, database, schema, warehouse, which is also the order of
    the positional arguments accepted by the generated ``__init__``.
    """

    # The account to query against
    account: Optional[str] = None
    # The database to query against
    database: Optional[str] = None
    # The optional schema to separate query execution.
    schema: Optional[str] = None
    # The optional warehouse to set for the given Snowflake query
    warehouse: Optional[str] = None
class SnowflakeTask(SQLTask[SnowflakeConfig]):
    """
    This is the simplest form of a Snowflake Task, that can be used even for tasks that do not produce any output.
    """

    # This task is executed using the snowflake handler in the backend.
    _TASK_TYPE = "snowflake"

    def __init__(
        self,
        name: str,
        query_template: str,
        task_config: Optional[SnowflakeConfig] = None,
        inputs: Optional[Dict[str, Type]] = None,
        output_schema_type: Optional[Type[FlyteSchema]] = None,
        **kwargs,
    ):
        """
        To be used to query Snowflake databases.

        :param name: Name of this task, should be unique in the project
        :param query_template: The actual query to run. We use Flyte's Golang templating format for Query templating.
          Refer to the templating documentation
        :param task_config: SnowflakeConfig object. When omitted, a default ``SnowflakeConfig()`` (all fields
          ``None``) is used.
        :param inputs: Name and type of inputs specified as an ordered dictionary
        :param output_schema_type: If some data is produced by this query, then you can specify the output schema type.
          When given, the task declares a single output named ``results`` of this type; otherwise no outputs
          are declared.
        :param kwargs: All other args required by Parent type - SQLTask
        """
        # Substitute an empty config so that get_config can always read attributes from task_config.
        if task_config is None:
            task_config = SnowflakeConfig()

        # The only output this task can declare is "results", and only when a schema type was supplied.
        outputs = None if output_schema_type is None else {"results": output_schema_type}

        super().__init__(
            name=name,
            task_config=task_config,
            query_template=query_template,
            inputs=inputs,
            outputs=outputs,
            task_type=self._TASK_TYPE,
            **kwargs,
        )
        self._output_schema_type = output_schema_type

    def get_config(self, settings: SerializationSettings) -> Dict[str, str]:
        """
        Build the string-to-string configuration mapping for this task from its SnowflakeConfig.

        Fields of the config that are unset (``None``) are left out of the mapping, because the declared
        return type is ``Dict[str, str]`` and ``None`` is not a ``str``.

        :param settings: Serialization settings; not used by this implementation.
        :return: Mapping from the field keys (account, database, schema, warehouse) to their configured values,
          containing only the fields that are set.
        """
        config = self.task_config
        candidates = {
            _ACCOUNT_FIELD: config.account,
            _DATABASE_FIELD: config.database,
            _SCHEMA_FIELD: config.schema,
            _WAREHOUSE_FIELD: config.warehouse,
        }
        return {key: value for key, value in candidates.items() if value is not None}

    def get_sql(self, settings: SerializationSettings) -> Optional[_task_model.Sql]:
        """
        Wrap this task's query template in a Sql model tagged with the ANSI dialect.

        :param settings: Serialization settings; not used by this implementation.
        :return: A ``Sql`` model whose statement is ``self.query_template``.
        """
        return _task_model.Sql(statement=self.query_template, dialect=_task_model.Sql.Dialect.ANSI)