Skip to content

Commit

Permalink
Add FS Mapper
Browse files Browse the repository at this point in the history
  • Loading branch information
Kamil Breguła committed Apr 15, 2019
1 parent 44c3ce0 commit 95acf35
Show file tree
Hide file tree
Showing 16 changed files with 769 additions and 11 deletions.
2 changes: 2 additions & 0 deletions oozie-to-airflow/converter/mappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from mappers.decision_mapper import DecisionMapper
from mappers.dummy_mapper import DummyMapper
from mappers.end_mapper import EndMapper
from mappers.fs_mapper import FsMapper
from mappers.kill_mapper import KillMapper
from mappers.pig_mapper import PigMapper
from mappers.shell_mapper import ShellMapper
Expand All @@ -49,6 +50,7 @@
"ssh": SSHMapper,
"spark": SparkMapper,
"pig": PigMapper,
"fs": FsMapper,
"sub-workflow": SubworkflowMapper,
"shell": ShellMapper,
}
1 change: 1 addition & 0 deletions oozie-to-airflow/converter/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,5 @@ def __init__(self, input_directory_path, output_directory_path, dag_name=None) -
"import datetime",
"from airflow import models",
"from airflow.utils.trigger_rule import TriggerRule",
"from airflow.utils import dates",
}
16 changes: 16 additions & 0 deletions oozie-to-airflow/examples/fs/configuration-template.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

dataproc_cluster=CLUSTER-NAME
gcp_region=europe-west3
16 changes: 16 additions & 0 deletions oozie-to-airflow/examples/fs/configuration.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

dataproc_cluster=cluster-o2a
gcp_region=europe-west3
20 changes: 20 additions & 0 deletions oozie-to-airflow/examples/fs/job.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

nameNode=hdfs://localhost:8020
resourceManager=localhost:8032
queueName=default
examplesRoot=examples

oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/shell
98 changes: 98 additions & 0 deletions oozie-to-airflow/examples/fs/workflow.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
<!--
Copyright 2019 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<workflow-app xmlns="uri:oozie:workflow:1.0" name="fs-wf">
<start to="fs-node"/>
<fork name="fs-node">
<path start="mkdir"/>
<path start="delete"/>
<path start="move"/>
<path start="chmod"/>
<path start="touchz"/>
<path start="chgrp"/>
<path start="setrep"/>
</fork>
<action name="mkdir">
<fs>
<mkdir path='${nameNode}/user/pig/test-fs/test-mkdir-1'/>
</fs>
<ok to="end"/>
<error to="fail"/>
</action>
<action name="delete">
<fs>
<mkdir path='${nameNode}/user/pig/test-fs/test-delete-1'/>
<mkdir path='${nameNode}/user/pig/test-fs/test-delete-2'/>
<mkdir path='${nameNode}/user/pig/test-fs/test-delete-3'/>
<delete path='${nameNode}/user/pig/test-fs/test-delete-1' skip-trash='true'/>
<delete path='${nameNode}/user/pig/test-fs/test-delete-2' skip-trash='false'/>
<delete path='${nameNode}/user/pig/test-fs/test-delete-3'/>
</fs>
<ok to="end"/>
<error to="fail"/>
</action>
<action name="move">
<fs>
<mkdir path='${nameNode}/user/pig/test-fs/test-move-1'/>
<move source='${nameNode}/user/pig/test-fs/test-move-1' target='/home/pig/test-fs/test-move-2' />
</fs>
<ok to="end"/>
<error to="fail"/>
</action>
<action name="chmod">
<fs>
<mkdir path='${nameNode}/user/pig/test-fs/test-chmod-1'/>
<mkdir path='${nameNode}/user/pig/test-fs/test-chmod-2'/>
<mkdir path='${nameNode}/user/pig/test-fs/test-chmod-3'/>
<mkdir path='${nameNode}/user/pig/test-fs/test-chmod-4'/>
<chmod path='${nameNode}/user/pig/test-fs/test-chmod-1' permissions='777' dir-files='false' />
<chmod path='${nameNode}/user/pig/test-fs/test-chmod-2' permissions='777' dir-files='true' />
<chmod path='${nameNode}/user/pig/test-fs/test-chmod-3' permissions='777' />
<chmod path='${nameNode}/user/pig/test-fs/test-chmod-4' permissions='777' dir-files='false' >
<recursive/>
</chmod>
</fs>
<ok to="end"/>
<error to="fail"/>
</action>
<action name="touchz">
<fs>
<touchz path='${nameNode}/user/pig/test-fs/test-touchz-1' />
</fs>
<ok to="end"/>
<error to="fail"/>
</action>
<action name="chgrp">
<fs>
<mkdir path='${nameNode}/user/pig/test-fs/test-chgrp-1'/>
<chgrp path='${nameNode}/user/pig/test-fs/test-chgrp-1' group='hadoop' />
</fs>
<ok to="end"/>
<error to="fail"/>
</action>
<action name="setrep">
<fs>
<mkdir path='${nameNode}/user/pig/test-fs/test-setrep-1'/>
<setrep path='${nameNode}/user/pig/test-fs/test-setrep-1' replication-factor='2'/>
</fs>
<ok to="end"/>
<error to="fail"/>
</action>
<kill name="fail">
<message>Fs workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<end name="end"/>
</workflow-app>
39 changes: 39 additions & 0 deletions oozie-to-airflow/integration/run_fs_test_with_airflow.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
BASE_DIR=${MY_DIR}/..

LOCAL_APPLICATION_DIR=${BASE_DIR}/examples/fs

HADOOP_USER=pig
EXAMPLE_DIR=examples/fs
CLUSTER_MASTER=cluster-o2a-m
CLUSTER_NAME=cluster-o2a

COMPOSER_BUCKET=gs://europe-west1-o2a-integratio-f690ede2-bucket
COMPOSER_NAME=o2a-integration
COMPOSER_LOCATION=europe-west1

DAG_NAME=test_fs_dag

INPTU_DIR=${BASE_DIR}/examples/fs
OUTPUT_DIR=${BASE_DIR}/output/fs_test


if [[ ! -f ${LOCAL_APPLICATION_DIR}/configuration.properties ]]; then
echo
echo "Please copy ${LOCAL_APPLICATION_DIR}/configuration-template.properties to ${LOCAL_APPLICATION_DIR}/configuration.properties and update properties to match your case"
echo
exit 1
fi

python ${BASE_DIR}/o2a.py -i ${INPTU_DIR} -o ${OUTPUT_DIR} -u ${HADOOP_USER} -d ${DAG_NAME} $@

gsutil cp ${BASE_DIR}/scripts/prepare.sh ${COMPOSER_BUCKET}/data/
gsutil cp ${OUTPUT_DIR}/* ${COMPOSER_BUCKET}/dags/

gcloud dataproc jobs submit pig --cluster=${CLUSTER_NAME} --execute 'fs -rm -r /home/pig/test-fs' || true
gcloud dataproc jobs submit pig --cluster=${CLUSTER_NAME} --execute 'fs -mkdir /home/pig/test-fs'


gcloud composer environments run ${COMPOSER_NAME} --location ${COMPOSER_LOCATION} list_dags
gcloud composer environments run ${COMPOSER_NAME} --location ${COMPOSER_LOCATION} trigger_dag -- ${DAG_NAME}
Loading

0 comments on commit 95acf35

Please sign in to comment.