diff --git a/deployment/eks/flyte_generated.yaml b/deployment/eks/flyte_generated.yaml new file mode 100644 index 0000000000..ccaf510eab --- /dev/null +++ b/deployment/eks/flyte_generated.yaml @@ -0,0 +1,942 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: flyte +spec: + finalizers: + - kubernetes +status: + phase: Active +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: flyteworkflows.flyte.lyft.com +spec: + group: flyte.lyft.com + names: + kind: FlyteWorkflow + plural: flyteworkflows + shortNames: + - fly + singular: flyteworkflow + scope: Namespaced + version: v1alpha1 +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::111222333456:role/flyte-operator + name: datacatalog + namespace: flyte +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::111222333456:role/flyte-operator + name: flyteadmin + namespace: flyte +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::111222333456:role/flyte-operator + name: flytepropeller + namespace: flyte +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: flyteadmin + namespace: flyte +rules: +- apiGroups: + - "" + - flyte.lyft.com + - rbac.authorization.k8s.io + resources: + - configmaps + - flyteworkflows + - namespaces + - pods + - resourcequotas + - roles + - rolebindings + - secrets + - services + - serviceaccounts + - spark-role + verbs: + - '*' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: flytepropeller +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - events + verbs: + - create + - update + - delete + - patch +- apiGroups: + - '*' + resources: + - '*' + verbs: + - get + - list + - watch + - create + - update + - delete + - patch +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - get + - list + - watch + - create + - delete + - update +- apiGroups: + - flyte.lyft.com + resources: + - flyteworkflows + verbs: + - get + - list + - watch + - create + - update + - delete + - patch + - post + - deletecollection +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: flyteadmin-binding + namespace: flyte +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: flyteadmin +subjects: +- kind: ServiceAccount + name: flyteadmin + namespace: flyte +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: flytepropeller + namespace: flyte +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: flytepropeller +subjects: +- kind: ServiceAccount + name: flytepropeller + namespace: flyte +--- +apiVersion: v1 +data: + aa_namespace.yaml: | + apiVersion: v1 + kind: Namespace + metadata: + name: {{ namespace }} + spec: + finalizers: + - kubernetes + ab_project-resource-quota.yaml: "apiVersion: v1\nkind: ResourceQuota\nmetadata:\n + \ name: project-quota\n namespace: {{ namespace }} \nspec:\n hard:\n limits.cpu: + {{ projectQuotaCpu }} \n limits.memory: {{ projectQuotaMemory }}\n\n" +kind: ConfigMap +metadata: + name: clusterresource-template-m2kccc465f + namespace: flyte +--- +apiVersion: v1 +data: + datacatalog_config.yaml: |+ + logger: + show-source: true + level: 5 + datacatalog: + storage-prefix: metadata/datacatalog + 
metrics-scope: "datacatalog" + profiler-port: 10254 + application: + grpcPort: 8089 + database: + port: 5432 + username: flyteadmin + host: flyteadmin-cluster.cluster-456123e6ivib.us-west-2.rds.amazonaws.com + dbname: flytedatacatalog + password: spongebob + storage: + type: s3 + connection: + auth-type: iam + region: us-west-2 + cache: + max_size_mbs: 64 + target_gc_percent: 70 + container: "flyte-datacatalog" + +kind: ConfigMap +metadata: + name: datacatalog-config-86hg5m74g6 + namespace: flyte +--- +apiVersion: v1 +data: + flyteadmin_config.yaml: | + logger: + show-source: true + level: 5 + server: + httpPort: 8088 + grpcPort: 8089 + security: + secure: false + useAuth: false + allowCors: false + flyteadmin: + roleNameKey: "iam.amazonaws.com/role" + profilerPort: 10254 + metricsScope: "flyte:" + metadataStoragePrefix: + - "metadata" + - "admin" + database: + port: 5432 + username: flyteadmin + host: flyteadmin-cluster.cluster-456123e6ivib.us-west-2.rds.amazonaws.com + dbname: flyteadmin + password: spongebob + remoteData: + region: "us-west-2" + scheme: aws + signedUrls: + durationMinutes: 3 + storage: + type: s3 + connection: + auth-type: iam + region: us-west-2 + cache: + max_size_mbs: 64 + target_gc_percent: 70 + container: "flyte-admin" + task_resources: + defaults: + cpu: 100m + memory: 100Mi + storage: 5Mi + limits: + cpu: 2 + memory: 2G + storage: 20Mi + task_type_whitelist: + spark: + - project: flytekit + - project: flytetester + domains: + - id: development + name: development + - id: staging + name: staging + - id: production + name: production + cluster_resources: + templatePath: "/etc/flyte/clusterresource/templates" + customData: + production: + - projectQuotaCpu: + value: "5" + - projectQuotaMemory: + value: "4000Mi" + staging: + - projectQuotaCpu: + value: "1" + - projectQuotaMemory: + value: "2000Mi" + development: + - projectQuotaCpu: + value: "1" + - projectQuotaMemory: + value: "2000Mi" + refresh: 5m +kind: ConfigMap +metadata: + name: flyte-admin-config-g4cdd4724d + namespace: flyte +--- +apiVersion: v1 +data: + BASE_URL: /console + CONFIG_DIR: /etc/flyte/config +kind: ConfigMap +metadata: + name: flyte-console-config + namespace: flyte +--- +apiVersion: v1 +data: + config.yaml: "" +kind: ConfigMap +metadata: + name: flyte-container-config-5k78b9cm42 + namespace: flyte +--- +apiVersion: v1 +data: + config.yaml: | + plugins: + logs: + kubernetes-enabled: true + kubernetes-url: "http://localhost:30082" + k8s: + default-env-vars: + - AWS_RETRY_MODE: standard + - AWS_METADATA_SERVICE_TIMEOUT: 5 + - AWS_METADATA_SERVICE_NUM_ATTEMPTS: 20 +kind: ConfigMap +metadata: + name: flyte-plugin-config-dh9hm6bf64 + namespace: flyte +--- +apiVersion: v1 +data: + config.yaml: | + propeller: + rawoutput-prefix: s3://flyte-outputs + metadata-prefix: propeller/eks + workers: 4 + max-workflow-retries: 30 + workflow-reeval-duration: 30s + downstream-eval-duration: 30s + limit-namespace: "all" + prof-port: 10254 + metrics-prefix: flyte + enable-admin-launcher: true + leader-election: + lock-config-map: + name: propeller-leader + namespace: flyte + enabled: true + lease-duration: 15s + renew-deadline: 10s + retry-period: 2s + queue: + type: batch + batching-interval: 2s + batch-size: -1 + queue: + type: bucket + rate: 10 + capacity: 100 + sub-queue: + type: bucket + rate: 10 + capacity: 100 + resourcemanager: + type: redis + resourceMaxQuota: 10000 + redis: + hostPath: redis-resource-manager.flyte:6379 + hostKey: mypassword + logger: + show-source: true + level: 5 + storage: + type: s3 + 
connection: + auth-type: iam + region: us-west-2 + cache: + max_size_mbs: 1024 + target_gc_percent: 70 + container: "flyte-metadata" + limits: + maxDownloadMBs: 10 + event: + type: admin + rate: 500 + capacity: 1000 + admin: + endpoint: flyteadmin:81 + insecure: true + catalog-cache: + endpoint: datacatalog:89 + type: datacatalog + insecure: true + tasks: + task-plugins: + enabled-plugins: + - container + - sidecar + - k8s-array +kind: ConfigMap +metadata: + name: flyte-propeller-config-f55btmdgmg + namespace: flyte +--- +apiVersion: v1 +data: + config.yaml: | + plugins: + spark: + spark-config-default: + - spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version: "2" + - spark.kubernetes.allocation.batch.size: "50" + - spark.hadoop.fs.s3a.acl.default: "BucketOwnerFullControl" + - spark.hadoop.fs.s3n.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3n.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3a.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.multipart.threshold: "536870912" + - spark.blacklist.enabled: "true" + - spark.blacklist.timeout: "5m" + - spark.task.maxfailures: "8" +kind: ConfigMap +metadata: + name: flyte-spark-config-h72bmtggc2 + namespace: flyte +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + contour.heptio.com/upstream-protocol.h2c: grpc + name: datacatalog + namespace: flyte +spec: + ports: + - name: http + port: 88 + protocol: TCP + targetPort: 8088 + - name: grpc + port: 89 + protocol: TCP + targetPort: 8089 + selector: + app: datacatalog + type: NodePort +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + contour.heptio.com/upstream-protocol.h2c: grpc + service.beta.kubernetes.io/aws-load-balancer-internal: "true" + name: flyteadmin + namespace: flyte +spec: + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 8088 + - name: grpc + port: 81 + protocol: TCP + targetPort: 8089 + - name: redoc + port: 87 + protocol: TCP + targetPort: 8087 + selector: + app: flyteadmin + type: LoadBalancer +--- +apiVersion: v1 +kind: Service +metadata: + name: flyteconsole + namespace: flyte +spec: + ports: + - port: 80 + protocol: TCP + targetPort: 8080 + selector: + app: flyteconsole + type: NodePort +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: redis-resource-manager + name: redis-resource-manager + namespace: flyte +spec: + ports: + - name: redis + port: 6379 + protocol: TCP + targetPort: redis + selector: + app: redis-resource-manager + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: datacatalog + name: datacatalog + namespace: flyte +spec: + replicas: 1 + selector: + matchLabels: + app: datacatalog + template: + metadata: + annotations: + prometheus.io/path: /metrics + prometheus.io/port: "10254" + prometheus.io/scrape: "true" + labels: + app: datacatalog + app.kubernetes.io/name: datacatalog + app.kubernetes.io/version: 0.2.1 + spec: + containers: + - command: + - datacatalog + - --logtostderr + - --config + - /etc/datacatalog/config/datacatalog_config.yaml + - serve + image: docker.io/lyft/datacatalog:0.2.1 + imagePullPolicy: IfNotPresent + name: datacatalog + ports: + - containerPort: 8088 + - containerPort: 8089 + resources: + limits: + cpu: "0.1" + ephemeral-storage: 100Mi + 
memory: 200Mi + volumeMounts: + - mountPath: /etc/datacatalog/config + name: config-volume + initContainers: + - command: + - datacatalog + - --logtostderr + - --config + - /etc/datacatalog/config/datacatalog_config.yaml + - migrate + - run + image: docker.io/lyft/datacatalog:0.2.1 + imagePullPolicy: IfNotPresent + name: run-migrations + volumeMounts: + - mountPath: /etc/datacatalog/config + name: config-volume + serviceAccountName: datacatalog + volumes: + - emptyDir: {} + name: shared-data + - configMap: + name: datacatalog-config-86hg5m74g6 + name: config-volume +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: flyteadmin + name: flyteadmin + namespace: flyte +spec: + replicas: 1 + selector: + matchLabels: + app: flyteadmin + template: + metadata: + annotations: + prometheus.io/path: /metrics + prometheus.io/port: "10254" + prometheus.io/scrape: "true" + labels: + app: flyteadmin + app.kubernetes.io/name: flyteadmin + app.kubernetes.io/version: 0.2.6 + spec: + containers: + - command: + - flyteadmin + - --logtostderr + - --config + - /etc/flyte/config/flyteadmin_config.yaml + - serve + image: docker.io/lyft/flyteadmin:v0.2.6 + imagePullPolicy: IfNotPresent + name: flyteadmin + ports: + - containerPort: 8088 + - containerPort: 8089 + resources: + limits: + cpu: "0.1" + ephemeral-storage: 100Mi + memory: 200Mi + volumeMounts: + - mountPath: /srv/flyte + name: shared-data + - mountPath: /etc/flyte/config + name: config-volume + - command: + - sh + - -c + - ln -s /usr/share/nginx/html /usr/share/nginx/html/openapi && sh /usr/local/bin/docker-run.sh + env: + - name: PAGE_TITLE + value: Flyte Admin OpenAPI + - name: SPEC_URL + value: /api/v1/openapi + - name: PORT + value: "8087" + image: docker.io/redocly/redoc + imagePullPolicy: IfNotPresent + name: redoc + ports: + - containerPort: 8087 + resources: + limits: + cpu: "0.1" + memory: 200Mi + initContainers: + - command: + - flyteadmin + - --logtostderr + - --config + - /etc/flyte/config/flyteadmin_config.yaml + - migrate + - run + image: docker.io/lyft/flyteadmin:v0.2.6 + imagePullPolicy: IfNotPresent + name: run-migrations + volumeMounts: + - mountPath: /etc/flyte/config + name: config-volume + - command: + - flyteadmin + - --logtostderr + - --config + - /etc/flyte/config/flyteadmin_config.yaml + - migrate + - seed-projects + - flytesnacks + - flytetester + - flyteexamples + image: docker.io/lyft/flyteadmin:v0.2.6 + imagePullPolicy: IfNotPresent + name: seed-projects + volumeMounts: + - mountPath: /etc/flyte/config + name: config-volume + - command: + - flyteadmin + - --logtostderr + - --config + - /etc/flyte/config/flyteadmin_config.yaml + - clusterresource + - sync + image: docker.io/lyft/flyteadmin:v0.2.6 + imagePullPolicy: IfNotPresent + name: sync-cluster-resources + volumeMounts: + - mountPath: /etc/flyte/clusterresource/templates + name: resource-templates + - mountPath: /etc/flyte/config + name: config-volume + serviceAccountName: flyteadmin + volumes: + - configMap: + name: clusterresource-template-m2kccc465f + name: resource-templates + - emptyDir: {} + name: shared-data + - configMap: + name: flyte-admin-config-g4cdd4724d + name: config-volume +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: flyteconsole + name: flyteconsole + namespace: flyte +spec: + replicas: 1 + selector: + matchLabels: + app: flyteconsole + template: + metadata: + labels: + app: flyteconsole + app.kubernetes.io/name: flyteconsole + app.kubernetes.io/version: 0.4.0 + spec: + containers: + - envFrom: + - 
configMapRef: + name: flyte-console-config + image: docker.io/lyft/flyteconsole:v0.4.0 + name: flyteconsole + ports: + - containerPort: 8080 + resources: + limits: + cpu: "0.1" + ephemeral-storage: 100Mi + memory: 150Mi + volumeMounts: + - mountPath: /srv/flyte + name: shared-data + volumes: + - emptyDir: {} + name: shared-data +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: flytepropeller + name: flytepropeller + namespace: flyte +spec: + selector: + matchLabels: + app: flytepropeller + template: + metadata: + annotations: + prometheus.io/path: /metrics + prometheus.io/port: "10254" + prometheus.io/scrape: "true" + labels: + app: flytepropeller + app.kubernetes.io/name: flytepropeller + app.kubernetes.io/version: 0.2.20 + spec: + containers: + - args: + - --config + - /etc/flyte/config*/config.yaml + command: + - flytepropeller + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + image: docker.io/lyft/flytepropeller:v0.2.20 + imagePullPolicy: IfNotPresent + name: flytepropeller + ports: + - containerPort: 10254 + volumeMounts: + - mountPath: /etc/flyte/config-container + name: container-config-volume + - mountPath: /etc/flyte/config-spark + name: spark-config-volume + - mountPath: /etc/flyte/config + name: config-volume + - mountPath: /etc/flyte/config-plugin + name: plugin-config-volume + serviceAccountName: flytepropeller + volumes: + - configMap: + name: flyte-container-config-5k78b9cm42 + name: container-config-volume + - configMap: + name: flyte-spark-config-h72bmtggc2 + name: spark-config-volume + - configMap: + name: flyte-propeller-config-f55btmdgmg + name: config-volume + - configMap: + name: flyte-plugin-config-dh9hm6bf64 + name: plugin-config-volume +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: redis + namespace: flyte +spec: + replicas: 1 + selector: + matchLabels: + app: redis-resource-manager + serviceName: redis-resource-manager + template: + metadata: + labels: + app: redis-resource-manager + spec: + containers: + - env: + - name: REDIS_PASSWORD + value: mypassword + image: docker.io/bitnami/redis:4.0.2-r1 + imagePullPolicy: IfNotPresent + livenessProbe: + exec: + command: + - redis-cli + - ping + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + name: redis-resource-manager + ports: + - containerPort: 6379 + name: redis + protocol: TCP + readinessProbe: + exec: + command: + - redis-cli + - ping + failureThreshold: 3 + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: + requests: + cpu: 200m + memory: 128Mi + volumeMounts: + - mountPath: /bitnami + name: redis-data + dnsPolicy: ClusterFirst + restartPolicy: Always + volumes: + - emptyDir: {} + name: redis-data +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: syncresources + namespace: flyte +spec: + jobTemplate: + spec: + template: + spec: + containers: + - command: + - flyteadmin + - --logtostderr + - --config + - /etc/flyte/config/flyteadmin_config.yaml + - clusterresource + - sync + image: docker.io/lyft/flyteadmin:v0.2.6 + imagePullPolicy: IfNotPresent + name: sync-cluster-resources + volumeMounts: + - mountPath: /etc/flyte/clusterresource/templates + name: resource-templates + - mountPath: /etc/flyte/config + name: config-volume + restartPolicy: OnFailure + serviceAccountName: flyteadmin + volumes: + - configMap: + name: clusterresource-template-m2kccc465f + name: resource-templates + - configMap: + name: 
flyte-admin-config-g4cdd4724d
+        name: config-volume
+  schedule: '*/1 * * * *'
+---
+apiVersion: extensions/v1beta1
+kind: Ingress
+metadata:
+  annotations:
+    alb.ingress.kubernetes.io/scheme: internet-facing
+    alb.ingress.kubernetes.io/tags: service_instance=production
+    kubernetes.io/ingress.class: alb
+  labels:
+    app: flyteadmin
+  name: flytesystem
+  namespace: flyte
+spec:
+  rules:
+  - http:
+      paths:
+      - backend:
+          serviceName: flyteconsole
+          servicePort: 80
+        path: /console
+      - backend:
+          serviceName: flyteconsole
+          servicePort: 80
+        path: /console/*
+      - backend:
+          serviceName: flyteconsole
+          servicePort: 80
+        path: /__webpack_hmr
+      - backend:
+          serviceName: flyteadmin
+          servicePort: 80
+        path: /api/*
+      - backend:
+          serviceName: flyteadmin
+          servicePort: 80
+        path: /healthcheck
+      - backend:
+          serviceName: flyteadmin
+          servicePort: 80
+        path: /v1/*
+      - backend:
+          serviceName: flyteadmin
+          servicePort: 81
+        path: /flyteidl.service.AdminService/*
+      - backend:
+          serviceName: flyteadmin
+          servicePort: 87
+        path: /openapi/*
diff --git a/eks/README.md b/eks/README.md
new file mode 100644
index 0000000000..f0e7c0d611
--- /dev/null
+++ b/eks/README.md
@@ -0,0 +1,25 @@
+
+Steps:
+- Make sure kubectl and aws-cli are installed and working
+- Make sure terraform is installed and working, and that it can use your aws-cli credentials
+- Apply the Terraform configuration in `eks/tf`. (There seems to be a race condition in one of the IAM role creation steps - you may need to run `terraform apply` twice.)
+- Copy or update the kubectl config file and switch to that context.
+- Create the webhook
+  - Create an ECR repo for the webhook
+  - Build the image and push it
+  - Run the make cluster-up command with the right image
+- Create the example 2048 game from the EKS ALB ingress walkthrough (https://docs.aws.amazon.com/eks/latest/userguide/alb-ingress.html) to verify the ingress controller. Keep in mind that even after an address shows up in the ingress, it may take a while to provision.
+- Delete the game
+- Create the separate datacatalog database in RDS (Terraform only creates the `flyteadmin` database; the datacatalog configuration expects a `flytedatacatalog` database as well).
+- Follow the [Installation portion](https://github.com/aws/amazon-eks-pod-identity-webhook/blob/95808cffe6d801822dae122f2f2c87a258d70bb8/README.md#installation) of the webhook readme. Make sure to use your own AWS account number; you will also need to build your own image and upload it to your ECR, which will probably require you to create that repository first.
+- Go through all the overlays in the `kustomize/overlays/eks` folder and make sure all the service accounts and RDS addresses reference yours. (Grep for `111222333456` and `456123e6ivib`.)
+- Install Flyte with `kubectl apply -f deployment/eks/flyte_generated.yaml`
+
+This is the webhook used to inject IAM role credentials into pods:
+https://github.com/aws/amazon-eks-pod-identity-webhook
+
+This is how you get pods to use the proper IAM roles (it is the KIAM replacement):
+https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts-technical-overview.html
+The implementation of these steps is done for you in the `alb-ingress` Terraform module.
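+
+As a quick sanity check for the last few steps, something like the following should work; the cluster name and region here are the sample values from `eks/tf/values.auto.tfvars`, so substitute your own:
+
+```bash
+# Point kubectl at the EKS cluster created by Terraform
+aws eks update-kubeconfig --name flyte-demo --region us-west-2
+kubectl config current-context
+
+# Find any service accounts or RDS addresses still pointing at the sample values
+grep -rn -e '111222333456' -e '456123e6ivib' kustomize/overlays/eks/
+```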
+ + diff --git a/eks/tf/main.tf b/eks/tf/main.tf new file mode 100644 index 0000000000..0e05fc6d6b --- /dev/null +++ b/eks/tf/main.tf @@ -0,0 +1,161 @@ +terraform { + required_version = ">= 0.12.0" +} + +provider "aws" { + profile = "default" + region = var.region +} + +# Use the internal module to create an RDS instance with a user-supplied VPC +module "flyte_rds" { + source = "./modules/flyte-rds" + + rds_vpc = var.rds_vpc +} + +# Use the internal module to create an EKS cluster, which has its own VPC +module "flyte_eks" { + source = "./modules/flyte-eks" + + eks_cluster_name = var.eks_cluster_name +} + +# Get information about the two VPCs +data "aws_vpc" "rds_vpc" { + id = var.rds_vpc +} + +data "aws_vpc" "eks_vpc" { + id = module.flyte_eks.eks_vpc_id +} + +# Get information about the RDS instance +data "aws_db_instance" "flyte_rds" { + db_instance_identifier = module.flyte_rds.admin_rds_instance_id +} + +resource "aws_vpc_peering_connection" "eks_to_main_peering" { + peer_vpc_id = var.rds_vpc + vpc_id = module.flyte_eks.eks_vpc_id + auto_accept = true + + tags = { + Name = "VPC peering connection between Flyte RDS and EKS" + } + + accepter { + allow_remote_vpc_dns_resolution = true + } + + requester { + allow_remote_vpc_dns_resolution = true + } +} + +data "aws_route_table" "eks_public_route_table" { + vpc_id = module.flyte_eks.eks_vpc_id + filter { + name = "tag:Name" + values = ["${var.eks_cluster_name}-vpc-public"] + } +} + +resource "aws_route" "route_rds_cidr" { + route_table_id = data.aws_route_table.eks_public_route_table.id + destination_cidr_block = data.aws_vpc.rds_vpc.cidr_block + vpc_peering_connection_id = aws_vpc_peering_connection.eks_to_main_peering.id +} + +# Add a rule to the RDS security group to allow access from the EKS VPC +resource "aws_security_group_rule" "allow_eks_to_rds" { + type = "ingress" + from_port = 5432 + to_port = 5432 + protocol = "tcp" + cidr_blocks = [data.aws_vpc.eks_vpc.cidr_block] + security_group_id = data.aws_db_instance.flyte_rds.vpc_security_groups[0] +} + +# The following implements the instructions set forth by: +# https://github.com/aws/amazon-eks-pod-identity-webhook/blob/95808cffe6d801822dae122f2f2c87a258d70bb8/README.md +# This is a webhook that will allow pods to assume arbitrarily constrained roles via their service account. +# TODO: This should be moved into a separate module probably but will require further refactoring as the assume role +# policy used is also used further below in the ALB ingress module. 
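+#
+# For reference, a workload opts in by annotating its Kubernetes service account
+# with the role it wants to assume, as the Flyte service accounts in
+# deployment/eks/flyte_generated.yaml do:
+#   eks.amazonaws.com/role-arn: arn:aws:iam::111222333456:role/flyte-operator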
+
+# Create an oidc provider using the EKS cluster's public OIDC discovery endpoint
+resource "aws_iam_openid_connect_provider" "eks_oidc_connection" {
+  client_id_list  = ["sts.amazonaws.com"]
+  thumbprint_list = ["9e99a48a9960b14926bb7f3b02e22da2b0ab7280"]
+  url             = module.flyte_eks.eks_oidc_issuer
+}
+
+locals {
+  issuer_parsed = regex("^arn.*(?P<trailing>oidc.eks.*)", aws_iam_openid_connect_provider.eks_oidc_connection.arn)
+}
+
+# This is the trust document that will allow pods to use roles that they specify in their service account
+data "aws_iam_policy_document" "let_pods_assume_roles" {
+  statement {
+    actions = ["sts:AssumeRoleWithWebIdentity"]
+
+    principals {
+      type        = "Federated"
+      identifiers = [aws_iam_openid_connect_provider.eks_oidc_connection.arn]
+    }
+
+    condition {
+      test     = "StringLike"
+      variable = "${local.issuer_parsed.trailing}:sub"
+
+      values = [
+        "system:serviceaccount:*:*",
+      ]
+    }
+  }
+}
+
+# Make a role for Flyte components themselves to use
+resource "aws_iam_role" "flyte_operator" {
+  name               = "flyte-operator"
+  assume_role_policy = data.aws_iam_policy_document.let_pods_assume_roles.json
+}
+
+
+# Make a policy document
+# TODO: Scope this down later
+data "aws_iam_policy_document" "all_s3_access" {
+  statement {
+    actions = [
+      "s3:*",
+    ]
+
+    resources = [
+      "*",
+    ]
+  }
+}
+
+# Use the policy document to create a policy
+resource "aws_iam_policy" "flyte_operator_s3_access" {
+  name   = "flyte_operator_s3_access"
+  path   = "/"
+  policy = data.aws_iam_policy_document.all_s3_access.json
+}
+
+# Attach the policy to the flyte operator role
+resource "aws_iam_role_policy_attachment" "flyte_operator_s3_attach" {
+  role       = aws_iam_role.flyte_operator.name
+  policy_arn = aws_iam_policy.flyte_operator_s3_access.arn
+}
+
+module "alb_ingress" {
+  source = "./modules/alb-ingress"
+
+  region                    = var.region
+  eks_cluster_name          = var.eks_cluster_name
+  cluster_id                = module.flyte_eks.cluster_id
+  eks_vpc_id                = module.flyte_eks.eks_vpc_id
+  assume_role_policy_string = data.aws_iam_policy_document.let_pods_assume_roles.json
+}
+
diff --git a/eks/tf/modules/alb-ingress/main.tf b/eks/tf/modules/alb-ingress/main.tf
new file mode 100644
index 0000000000..2624ed9c43
--- /dev/null
+++ b/eks/tf/modules/alb-ingress/main.tf
@@ -0,0 +1,183 @@
+# As required by https://docs.aws.amazon.com/eks/latest/userguide/alb-ingress.html
+data "http" "alb_ingress_policy" {
+  url = "https://raw.githubusercontent.com/kubernetes-sigs/aws-alb-ingress-controller/v1.1.4/docs/examples/iam-policy.json"
+
+  request_headers = {
+    Accept = "application/json"
+  }
+}
+
+resource "aws_iam_policy" "k8s_alb_ingress_controller" {
+  name   = "ALBIngressControllerIAMPolicy"
+  path   = "/"
+  policy = data.http.alb_ingress_policy.body
+}
+
+resource "aws_iam_role" "eks_alb_ingress_controller" {
+  name               = "eks-alb-ingress-controller"
+  assume_role_policy = var.assume_role_policy_string
+}
+
+# Attach the policy to the ALB ingress controller role
+resource "aws_iam_role_policy_attachment" "eks_alb_attachment" {
+  role       = aws_iam_role.eks_alb_ingress_controller.name
+  policy_arn = aws_iam_policy.k8s_alb_ingress_controller.arn
+}
+
+data "aws_eks_cluster" "cluster" {
+  name = var.cluster_id
+}
+
+data "aws_eks_cluster_auth" "cluster" {
+  name = var.cluster_id
+}
+
+provider "kubernetes" {
+  host                   = data.aws_eks_cluster.cluster.endpoint
+  cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority.0.data)
+  token                  = data.aws_eks_cluster_auth.cluster.token
+  load_config_file       = false
+  version                = "~> 1.9"
+}
+
+resource
"kubernetes_cluster_role" "alb_ingress_controller" { + metadata { + name = "alb-ingress-controller" + + labels = { + "app.kubernetes.io/name" : "alb-ingress-controller" + } + } + + rule { + api_groups = [ + "", + "extensions", + ] + resources = [ + "configmaps", + "endpoints", + "events", + "ingresses", + "ingresses/status", + "services", + ] + verbs = [ + "create", + "get", + "list", + "update", + "watch", + "patch", + ] + } + + rule { + api_groups = [ + "", + "extensions", + ] + resources = [ + "nodes", + "pods", + "secrets", + "services", + "namespaces", + ] + verbs = [ + "get", + "list", + "watch", + ] + } +} + +resource "kubernetes_service_account" "alb_ingress_controller" { + metadata { + name = "alb-ingress-controller" + namespace = "kube-system" + + labels = { + "app.kubernetes.io/name" = "alb-ingress-controller" + } + + annotations = { + "eks.amazonaws.com/role-arn" = aws_iam_role.eks_alb_ingress_controller.arn + } + } +} + +resource "kubernetes_cluster_role_binding" "alb_ingress_controller" { + metadata { + name = "alb-ingress-controller" + + labels = { + "app.kubernetes.io/name" = "alb-ingress-controller" + } + } + + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = kubernetes_cluster_role.alb_ingress_controller.metadata[0].name + } + + subject { + kind = "ServiceAccount" + name = kubernetes_service_account.alb_ingress_controller.metadata[0].name + namespace = "kube-system" + } +} + +resource "kubernetes_deployment" "alb_ingress_controller" { + metadata { + name = "alb-ingress-controller" + namespace = "kube-system" + + labels = { + "app.kubernetes.io/name" = "alb-ingress-controller" + } + } + + spec { + selector { + match_labels = { + "app.kubernetes.io/name" = "alb-ingress-controller" + } + } + + template { + metadata { + name = "alb-ingress-controller" + namespace = "kube-system" + + labels = { + "app.kubernetes.io/name" = "alb-ingress-controller" + } + } + + spec { + container { + name = "alb-ingress-controller" + image = "docker.io/amazon/aws-alb-ingress-controller:v1.1.4" + args = [ + "--ingress-class=alb", + "--cluster-name=${var.eks_cluster_name}", + "--aws-vpc-id=${var.eks_vpc_id}", + "--aws-region=${var.region}", + "--feature-gates=waf=false", + ] + } + + service_account_name = kubernetes_service_account.alb_ingress_controller.metadata[0].name + automount_service_account_token = true + + node_selector = { + "beta.kubernetes.io/os" = "linux" + } + } + } + } +} + + diff --git a/eks/tf/modules/alb-ingress/outputs.tf b/eks/tf/modules/alb-ingress/outputs.tf new file mode 100644 index 0000000000..e69de29bb2 diff --git a/eks/tf/modules/alb-ingress/variables.tf b/eks/tf/modules/alb-ingress/variables.tf new file mode 100644 index 0000000000..43a0e9219c --- /dev/null +++ b/eks/tf/modules/alb-ingress/variables.tf @@ -0,0 +1,20 @@ +variable "region" { + type = string +} + +variable "eks_cluster_name" { + type = string +} + +variable "eks_vpc_id" { + type = string +} + +variable "cluster_id" { + type = string +} + +variable "assume_role_policy_string" { + type = string +} + diff --git a/eks/tf/modules/flyte-eks/main.tf b/eks/tf/modules/flyte-eks/main.tf new file mode 100644 index 0000000000..edecace66a --- /dev/null +++ b/eks/tf/modules/flyte-eks/main.tf @@ -0,0 +1,111 @@ +data "aws_eks_cluster" "cluster" { + name = module.eks.cluster_id +} + +data "aws_availability_zones" "available" { +} + +resource "aws_security_group" "worker_group_mgmt_one" { + name_prefix = "worker_group_mgmt_one" + vpc_id = module.vpc.vpc_id + + ingress { + from_port = 22 + 
to_port = 22 + protocol = "tcp" + + cidr_blocks = [ + "10.0.0.0/8", + ] + } +} + +resource "aws_security_group" "worker_group_mgmt_two" { + name_prefix = "worker_group_mgmt_two" + vpc_id = module.vpc.vpc_id + + ingress { + from_port = 22 + to_port = 22 + protocol = "tcp" + + cidr_blocks = [ + "192.168.0.0/16", + ] + } +} + +resource "aws_security_group" "all_worker_mgmt" { + name_prefix = "all_worker_management" + vpc_id = module.vpc.vpc_id + + ingress { + from_port = 22 + to_port = 22 + protocol = "tcp" + + cidr_blocks = [ + "10.0.0.0/8", + "172.16.0.0/12", + "192.168.0.0/16", + ] + } +} + +module "vpc" { + source = "git::https://github.com/terraform-aws-modules/terraform-aws-vpc.git?ref=v2.6.0" + name = "${var.eks_cluster_name}-vpc" + + cidr = "10.0.0.0/16" + azs = data.aws_availability_zones.available.names + private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"] + public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] + enable_nat_gateway = true + single_nat_gateway = true + enable_dns_hostnames = true + + public_subnet_tags = { + "kubernetes.io/cluster/${var.eks_cluster_name}" = "shared" + "kubernetes.io/role/elb" = "1" + } + + private_subnet_tags = { + "kubernetes.io/cluster/${var.eks_cluster_name}" = "shared" + "kubernetes.io/role/internal-elb" = "1" + } +} + +module "eks" { + source = "git::https://github.com/terraform-aws-modules/terraform-aws-eks.git?ref=v11.0.0" + + cluster_name = var.eks_cluster_name + subnets = module.vpc.private_subnets + + tags = { + Environment = "test" + GithubRepo = "terraform-aws-eks" + GithubOrg = "terraform-aws-modules" + } + + vpc_id = module.vpc.vpc_id + + worker_groups = [ + { + name = "worker-group-1" + instance_type = "t2.small" + additional_userdata = "flyte t2.small" + asg_desired_capacity = 2 + additional_security_group_ids = [aws_security_group.worker_group_mgmt_one.id] + }, + { + name = "worker-group-2" + instance_type = "t2.medium" + additional_userdata = "flyte t2.medium" + additional_security_group_ids = [aws_security_group.worker_group_mgmt_two.id] + asg_desired_capacity = 1 + }, + ] + + worker_additional_security_group_ids = [aws_security_group.all_worker_mgmt.id] +} + diff --git a/eks/tf/modules/flyte-eks/outputs.tf b/eks/tf/modules/flyte-eks/outputs.tf new file mode 100644 index 0000000000..9e7cd30b94 --- /dev/null +++ b/eks/tf/modules/flyte-eks/outputs.tf @@ -0,0 +1,36 @@ +output "cluster_endpoint" { + description = "Endpoint for EKS control plane." + value = module.eks.cluster_endpoint +} + +output "cluster_security_group_id" { + description = "Security group ids attached to the cluster control plane." + value = module.eks.cluster_security_group_id +} + +output "cluster_id" { + description = "The id of the EKS cluster created" + value = module.eks.cluster_id +} + +output "kubectl_config" { + description = "kubectl config as generated by the module." + value = module.eks.kubeconfig +} + +output "config_map_aws_auth" { + description = "A kubernetes configuration to authenticate to this EKS cluster." 
+ value = module.eks.config_map_aws_auth +} + +output "eks_vpc_id" { + description = "The id of the VPC created for the EKS cluster" + value = module.vpc.vpc_id +} + +output "eks_oidc_issuer" { + # Basically this is the output of this command + # aws --region us-west-2 eks describe-cluster --name your-cluster --query cluster.identity.oidc.issuer + description = "Issuer endpoint provided by EKS" + value = data.aws_eks_cluster.cluster.identity.0.oidc.0.issuer +} diff --git a/eks/tf/modules/flyte-eks/variables.tf b/eks/tf/modules/flyte-eks/variables.tf new file mode 100644 index 0000000000..810e6fd537 --- /dev/null +++ b/eks/tf/modules/flyte-eks/variables.tf @@ -0,0 +1,4 @@ +variable "eks_cluster_name" { + type = string +} + diff --git a/eks/tf/modules/flyte-rds/main.tf b/eks/tf/modules/flyte-rds/main.tf new file mode 100644 index 0000000000..adceaad42d --- /dev/null +++ b/eks/tf/modules/flyte-rds/main.tf @@ -0,0 +1,22 @@ +resource "aws_rds_cluster_instance" "flyte_instances" { + count = 1 + identifier = "flyteadmin-instances-${count.index}" + cluster_identifier = aws_rds_cluster.flyteadmin.id + instance_class = "db.t3.medium" + engine = "aurora-postgresql" + engine_version = "11.6" +} + +resource "aws_rds_cluster" "flyteadmin" { + cluster_identifier = "flyteadmin-cluster" + engine = "aurora-postgresql" + engine_version = "11.6" + availability_zones = ["us-west-2a", "us-west-2b", "us-west-2c"] + database_name = "flyteadmin" + master_username = "flyteadmin" + master_password = "spongebob" + backup_retention_period = 1 + preferred_backup_window = "07:00-09:00" + skip_final_snapshot = true +} + diff --git a/eks/tf/modules/flyte-rds/outputs.tf b/eks/tf/modules/flyte-rds/outputs.tf new file mode 100644 index 0000000000..10a40713e2 --- /dev/null +++ b/eks/tf/modules/flyte-rds/outputs.tf @@ -0,0 +1,8 @@ +output "admin_rds_name" { + value = aws_rds_cluster.flyteadmin.endpoint +} + +output "admin_rds_instance_id" { + value = aws_rds_cluster_instance.flyte_instances[0].identifier +} + diff --git a/eks/tf/modules/flyte-rds/variables.tf b/eks/tf/modules/flyte-rds/variables.tf new file mode 100644 index 0000000000..7f569fa0fc --- /dev/null +++ b/eks/tf/modules/flyte-rds/variables.tf @@ -0,0 +1,4 @@ +variable "rds_vpc" { + type = string +} + diff --git a/eks/tf/outputs.tf b/eks/tf/outputs.tf new file mode 100644 index 0000000000..7e7105832a --- /dev/null +++ b/eks/tf/outputs.tf @@ -0,0 +1,49 @@ +output "rds_vpc_cidr_block" { + value = data.aws_vpc.rds_vpc.cidr_block +} + +output "cluster_endpoint" { + description = "Endpoint for EKS control plane." + value = module.flyte_eks.cluster_endpoint +} + +output "cluster_security_group_id" { + description = "Security group ids attached to the cluster control plane." + value = module.flyte_eks.cluster_security_group_id +} + +output "kubectl_config" { + description = "kubectl config as generated by the module." + value = module.flyte_eks.kubectl_config +} + +output "config_map_aws_auth" { + description = "A kubernetes configuration to authenticate to this EKS cluster." 
+ value = module.flyte_eks.config_map_aws_auth +} + +output "eks_vpc_id" { + description = "The id of the VPC created for the EKS cluster" + value = module.flyte_eks.eks_vpc_id +} + +output "admin_rds_name" { + value = module.flyte_rds.admin_rds_name +} + +output "admin_rds_instance_id" { + value = module.flyte_rds.admin_rds_instance_id +} + +output "eks_cluster_oidc_connection_arn" { + value = aws_iam_openid_connect_provider.eks_oidc_connection.arn +} + +output "eks_cluster_oidc" { + value = module.flyte_eks.eks_oidc_issuer +} + +output "role_arn" { + value = aws_iam_role.flyte_operator.arn +} + diff --git a/eks/tf/values.auto.tfvars b/eks/tf/values.auto.tfvars new file mode 100644 index 0000000000..68f3e57973 --- /dev/null +++ b/eks/tf/values.auto.tfvars @@ -0,0 +1,4 @@ +region = "us-west-2" +rds_vpc = "vpc-1234567a" +eks_cluster_name = "flyte-demo" + diff --git a/eks/tf/variables.tf b/eks/tf/variables.tf new file mode 100644 index 0000000000..640bb8c6da --- /dev/null +++ b/eks/tf/variables.tf @@ -0,0 +1,12 @@ +variable "region" { + type = string +} + +variable "rds_vpc" { + type = string +} + +variable "eks_cluster_name" { + type = string +} + diff --git a/kustomize/base/datacatalog/rbac.yaml b/kustomize/base/datacatalog/rbac.yaml index d7ab1676e3..208fb8d1ef 100644 --- a/kustomize/base/datacatalog/rbac.yaml +++ b/kustomize/base/datacatalog/rbac.yaml @@ -1,4 +1,3 @@ -# Create a Service Account for FltyeAdmin apiVersion: v1 kind: ServiceAccount metadata: diff --git a/kustomize/overlays/eks/admindeployment/admindeployment.yaml b/kustomize/overlays/eks/admindeployment/admindeployment.yaml new file mode 100644 index 0000000000..a01718f823 --- /dev/null +++ b/kustomize/overlays/eks/admindeployment/admindeployment.yaml @@ -0,0 +1,57 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: flyteadmin + namespace: flyte +spec: + template: + spec: + volumes: + - name: resource-templates + configMap: + name: clusterresource-template + initContainers: + - name: run-migrations + image: docker.io/lyft/flyteadmin:v0.2.6 + imagePullPolicy: IfNotPresent + command: ["flyteadmin", "--logtostderr", "--config", "/etc/flyte/config/flyteadmin_config.yaml", + "migrate", "run"] + volumeMounts: + - name: config-volume + mountPath: /etc/flyte/config + - name: seed-projects + image: docker.io/lyft/flyteadmin:v0.2.6 + imagePullPolicy: IfNotPresent + command: ["flyteadmin", "--logtostderr", "--config", "/etc/flyte/config/flyteadmin_config.yaml", + "migrate", "seed-projects", "flytesnacks", "flytetester", "flyteexamples"] + volumeMounts: + - name: config-volume + mountPath: /etc/flyte/config + - name: sync-cluster-resources + image: docker.io/lyft/flyteadmin:v0.2.6 + imagePullPolicy: IfNotPresent + command: ["flyteadmin", "--logtostderr", "--config", "/etc/flyte/config/flyteadmin_config.yaml", "clusterresource", "sync"] + volumeMounts: + - name: resource-templates + mountPath: /etc/flyte/clusterresource/templates + - name: config-volume + mountPath: /etc/flyte/config + containers: + - name: flyteadmin + resources: + limits: + memory: "200Mi" + cpu: "0.1" + ephemeral-storage: "100Mi" +--- +apiVersion: v1 +kind: Service +metadata: + name: flyteadmin + namespace: flyte +spec: + ports: + - name: redoc + protocol: TCP + port: 87 + targetPort: 8087 diff --git a/kustomize/overlays/eks/admindeployment/clusterresource-templates/aa_namespace.yaml b/kustomize/overlays/eks/admindeployment/clusterresource-templates/aa_namespace.yaml new file mode 100644 index 0000000000..3075aa9f9e --- /dev/null +++ 
b/kustomize/overlays/eks/admindeployment/clusterresource-templates/aa_namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: {{ namespace }} +spec: + finalizers: + - kubernetes diff --git a/kustomize/overlays/eks/admindeployment/clusterresource-templates/ab_project-resource-quota.yaml b/kustomize/overlays/eks/admindeployment/clusterresource-templates/ab_project-resource-quota.yaml new file mode 100644 index 0000000000..ddfade3c29 --- /dev/null +++ b/kustomize/overlays/eks/admindeployment/clusterresource-templates/ab_project-resource-quota.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ResourceQuota +metadata: + name: project-quota + namespace: {{ namespace }} +spec: + hard: + limits.cpu: {{ projectQuotaCpu }} + limits.memory: {{ projectQuotaMemory }} + diff --git a/kustomize/overlays/eks/admindeployment/cron.yaml b/kustomize/overlays/eks/admindeployment/cron.yaml new file mode 100644 index 0000000000..435a5bb7a0 --- /dev/null +++ b/kustomize/overlays/eks/admindeployment/cron.yaml @@ -0,0 +1,30 @@ +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: syncresources + namespace: flyte +spec: + schedule: "*/1 * * * *" + jobTemplate: + spec: + template: + spec: + serviceAccountName: flyteadmin + containers: + - name: sync-cluster-resources + image: docker.io/lyft/flyteadmin:v0.2.6 + imagePullPolicy: IfNotPresent + command: ["flyteadmin", "--logtostderr", "--config", "/etc/flyte/config/flyteadmin_config.yaml", "clusterresource", "sync"] + volumeMounts: + - name: resource-templates + mountPath: /etc/flyte/clusterresource/templates + - name: config-volume + mountPath: /etc/flyte/config + volumes: + - name: resource-templates + configMap: + name: clusterresource-template + - name: config-volume + configMap: + name: flyte-admin-config + restartPolicy: OnFailure diff --git a/kustomize/overlays/eks/admindeployment/flyteadmin_config.yaml b/kustomize/overlays/eks/admindeployment/flyteadmin_config.yaml new file mode 100644 index 0000000000..d6c7a29af9 --- /dev/null +++ b/kustomize/overlays/eks/admindeployment/flyteadmin_config.yaml @@ -0,0 +1,76 @@ +logger: + show-source: true + level: 5 +server: + httpPort: 8088 + grpcPort: 8089 + security: + secure: false + useAuth: false + allowCors: false +flyteadmin: + roleNameKey: "iam.amazonaws.com/role" + profilerPort: 10254 + metricsScope: "flyte:" + metadataStoragePrefix: + - "metadata" + - "admin" +database: + port: 5432 + username: flyteadmin + host: flyteadmin-cluster.cluster-456123e6ivib.us-west-2.rds.amazonaws.com + dbname: flyteadmin + password: spongebob +remoteData: + region: "us-west-2" + scheme: aws + signedUrls: + durationMinutes: 3 +storage: + type: s3 + connection: + auth-type: iam + region: us-west-2 + cache: + max_size_mbs: 64 + target_gc_percent: 70 + container: "flyte-admin" +task_resources: + defaults: + cpu: 100m + memory: 100Mi + storage: 5Mi + limits: + cpu: 2 + memory: 2G + storage: 20Mi +task_type_whitelist: + spark: + - project: flytekit + - project: flytetester +domains: + - id: development + name: development + - id: staging + name: staging + - id: production + name: production +cluster_resources: + templatePath: "/etc/flyte/clusterresource/templates" + customData: + production: + - projectQuotaCpu: + value: "5" + - projectQuotaMemory: + value: "4000Mi" + staging: + - projectQuotaCpu: + value: "1" + - projectQuotaMemory: + value: "2000Mi" + development: + - projectQuotaCpu: + value: "1" + - projectQuotaMemory: + value: "2000Mi" + refresh: 5m diff --git 
a/kustomize/overlays/eks/admindeployment/kustomization.yaml b/kustomize/overlays/eks/admindeployment/kustomization.yaml new file mode 100644 index 0000000000..4af90e4d6f --- /dev/null +++ b/kustomize/overlays/eks/admindeployment/kustomization.yaml @@ -0,0 +1,26 @@ +bases: +- ../../../base/admindeployment +- ../../../base/adminserviceaccount + +namespace: flyte + +resources: +- cron.yaml + +configMapGenerator: +# the main admin configmap +- name: flyte-admin-config + files: + - flyteadmin_config.yaml +# cluster resource templates +- name: clusterresource-template + files: +# Files are read in alphabetical order. To ensure that we create the namespace first, prefix the file name with "aa". + - clusterresource-templates/aa_namespace.yaml + - clusterresource-templates/ab_project-resource-quota.yaml + +patches: +- admindeployment.yaml +- serviceaccount.yaml +- service.yaml + diff --git a/kustomize/overlays/eks/admindeployment/service.yaml b/kustomize/overlays/eks/admindeployment/service.yaml new file mode 100644 index 0000000000..f1a8e793bc --- /dev/null +++ b/kustomize/overlays/eks/admindeployment/service.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + service.beta.kubernetes.io/aws-load-balancer-internal: 'true' + name: flyteadmin + namespace: flyte +spec: + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 8088 + - name: grpc + port: 81 + protocol: TCP + targetPort: 8089 + selector: + app: flyteadmin + type: LoadBalancer + diff --git a/kustomize/overlays/eks/admindeployment/serviceaccount.yaml b/kustomize/overlays/eks/admindeployment/serviceaccount.yaml new file mode 100644 index 0000000000..5e0f4b2982 --- /dev/null +++ b/kustomize/overlays/eks/admindeployment/serviceaccount.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: flyteadmin + namespace: flyte + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::111222333456:role/flyte-operator" + diff --git a/kustomize/overlays/eks/console/console.yaml b/kustomize/overlays/eks/console/console.yaml new file mode 100644 index 0000000000..9261654671 --- /dev/null +++ b/kustomize/overlays/eks/console/console.yaml @@ -0,0 +1,15 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: flyteconsole + namespace: flyte +spec: + template: + spec: + containers: + - name: flyteconsole + resources: + limits: + memory: "150Mi" + cpu: "0.1" + ephemeral-storage: "100Mi" diff --git a/kustomize/overlays/eks/console/kustomization.yaml b/kustomize/overlays/eks/console/kustomization.yaml new file mode 100644 index 0000000000..d1a84577b7 --- /dev/null +++ b/kustomize/overlays/eks/console/kustomization.yaml @@ -0,0 +1,7 @@ +bases: +- ../../../base/console + +patches: +- console.yaml +- service.yaml + diff --git a/kustomize/overlays/eks/console/service.yaml b/kustomize/overlays/eks/console/service.yaml new file mode 100644 index 0000000000..489e8602e5 --- /dev/null +++ b/kustomize/overlays/eks/console/service.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Service +metadata: + name: flyteconsole + namespace: flyte +spec: + type: NodePort + diff --git a/kustomize/overlays/eks/datacatalog/datacatalog.yaml b/kustomize/overlays/eks/datacatalog/datacatalog.yaml new file mode 100644 index 0000000000..d68f803e63 --- /dev/null +++ b/kustomize/overlays/eks/datacatalog/datacatalog.yaml @@ -0,0 +1,22 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: datacatalog + namespace: flyte +spec: + template: + spec: + containers: + - name: datacatalog + resources: + limits: + memory: "200Mi" + 
cpu: "0.1" + ephemeral-storage: "100Mi" +--- +apiVersion: v1 +kind: Service +metadata: + name: datacatalog + namespace: flyte + diff --git a/kustomize/overlays/eks/datacatalog/datacatalog_config.yaml b/kustomize/overlays/eks/datacatalog/datacatalog_config.yaml new file mode 100644 index 0000000000..e746c62e69 --- /dev/null +++ b/kustomize/overlays/eks/datacatalog/datacatalog_config.yaml @@ -0,0 +1,25 @@ +logger: + show-source: true + level: 5 +datacatalog: + storage-prefix: metadata/datacatalog + metrics-scope: "datacatalog" + profiler-port: 10254 +application: + grpcPort: 8089 +database: + port: 5432 + username: flyteadmin + host: flyteadmin-cluster.cluster-456123e6ivib.us-west-2.rds.amazonaws.com + dbname: flytedatacatalog + password: spongebob +storage: + type: s3 + connection: + auth-type: iam + region: us-west-2 + cache: + max_size_mbs: 64 + target_gc_percent: 70 + container: "flyte-datacatalog" + diff --git a/kustomize/overlays/eks/datacatalog/kustomization.yaml b/kustomize/overlays/eks/datacatalog/kustomization.yaml new file mode 100644 index 0000000000..4b3dec4857 --- /dev/null +++ b/kustomize/overlays/eks/datacatalog/kustomization.yaml @@ -0,0 +1,15 @@ +bases: +- ../../../base/datacatalog + +namespace: flyte + +configMapGenerator: +- name: datacatalog-config + files: + - datacatalog_config.yaml + +patches: +- datacatalog.yaml +- serviceaccount.yaml +- service.yaml + diff --git a/kustomize/overlays/eks/datacatalog/service.yaml b/kustomize/overlays/eks/datacatalog/service.yaml new file mode 100644 index 0000000000..b6a0ae49c5 --- /dev/null +++ b/kustomize/overlays/eks/datacatalog/service.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Service +metadata: + name: datacatalog + namespace: flyte +spec: + type: NodePort + diff --git a/kustomize/overlays/eks/datacatalog/serviceaccount.yaml b/kustomize/overlays/eks/datacatalog/serviceaccount.yaml new file mode 100644 index 0000000000..e2dba9c418 --- /dev/null +++ b/kustomize/overlays/eks/datacatalog/serviceaccount.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: datacatalog + namespace: flyte + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::111222333456:role/flyte-operator" + diff --git a/kustomize/overlays/eks/flyte/ingress.yaml b/kustomize/overlays/eks/flyte/ingress.yaml new file mode 100644 index 0000000000..633206240a --- /dev/null +++ b/kustomize/overlays/eks/flyte/ingress.yaml @@ -0,0 +1,51 @@ +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: "flytesystem" + namespace: "flyte" + annotations: + kubernetes.io/ingress.class: alb + alb.ingress.kubernetes.io/tags: service_instance=production + alb.ingress.kubernetes.io/scheme: internet-facing + labels: + app: flyteadmin +spec: + rules: + - http: + paths: + - path: /console + backend: + serviceName: flyteconsole + servicePort: 80 + - path: /console/* + backend: + serviceName: flyteconsole + servicePort: 80 + # This is useful only for sandbox mode and should be templatized/removed in non-sandbox environments + - path: /__webpack_hmr + backend: + serviceName: flyteconsole + servicePort: 80 + - path: /api/* + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /healthcheck + backend: + serviceName: flyteadmin + servicePort: 80 + - path: /v1/* + backend: + serviceName: flyteadmin + servicePort: 80 + # NOTE: Port 81 in flyteadmin is the GRPC server port for + # FlyteAdmin. + - path: /flyteidl.service.AdminService/* + backend: + serviceName: flyteadmin + servicePort: 81 + # Port 87 in FlyteAdmin maps to the redoc container. 
+ - path: /openapi/* + backend: + serviceName: flyteadmin + servicePort: 87 diff --git a/kustomize/overlays/eks/flyte/kustomization.yaml b/kustomize/overlays/eks/flyte/kustomization.yaml new file mode 100644 index 0000000000..4563a007e2 --- /dev/null +++ b/kustomize/overlays/eks/flyte/kustomization.yaml @@ -0,0 +1,15 @@ +bases: +# global resources +- ../../../base/namespace + +# user plane / control plane resources +- ../admindeployment +- ../datacatalog +- ../console + +# data plane resources +- ../../../base/wf_crd +- ../propeller +- ../redis + +- ingress.yaml diff --git a/kustomize/overlays/eks/propeller/config.yaml b/kustomize/overlays/eks/propeller/config.yaml new file mode 100644 index 0000000000..b2a205c287 --- /dev/null +++ b/kustomize/overlays/eks/propeller/config.yaml @@ -0,0 +1,68 @@ +propeller: + rawoutput-prefix: s3://flyte-outputs + metadata-prefix: propeller/eks + workers: 4 + max-workflow-retries: 30 + workflow-reeval-duration: 30s + downstream-eval-duration: 30s + limit-namespace: "all" + prof-port: 10254 + metrics-prefix: flyte + enable-admin-launcher: true + leader-election: + lock-config-map: + name: propeller-leader + namespace: flyte + enabled: true + lease-duration: 15s + renew-deadline: 10s + retry-period: 2s + queue: + type: batch + batching-interval: 2s + batch-size: -1 + queue: + type: bucket + rate: 10 + capacity: 100 + sub-queue: + type: bucket + rate: 10 + capacity: 100 + resourcemanager: + type: redis + resourceMaxQuota: 10000 + redis: + hostPath: redis-resource-manager.flyte:6379 + hostKey: mypassword +logger: + show-source: true + level: 5 +storage: + type: s3 + connection: + auth-type: iam + region: us-west-2 + cache: + max_size_mbs: 1024 + target_gc_percent: 70 + container: "flyte-metadata" + limits: + maxDownloadMBs: 10 +event: + type: admin + rate: 500 + capacity: 1000 +admin: + endpoint: flyteadmin:81 + insecure: true +catalog-cache: + endpoint: datacatalog:89 + type: datacatalog + insecure: true +tasks: + task-plugins: + enabled-plugins: + - container + - sidecar + - k8s-array diff --git a/kustomize/overlays/eks/propeller/kustomization.yaml b/kustomize/overlays/eks/propeller/kustomization.yaml new file mode 100644 index 0000000000..2826649424 --- /dev/null +++ b/kustomize/overlays/eks/propeller/kustomization.yaml @@ -0,0 +1,27 @@ +bases: +- ../../../base/propeller + +namespace: flyte + +configMapGenerator: +# the main propeller configmap +- name: flyte-propeller-config + files: + - config.yaml +# the plugin-configmap +- name: flyte-plugin-config + files: + - plugins/config.yaml +# a configmap for each plugin +- name: flyte-spark-config + files: + - plugins/spark/config.yaml +- name: flyte-container-config + files: + - plugins/container/config.yaml + +patches: +- serviceaccount.yaml +# add the volumemount for each plugin configmap +- plugins/spark/propeller-patch.yaml +- plugins/container/propeller-patch.yaml diff --git a/kustomize/overlays/eks/propeller/plugins/config.yaml b/kustomize/overlays/eks/propeller/plugins/config.yaml new file mode 100644 index 0000000000..a2237e1f9a --- /dev/null +++ b/kustomize/overlays/eks/propeller/plugins/config.yaml @@ -0,0 +1,9 @@ +plugins: + logs: + kubernetes-enabled: true + kubernetes-url: "http://localhost:30082" + k8s: + default-env-vars: + - AWS_RETRY_MODE: standard + - AWS_METADATA_SERVICE_TIMEOUT: 5 + - AWS_METADATA_SERVICE_NUM_ATTEMPTS: 20 diff --git a/kustomize/overlays/eks/propeller/plugins/container/config.yaml b/kustomize/overlays/eks/propeller/plugins/container/config.yaml new file mode 100644 index 
0000000000..e69de29bb2 diff --git a/kustomize/overlays/eks/propeller/plugins/container/propeller-patch.yaml b/kustomize/overlays/eks/propeller/plugins/container/propeller-patch.yaml new file mode 100644 index 0000000000..fe9e305e28 --- /dev/null +++ b/kustomize/overlays/eks/propeller/plugins/container/propeller-patch.yaml @@ -0,0 +1,17 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: flytepropeller + namespace: flyte +spec: + template: + spec: + volumes: + - name: container-config-volume + configMap: + name: flyte-container-config + containers: + - name: flytepropeller + volumeMounts: + - name: container-config-volume + mountPath: /etc/flyte/config-container diff --git a/kustomize/overlays/eks/propeller/plugins/spark/config.yaml b/kustomize/overlays/eks/propeller/plugins/spark/config.yaml new file mode 100644 index 0000000000..c1ffb9209a --- /dev/null +++ b/kustomize/overlays/eks/propeller/plugins/spark/config.yaml @@ -0,0 +1,16 @@ +plugins: + spark: + spark-config-default: + - spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version: "2" + - spark.kubernetes.allocation.batch.size: "50" + - spark.hadoop.fs.s3a.acl.default: "BucketOwnerFullControl" + - spark.hadoop.fs.s3n.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3n.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.impl: "org.apache.hadoop.fs.s3a.S3AFileSystem" + - spark.hadoop.fs.AbstractFileSystem.s3a.impl: "org.apache.hadoop.fs.s3a.S3A" + - spark.hadoop.fs.s3a.multipart.threshold: "536870912" + - spark.blacklist.enabled: "true" + - spark.blacklist.timeout: "5m" + - spark.task.maxfailures: "8" diff --git a/kustomize/overlays/eks/propeller/plugins/spark/propeller-patch.yaml b/kustomize/overlays/eks/propeller/plugins/spark/propeller-patch.yaml new file mode 100644 index 0000000000..a1c2aacd10 --- /dev/null +++ b/kustomize/overlays/eks/propeller/plugins/spark/propeller-patch.yaml @@ -0,0 +1,17 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: flytepropeller + namespace: flyte +spec: + template: + spec: + volumes: + - name: spark-config-volume + configMap: + name: flyte-spark-config + containers: + - name: flytepropeller + volumeMounts: + - name: spark-config-volume + mountPath: /etc/flyte/config-spark diff --git a/kustomize/overlays/eks/propeller/propeller.yaml b/kustomize/overlays/eks/propeller/propeller.yaml new file mode 100644 index 0000000000..498055e6fc --- /dev/null +++ b/kustomize/overlays/eks/propeller/propeller.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: flytepropeller + namespace: flyte +spec: + template: + spec: + containers: + - name: flytepropeller + env: + - name: QUBOLE_API_KEY + value: notarealkey + resources: + limits: + memory: "100Mi" + cpu: "0.1" + ephemeral-storage: "100Mi" diff --git a/kustomize/overlays/eks/propeller/serviceaccount.yaml b/kustomize/overlays/eks/propeller/serviceaccount.yaml new file mode 100644 index 0000000000..9f10dc3935 --- /dev/null +++ b/kustomize/overlays/eks/propeller/serviceaccount.yaml @@ -0,0 +1,8 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: flytepropeller + namespace: flyte + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::111222333456:role/flyte-operator" diff --git a/kustomize/overlays/eks/redis/kustomization.yaml b/kustomize/overlays/eks/redis/kustomization.yaml new file mode 100644 index 
0000000000..12ec57311d --- /dev/null +++ b/kustomize/overlays/eks/redis/kustomization.yaml @@ -0,0 +1,5 @@ +bases: +- ../../../dependencies/redis + +patches: +- storage.yaml diff --git a/kustomize/overlays/eks/redis/storage.yaml b/kustomize/overlays/eks/redis/storage.yaml new file mode 100644 index 0000000000..0c5aed931b --- /dev/null +++ b/kustomize/overlays/eks/redis/storage.yaml @@ -0,0 +1,11 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: redis + namespace: flyte +spec: + template: + spec: + volumes: + - name: redis-data + emptyDir: {} diff --git a/script/kustomize.sh b/script/kustomize.sh index ebb70d963f..320f5fd080 100755 --- a/script/kustomize.sh +++ b/script/kustomize.sh @@ -2,7 +2,7 @@ set -ex -DEPLOYMENT=${1:-sandbox test} +DEPLOYMENT=${1:-sandbox test eks} DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" KUSTOMIZE_IMAGE="lyft/kustomizer:v3.1.0"
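
Assuming the script's existing sandbox/test flow is unchanged, the generated EKS manifest can be rebuilt after editing the overlays with:

```bash
# Regenerate deployment/eks/flyte_generated.yaml from kustomize/overlays/eks
# (uses the dockerized kustomize image the script already references)
./script/kustomize.sh eks
```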