Skip to content

Commit

Permalink
feat(ingestion): add groups to ldap users (#5470)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexey-kravtsov authored Aug 1, 2022
1 parent 2163d75 commit c182fe7
Show file tree
Hide file tree
Showing 5 changed files with 311 additions and 19 deletions.
64 changes: 45 additions & 19 deletions metadata-ingestion/src/datahub/ingestion/source/ldap.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""LDAP Source"""
import dataclasses
import re
from typing import Any, Dict, Iterable, List, Optional

import ldap
Expand All @@ -22,6 +23,7 @@
CorpGroupSnapshotClass,
CorpUserInfoClass,
CorpUserSnapshotClass,
GroupMembershipClass,
)

# default mapping for attrs
Expand All @@ -42,6 +44,7 @@
user_attrs_map["title"] = "title"
user_attrs_map["departmentName"] = "departmentNumber"
user_attrs_map["countryCode"] = "countryCode"
user_attrs_map["memberOf"] = "memberOf"

# group related attrs
group_attrs_map["urn"] = "cn"
Expand Down Expand Up @@ -94,6 +97,7 @@ class LDAPSourceConfig(ConfigModel):
# Extraction configuration.
base_dn: str = Field(description="LDAP DN.")
filter: str = Field(default="(objectClass=*)", description="LDAP extractor filter.")
attrs_list: List[str] = Field(default=None, description="Retrieved attributes list")

# If set to true, any users without first and last names will be dropped.
drop_missing_first_last_name: bool = Field(
Expand Down Expand Up @@ -201,6 +205,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
self.config.base_dn,
ldap.SCOPE_SUBTREE,
self.config.filter,
self.config.attrs_list,
serverctrls=[self.lc],
)
_rtype, rdata, _rmsgid, serverctrls = self.ldap_client.result3(msgid)
Expand Down Expand Up @@ -228,6 +233,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
elif (
b"posixGroup" in attrs["objectClass"]
or b"organizationalUnit" in attrs["objectClass"]
or b"groupOfNames" in attrs["objectClass"]
or b"group" in attrs["objectClass"]
):
yield from self.handle_group(dn, attrs)
Expand Down Expand Up @@ -301,6 +307,7 @@ def build_corp_user_mce(
full_name = attrs[self.config.user_attrs_map["fullName"]][0].decode()
first_name = attrs[self.config.user_attrs_map["firstName"]][0].decode()
last_name = attrs[self.config.user_attrs_map["lastName"]][0].decode()
groups = parse_groups(attrs, self.config.user_attrs_map["memberOf"])

email = (
(attrs[self.config.user_attrs_map["email"]][0]).decode()
Expand Down Expand Up @@ -334,27 +341,30 @@ def build_corp_user_mce(
)
manager_urn = f"urn:li:corpuser:{manager_ldap}" if manager_ldap else None

return MetadataChangeEvent(
proposedSnapshot=CorpUserSnapshotClass(
urn=f"urn:li:corpuser:{ldap_user}",
aspects=[
CorpUserInfoClass(
active=True,
email=email,
fullName=full_name,
firstName=first_name,
lastName=last_name,
departmentId=department_id,
departmentName=department_name,
displayName=display_name,
countryCode=country_code,
title=title,
managerUrn=manager_urn,
)
],
)
user_snapshot = CorpUserSnapshotClass(
urn=f"urn:li:corpuser:{ldap_user}",
aspects=[
CorpUserInfoClass(
active=True,
email=email,
fullName=full_name,
firstName=first_name,
lastName=last_name,
departmentId=department_id,
departmentName=department_name,
displayName=display_name,
countryCode=country_code,
title=title,
managerUrn=manager_urn,
)
],
)

if groups:
user_snapshot.aspects.append(GroupMembershipClass(groups=groups))

return MetadataChangeEvent(proposedSnapshot=user_snapshot)

def build_corp_group_mce(self, attrs: dict) -> Optional[MetadataChangeEvent]:
"""Creates a MetadataChangeEvent for LDAP groups."""
cn = attrs.get(self.config.group_attrs_map["urn"])
Expand Down Expand Up @@ -417,3 +427,19 @@ def strip_ldap_info(input_clean: bytes) -> str:
"""Converts a b'uid=username,ou=Groups,dc=internal,dc=machines'
format to username"""
return input_clean.decode().split(",")[0].lstrip("uid=")


def parse_groups(attrs: Dict[str, Any], filter_key: str) -> List[str]:
"""Converts a list of LDAP groups to Datahub corpgroup strings"""
if filter_key in attrs:
return [
f"urn:li:corpGroup:{strip_ldap_group_cn(ldap_group)}"
for ldap_group in attrs[filter_key]
]
return []


def strip_ldap_group_cn(input_clean: bytes) -> str:
"""Converts a b'cn=group_name,ou=Groups,dc=internal,dc=machines'
format to group name"""
return re.sub("cn=", "", input_clean.decode().split(",")[0], flags=re.IGNORECASE)
88 changes: 88 additions & 0 deletions metadata-ingestion/tests/integration/ldap/ldap_mces_golden.json
Original file line number Diff line number Diff line change
Expand Up @@ -151,5 +151,93 @@
"runId": "ldap-test",
"properties": null
}
},
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
"urn": "urn:li:corpuser:hbevan",
"aspects": [
{
"com.linkedin.pegasus2avro.identity.CorpUserInfo": {
"active": true,
"displayName": "Hester Bevan",
"email": "hbevan",
"title": null,
"managerUrn": null,
"departmentId": null,
"departmentName": null,
"firstName": "Hester",
"lastName": "Bevan",
"fullName": "Hester Bevan",
"countryCode": null
}
}
]
}
},
"proposedDelta": null,
"systemMetadata": {
"lastObserved": 1615443388097,
"runId": "ldap-test",
"properties": null
}
},
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
"urn": "urn:li:corpuser:ehaas",
"aspects": [
{
"com.linkedin.pegasus2avro.identity.CorpUserInfo": {
"active": true,
"displayName": "Evalyn Haas",
"email": "ehaas",
"title": null,
"managerUrn": null,
"departmentId": null,
"departmentName": null,
"firstName": "Evalyn",
"lastName": "Haas",
"fullName": "Evalyn Haas",
"countryCode": null
}
}
]
}
},
"proposedDelta": null,
"systemMetadata": {
"lastObserved": 1615443388097,
"runId": "ldap-test",
"properties": null
}
},
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": {
"urn": "urn:li:corpGroup:HR Department",
"aspects": [
{
"com.linkedin.pegasus2avro.identity.CorpGroupInfo": {
"displayName": null,
"email": "HR Department",
"admins": [],
"members": [],
"groups": [],
"description": null
}
}
]
}
},
"proposedDelta": null,
"systemMetadata": {
"lastObserved": 1615443388097,
"runId": "ldap-test",
"properties": null
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
[
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
"urn": "urn:li:corpuser:hbevan",
"aspects": [
{
"com.linkedin.pegasus2avro.identity.CorpUserInfo": {
"active": true,
"displayName": "Hester Bevan",
"email": "hbevan",
"title": null,
"managerUrn": null,
"departmentId": null,
"departmentName": null,
"firstName": "Hester",
"lastName": "Bevan",
"fullName": "Hester Bevan",
"countryCode": null
}
},
{
"com.linkedin.pegasus2avro.identity.GroupMembership": {
"groups": [
"urn:li:corpGroup:HR Department"
]
}
}
]
}
},
"proposedDelta": null,
"systemMetadata": {
"lastObserved": 1615443388097,
"runId": "ldap-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
"urn": "urn:li:corpuser:ehaas",
"aspects": [
{
"com.linkedin.pegasus2avro.identity.CorpUserInfo": {
"active": true,
"displayName": "Evalyn Haas",
"email": "ehaas",
"title": null,
"managerUrn": null,
"departmentId": null,
"departmentName": null,
"firstName": "Evalyn",
"lastName": "Haas",
"fullName": "Evalyn Haas",
"countryCode": null
}
},
{
"com.linkedin.pegasus2avro.identity.GroupMembership": {
"groups": [
"urn:li:corpGroup:HR Department"
]
}
}
]
}
},
"proposedDelta": null,
"systemMetadata": {
"lastObserved": 1615443388097,
"runId": "ldap-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
}
]
49 changes: 49 additions & 0 deletions metadata-ingestion/tests/integration/ldap/setup/custom/sample.ldif
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,19 @@

version: 1

# Enable memberOf overlay
dn: olcOverlay=memberof,olcDatabase={1}mdb,cn=config
objectClass: olcMemberOf
objectClass: olcOverlayConfig
objectClass: olcConfig
objectClass: top
olcOverlay: memberof
olcMemberOfDangling: ignore
olcMemberOfRefInt: TRUE
olcMemberOfGroupOC: groupOfNames
olcMemberOfMemberAD: member
olcMemberOfMemberOfAD: memberOf

# Entry 1: dc=example,dc=org
# Note: this is commented out because the containers bootstrap scripts already
# handle this for us.
Expand Down Expand Up @@ -113,3 +126,39 @@ dn: ou=Sales Department,dc=example,dc=org
objectclass: organizationalUnit
objectclass: top
ou: Sales Department

# Entry 10: cn=Hester Bevan,ou=people,dc=example,dc=org
dn: cn=Hester Bevan,ou=people,dc=example,dc=org
cn: Hester Bevan
gidnumber: 500
givenname: Hester
homedirectory: /home/users/hbevan
objectclass: inetOrgPerson
objectclass: posixAccount
objectclass: top
sn: Bevan
uid: hbevan
uidnumber: 1004
userpassword: {MD5}4QrcOUm6Wau+VuBX8g+IPg==

# Entry 11: cn=Evalyn Haas,ou=people,dc=example,dc=org
dn: cn=Evalyn Haas,ou=people,dc=example,dc=org
cn: Evalyn Haas
gidnumber: 500
givenname: Evalyn
homedirectory: /home/users/ehaas
objectclass: inetOrgPerson
objectclass: posixAccount
objectclass: top
sn: Haas
uid: ehaas
uidnumber: 1005
userpassword: {MD5}4QrcOUm6Wau+VuBX8g+IPg==

# Entry 12: cn=HR Department,ou=groups,dc=example,dc=org
dn: cn=HR Department,dc=example,dc=org
cn: HR Department
objectclass: groupOfNames
objectclass: top
member: cn=Hester Bevan,ou=people,dc=example,dc=org
member: cn=Evalyn Haas,ou=people,dc=example,dc=org
Loading

0 comments on commit c182fe7

Please sign in to comment.