Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(multi-tenancy): add support for multi-tenancy in bulk loader #7399

Merged
merged 7 commits into from
Feb 8, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 29 additions & 4 deletions chunker/json_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,9 @@ func parseScalarFacets(m map[string]interface{}, prefix string) ([]*api.Facet, e

// This is the response for a map[string]interface{} i.e. a struct.
type mapResponse struct {
uid string // uid retrieved or allocated for the node.
fcts []*api.Facet // facets on the edge connecting this node to the source if any.
uid string // uid retrieved or allocated for the node.
namespace uint64 // namespace to which the node belongs.
fcts []*api.Facet // facets on the edge connecting this node to the source if any.
}

func handleBasicType(k string, v interface{}, op int, nq *api.NQuad) error {
Expand Down Expand Up @@ -267,6 +268,7 @@ func (buf *NQuadBuffer) checkForDeletion(mr mapResponse, m map[string]interface{
buf.Push(&api.NQuad{
Subject: mr.uid,
Predicate: x.Star,
Namespace: mr.namespace,
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: x.Star}},
})
}
Expand Down Expand Up @@ -447,12 +449,29 @@ func (buf *NQuadBuffer) mapToNquads(m map[string]interface{}, op int, parentPred
mr.uid = getNextBlank()
}

namespace := x.DefaultNamespace
if ns, ok := m["namespace"]; ok {
switch ns := ns.(type) {
case json.Number:
nsi, err := ns.Int64()
if err != nil {
return mr, err
}
namespace = uint64(nsi)

// this int64 case is needed for FastParseJSON, which doesn't use json.Number
case int64:
namespace = uint64(ns)
}
}
mr.namespace = namespace

for pred, v := range m {
// We have already extracted the uid above so we skip that edge.
// v can be nil if user didn't set a value and if omitEmpty was not supplied as JSON
// option.
// We also skip facets here because we parse them with the corresponding predicate.
if pred == "uid" {
if pred == "uid" || pred == "namespace" {
continue
}

Expand All @@ -462,6 +481,7 @@ func (buf *NQuadBuffer) mapToNquads(m map[string]interface{}, op int, parentPred
nq := &api.NQuad{
Subject: mr.uid,
Predicate: pred,
Namespace: namespace,
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: x.Star}},
}
// Here we split predicate and lang directive (ex: "name@en"), if needed. With JSON
Expand All @@ -478,6 +498,7 @@ func (buf *NQuadBuffer) mapToNquads(m map[string]interface{}, op int, parentPred
nq := api.NQuad{
Subject: mr.uid,
Predicate: pred,
Namespace: namespace,
}

prefix := pred + x.FacetDelimeter
Expand Down Expand Up @@ -545,6 +566,7 @@ func (buf *NQuadBuffer) mapToNquads(m map[string]interface{}, op int, parentPred
nq := api.NQuad{
Subject: mr.uid,
Predicate: pred,
Namespace: namespace,
}

switch iv := item.(type) {
Expand Down Expand Up @@ -739,8 +761,11 @@ func (buf *NQuadBuffer) ParseJSON(b []byte, op int) error {
return nil
}
mr, err := buf.mapToNquads(ms, op, "")
if err != nil {
return err
}
buf.checkForDeletion(mr, ms, op)
return err
return nil
}

// ParseJSON is a convenience wrapper function to get all NQuads in one call. This can however, lead
Expand Down
28 changes: 16 additions & 12 deletions chunker/json_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,15 @@ type address struct {
}

type Person struct {
Uid string `json:"uid,omitempty"`
Name string `json:"name,omitempty"`
Age int `json:"age,omitempty"`
Married *bool `json:"married,omitempty"`
Now *time.Time `json:"now,omitempty"`
Address address `json:"address,omitempty"` // geo value
Friends []Person `json:"friend,omitempty"`
School *School `json:"school,omitempty"`
Uid string `json:"uid,omitempty"`
Namespace string `json:"namespace,omitempty"`
Name string `json:"name,omitempty"`
Age int `json:"age,omitempty"`
Married *bool `json:"married,omitempty"`
Now *time.Time `json:"now,omitempty"`
Address address `json:"address,omitempty"` // geo value
Friends []Person `json:"friend,omitempty"`
School *School `json:"school,omitempty"`
}

func Parse(b []byte, op int) ([]*api.NQuad, error) {
Expand All @@ -90,6 +91,7 @@ func (exp *Experiment) verify() {
exp.t.Fatalf("Error while getting a dgraph client: %v", err)
}

// TODO(Naman): Fix these tests, once the ACL is integrated.
ctx := context.Background()
require.NoError(exp.t, dg.Alter(ctx, &api.Operation{DropAll: true}), "drop all failed")
require.NoError(exp.t, dg.Alter(ctx, &api.Operation{Schema: exp.schema}),
Expand All @@ -116,10 +118,12 @@ func TestNquadsFromJson1(t *testing.T) {
tn := time.Now().UTC()
m := true
p := Person{
Name: "Alice",
Age: 26,
Married: &m,
Now: &tn,
Uid: "1",
Namespace: "0x2",
Name: "Alice",
Age: 26,
Married: &m,
Now: &tn,
Address: address{
Type: "Point",
Coords: []float64{1.1, 2.0},
Expand Down
10 changes: 10 additions & 0 deletions chunker/rdf_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,16 @@ L:
return rnq, errors.Errorf("NQuad failed sanity check:%+v", rnq)
}

// TODO(Naman): Ensure the label contains a valid namespace.
// Append the namespace to the predicate before returning NQuad.
if rnq.Label != "" {
ns, err := strconv.ParseUint(rnq.Label, 0, 64)
if err != nil {
return rnq, err
}
rnq.Namespace = ns
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a separate field in NQuad just for the namespace? Could we use the label field instead?
The label is a string and Namespace is a uint64.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the current implementation, the user cannot pass non-namespace strings in the label.
We can allow it in the following way:
The user tells us that their RDFs contain a label and also tells us to load the data into a specific namespace. Then, while parsing the RDFs, we will not treat the label as a namespace.

}

return rnq, nil
}

Expand Down
27 changes: 16 additions & 11 deletions chunker/rdf_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,40 +346,43 @@ var testNQuads = []struct {
expectedErr: false,
},
{
input: `_:alice <knows> "stuff"^^<xs:string> <label> .`,
input: `_:alice <knows> "stuff"^^<xs:string> <0xf2> .`,
nq: api.NQuad{
Subject: "_:alice",
Predicate: "knows",
ObjectId: "",
ObjectValue: &api.Value{Val: &api.Value_StrVal{StrVal: "stuff"}},
Label: "label",
Label: "0xf2",
Namespace: 0xf2,
},
expectedErr: false,
},
{
input: `_:alice <knows> "stuff"^^<xs:string> _:label .`,
input: `_:alice <knows> "stuff"^^<xs:string> <0xf2> .`,
nq: api.NQuad{
Subject: "_:alice",
Predicate: "knows",
ObjectId: "",
ObjectValue: &api.Value{Val: &api.Value_StrVal{StrVal: "stuff"}},
Label: "_:label",
Label: "0xf2",
Namespace: 0xf2,
},
expectedErr: false,
},
{
input: `_:alice <knows> "stuff"^^<xs:string> _:label . # comment`,
input: `_:alice <knows> "stuff"^^<xs:string> <10> . # comment`,
nq: api.NQuad{
Subject: "_:alice",
Predicate: "knows",
ObjectId: "",
ObjectValue: &api.Value{Val: &api.Value_StrVal{StrVal: "stuff"}},
Label: "_:label",
Label: "10",
Namespace: 10,
},
expectedErr: false,
},
{
input: `_:alice <knows> "stuff"^^<xs:string> "label" .`,
input: `_:alice <knows> "stuff"^^<xs:string> "0xf2" .`,
expectedErr: true,
},
{
Expand Down Expand Up @@ -599,13 +602,14 @@ var testNQuads = []struct {

// Edge Facets test.
{
input: `_:alice <knows> "stuff" _:label (key1="val1",key2=13) .`,
input: `_:alice <knows> "stuff" <0x10> (key1="val1",key2=13) .`,
nq: api.NQuad{
Subject: "_:alice",
Predicate: "knows",
ObjectId: "",
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: "stuff"}},
Label: "_:label",
Label: "0x10",
Namespace: 0x10,
Facets: []*api.Facet{
{
Key: "key1",
Expand All @@ -623,13 +627,14 @@ var testNQuads = []struct {
expectedErr: false,
},
{
input: `_:alice <knows> "stuff" _:label (key1=,key2=13) .`,
input: `_:alice <knows> "stuff" <0x12> (key1=,key2=13) .`,
nq: api.NQuad{
Subject: "_:alice",
Predicate: "knows",
ObjectId: "",
ObjectValue: &api.Value{Val: &api.Value_DefaultVal{DefaultVal: "stuff"}},
Label: "_:label",
Label: "0x12",
Namespace: 0x12,
Facets: []*api.Facet{
{
Key: "key1",
Expand Down
35 changes: 34 additions & 1 deletion dgraph/cmd/bulk/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ type options struct {
MapShards int
ReduceShards int

Namespace uint64

shardOutputDirs []string

// ........... Badger options ..........
Expand All @@ -100,6 +102,7 @@ type state struct {
dbs []*badger.DB
tmpDbs []*badger.DB // Temporary DB to write the split lists to avoid ordering issues.
writeTs uint64 // All badger writes use this timestamp
namespaces *sync.Map // To store the encountered namespaces.
}

type loader struct {
Expand Down Expand Up @@ -136,6 +139,7 @@ func newLoader(opt *options) *loader {
// Lots of gz readers, so not much channel buffer needed.
readerChunkCh: make(chan *bytes.Buffer, opt.NumGoroutines),
writeTs: getWriteTimestamp(zero),
namespaces: &sync.Map{},
}
st.schema = newSchemaStore(readSchema(opt), opt, st)
ld := &loader{
Expand Down Expand Up @@ -164,6 +168,30 @@ func getWriteTimestamp(zero *grpc.ClientConn) uint64 {
}
}

// leaseNamespaces finds the largest namespace recorded in ld.namespaces and asks
// Dgraph Zero to lease that many namespace IDs. The request is retried once a
// second until it succeeds.
func (ld *loader) leaseNamespaces() {
	var maxNs uint64
	ld.namespaces.Range(func(key, _ interface{}) bool {
		if ns := key.(uint64); ns > maxNs {
			maxNs = ns
		}
		return true
	})

	// maxNs stays 0 when no namespace above 0 was recorded, so there is nothing to
	// lease. Returning here also keeps the retry loop below from running forever.
	// NOTE(review): Zero is presumed to reject a request for 0 IDs — confirm.
	if maxNs == 0 {
		return
	}

	client := pb.NewZeroClient(ld.zero)
	for {
		// Bound each attempt so a hung Zero does not block the loader indefinitely.
		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
		// TODO(Naman): Maybe lease maxNs-1 with some handling. Using maxNs for simplicity for now.
		ns, err := client.AssignIds(ctx, &pb.Num{Val: maxNs, Type: pb.Num_NS_ID})
		cancel()
		if err == nil {
			fmt.Printf("Assigned namespaces till %d\n", ns.GetEndId())
			return
		}
		fmt.Printf("Error communicating with dgraph zero, retrying: %v\n", err)
		time.Sleep(time.Second)
	}
}

func readSchema(opt *options) *schema.ParsedSchema {
f, err := filestore.Open(opt.SchemaFile)
x.Check(err)
Expand All @@ -183,7 +211,7 @@ func readSchema(opt *options) *schema.ParsedSchema {
buf, err := ioutil.ReadAll(r)
x.Check(err)

result, err := schema.Parse(string(buf))
result, err := schema.ParseWithNamespace(string(buf), opt.Namespace)
x.Check(err)
return result
}
Expand Down Expand Up @@ -260,6 +288,7 @@ func (ld *loader) mapStage() {
x.Check(thr.Finish())

// Send the graphql triples
// TODO(Naman): Handle this.
ld.processGqlSchema(loadType)

close(ld.readerChunkCh)
Expand All @@ -276,6 +305,7 @@ func (ld *loader) mapStage() {
ld.xids = nil
}

// TODO(Naman): Fix this for multi-tenancy.
func (ld *loader) processGqlSchema(loadType chunker.InputFormat) {
if ld.opt.GqlSchemaFile == "" {
return
Expand All @@ -299,6 +329,7 @@ func (ld *loader) processGqlSchema(loadType chunker.InputFormat) {
buf, err := ioutil.ReadAll(r)
x.Check(err)

// TODO(Naman): We will need this for all the namespaces.
rdfSchema := `_:gqlschema <dgraph.type> "dgraph.graphql" .
_:gqlschema <dgraph.graphql.xid> "dgraph.graphql.schema" .
_:gqlschema <dgraph.graphql.schema> %s .
Expand All @@ -310,6 +341,8 @@ func (ld *loader) processGqlSchema(loadType chunker.InputFormat) {
"dgraph.graphql.schema": %s
}`

// TODO(Naman): Process the GQL schema here.

gqlBuf := &bytes.Buffer{}
schema := strconv.Quote(string(buf))
switch loadType {
Expand Down
Loading