-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[feature][pulsar-io-mongo] Add support for full message synchronizati…
…on (#16003) ### Motivation Currently, the MongoDB source connector supports only incremental message synchronization. This PR adds support for full message synchronization. Since MongoDB 4.0, the starting point of a change stream can be set with the `startAtOperationTime` field, so setting it to `0` makes the stream start from the earliest point. See https://www.mongodb.com/docs/v4.2/reference/method/db.collection.watch/ for more information.
- Loading branch information
Showing
20 changed files
with
888 additions
and
233 deletions.
There are no files selected for viewing
102 changes: 102 additions & 0 deletions
102
pulsar-io/mongo/src/main/java/org/apache/pulsar/io/mongodb/MongoAbstractConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.pulsar.io.mongodb; | ||
|
||
import static com.google.common.base.Preconditions.checkArgument; | ||
import com.fasterxml.jackson.annotation.JsonCreator; | ||
import com.fasterxml.jackson.annotation.JsonProperty; | ||
import java.io.Serializable; | ||
import lombok.Data; | ||
import lombok.experimental.Accessors; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.apache.pulsar.io.core.annotations.FieldDoc; | ||
|
||
/** | ||
* Configuration object for all MongoDB components. | ||
*/ | ||
@Data | ||
@Accessors(chain = true) | ||
public abstract class MongoAbstractConfig implements Serializable { | ||
|
||
private static final long serialVersionUID = -3830568531897300005L; | ||
|
||
public static final int DEFAULT_BATCH_SIZE = 100; | ||
|
||
public static final long DEFAULT_BATCH_TIME_MS = 1000; | ||
|
||
@FieldDoc( | ||
required = true, | ||
defaultValue = "", | ||
help = "The URI of MongoDB that the connector connects to " | ||
+ "(see: https://docs.mongodb.com/manual/reference/connection-string/)" | ||
) | ||
private final String mongoUri; | ||
|
||
@FieldDoc( | ||
defaultValue = "", | ||
help = "The database name to which the collection belongs " | ||
+ "and which must be watched for the source connector " | ||
+ "(required for the sink connector)" | ||
) | ||
private final String database; | ||
|
||
@FieldDoc( | ||
defaultValue = "", | ||
help = "The collection name where the messages are written " | ||
+ "or which is watched for the source connector " | ||
+ "(required for the sink connector)" | ||
) | ||
private final String collection; | ||
|
||
@FieldDoc( | ||
defaultValue = "" + DEFAULT_BATCH_SIZE, | ||
help = "The batch size of write to or read from the database" | ||
) | ||
private final int batchSize; | ||
|
||
@FieldDoc( | ||
defaultValue = "" + DEFAULT_BATCH_TIME_MS, | ||
help = "The batch operation interval in milliseconds") | ||
private final long batchTimeMs; | ||
|
||
public MongoAbstractConfig() { | ||
this(null, null, null, DEFAULT_BATCH_SIZE, DEFAULT_BATCH_TIME_MS); | ||
} | ||
|
||
@JsonCreator | ||
public MongoAbstractConfig( | ||
@JsonProperty("mongoUri") String mongoUri, | ||
@JsonProperty("database") String database, | ||
@JsonProperty("collection") String collection, | ||
@JsonProperty("batchSize") int batchSize, | ||
@JsonProperty("batchTimeMs") long batchTimeMs | ||
) { | ||
this.mongoUri = mongoUri; | ||
this.database = database; | ||
this.collection = collection; | ||
this.batchSize = batchSize; | ||
this.batchTimeMs = batchTimeMs; | ||
} | ||
|
||
public void validate() { | ||
checkArgument(!StringUtils.isEmpty(getMongoUri()), "Required MongoDB URI is not set."); | ||
checkArgument(getBatchSize() > 0, "batchSize must be a positive integer."); | ||
checkArgument(getBatchTimeMs() > 0, "batchTimeMs must be a positive long."); | ||
} | ||
} |
105 changes: 0 additions & 105 deletions
105
pulsar-io/mongo/src/main/java/org/apache/pulsar/io/mongodb/MongoConfig.java
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
pulsar-io/mongo/src/main/java/org/apache/pulsar/io/mongodb/MongoSinkConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.pulsar.io.mongodb; | ||
|
||
import static com.google.common.base.Preconditions.checkArgument; | ||
import com.fasterxml.jackson.annotation.JsonCreator; | ||
import com.fasterxml.jackson.annotation.JsonProperty; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; | ||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.Map; | ||
import lombok.Data; | ||
import lombok.EqualsAndHashCode; | ||
import lombok.experimental.Accessors; | ||
import org.apache.commons.lang3.StringUtils; | ||
|
||
/** | ||
* Configuration class for the MongoDB Sink Connectors. | ||
*/ | ||
@Data | ||
@EqualsAndHashCode(callSuper = false) | ||
@Accessors(chain = true) | ||
public class MongoSinkConfig extends MongoAbstractConfig { | ||
|
||
private static final long serialVersionUID = 8805978990904614084L; | ||
|
||
@JsonCreator | ||
public MongoSinkConfig( | ||
@JsonProperty("mongoUri") String mongoUri, | ||
@JsonProperty("database") String database, | ||
@JsonProperty("collection") String collection, | ||
@JsonProperty("batchSize") int batchSize, | ||
@JsonProperty("batchTimeMs") long batchTimeMs | ||
) { | ||
super(mongoUri, database, collection, batchSize, batchTimeMs); | ||
} | ||
|
||
public static MongoSinkConfig load(String yamlFile) throws IOException { | ||
final ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); | ||
final MongoSinkConfig cfg = mapper.readValue(new File(yamlFile), MongoSinkConfig.class); | ||
|
||
return cfg; | ||
} | ||
|
||
public static MongoSinkConfig load(Map<String, Object> map) throws IOException { | ||
final ObjectMapper mapper = new ObjectMapper(); | ||
final MongoSinkConfig cfg = mapper.readValue(new ObjectMapper().writeValueAsString(map), MongoSinkConfig.class); | ||
|
||
return cfg; | ||
} | ||
|
||
@Override | ||
public void validate() { | ||
super.validate(); | ||
checkArgument(!StringUtils.isEmpty(getDatabase()), "Required MongoDB database name is not set."); | ||
checkArgument(!StringUtils.isEmpty(getCollection()), "Required MongoDB collection name is not set."); | ||
} | ||
} |
Oops, something went wrong.