Skip to content
This repository has been archived by the owner on Jul 10, 2024. It is now read-only.

Commit

Permalink
temp commit
Browse files Browse the repository at this point in the history
  • Loading branch information
JackLiu00521 committed Jun 27, 2022
1 parent 5feb168 commit 301b210
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public class ExperimentTaskSpec {
private String cmd;
private Map<String, String> envVars;

// should ignored in JSON Serialization
// should be ignored in JSON Serialization
private Map<String, String> resourceMap;

public ExperimentTaskSpec() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ public String getContainerMemMB() {
V1PodTemplateSpec podSpec = getTemplate();
return String.join(" ",
podSpec.getSpec().getContainers().get(0)
.getResources().getLimits().get("memory").
// .getResources().getLimits().get("memory").
.getResources().getRequests().get("memory").
getNumber().divide(BigDecimal.valueOf(1000000)).toString() + "M");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,12 @@ public static XGBoostJobSpec parseXGBoostJobSpec(ExperimentSpec experimentSpec)
for (Map.Entry<String, ExperimentTaskSpec> entry : experimentSpec.getSpec().entrySet()) {
String replicaType = entry.getKey();
ExperimentTaskSpec taskSpec = entry.getValue();

if (TFJobReplicaType.isSupportedReplicaType(replicaType)) {
MLJobReplicaSpec replicaSpec = new MLJobReplicaSpec();
replicaSpec.setReplicas(taskSpec.getReplicas());
V1PodTemplateSpec podTemplateSpec = parseTemplateSpec(taskSpec, experimentSpec);

replicaSpec.setTemplate(podTemplateSpec);
replicaSpecMap.put(XGBoostJobReplicaType.valueOf(replicaType), replicaSpec);
} else {
Expand Down Expand Up @@ -140,7 +140,7 @@ public static PyTorchJobSpec parsePyTorchJobSpec(ExperimentSpec experimentSpec)
MLJobReplicaSpec replicaSpec = new MLJobReplicaSpec();
replicaSpec.setReplicas(taskSpec.getReplicas());
V1PodTemplateSpec podTemplateSpec = parseTemplateSpec(taskSpec, experimentSpec);

replicaSpec.setTemplate(podTemplateSpec);
replicaSpecMap.put(PyTorchJobReplicaType.valueOf(replicaType), replicaSpec);
} else {
Expand Down Expand Up @@ -181,12 +181,12 @@ private static TFJobSpec parseTFJobSpec(ExperimentSpec experimentSpec)
for (Map.Entry<String, ExperimentTaskSpec> entry : experimentSpec.getSpec().entrySet()) {
String replicaType = entry.getKey();
ExperimentTaskSpec taskSpec = entry.getValue();

if (TFJobReplicaType.isSupportedReplicaType(replicaType)) {
MLJobReplicaSpec replicaSpec = new MLJobReplicaSpec();
replicaSpec.setReplicas(taskSpec.getReplicas());
V1PodTemplateSpec podTemplateSpec = parseTemplateSpec(taskSpec, experimentSpec);

replicaSpec.setTemplate(podTemplateSpec);
replicaSpecMap.put(TFJobReplicaType.valueOf(replicaType), replicaSpec);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,30 +61,23 @@ public class ExperimentSpecParserTest extends SpecBuilder {
private static SubmarineConfiguration conf =
SubmarineConfiguration.getInstance();


@Test
public void testValidTensorFlowExperiment() {
int x = 100;
Assert.assertTrue(x == (int)100.0);
}
public void testValidTensorFlowExperiment() throws IOException,
URISyntaxException, InvalidSpecException {
ExperimentSpec experimentSpec = (ExperimentSpec) buildFromJsonFile(ExperimentSpec.class, tfJobReqFile);
TFJob tfJob = (TFJob) ExperimentSpecParser.parseJob(experimentSpec);
validateMetadata(experimentSpec.getMeta(), tfJob.getMetadata(),
ExperimentMeta.SupportedMLFramework.TENSORFLOW.getName().toLowerCase()
);
// Validate ExperimentMeta without envVars. Related to SUBMARINE-534.
experimentSpec.getMeta().setEnvVars(null);
validateMetadata(experimentSpec.getMeta(), tfJob.getMetadata(),
ExperimentMeta.SupportedMLFramework.TENSORFLOW.getName().toLowerCase()
);

// @Test
// public void testValidTensorFlowExperiment() throws IOException,
// URISyntaxException, InvalidSpecException {
// ExperimentSpec experimentSpec = (ExperimentSpec) buildFromJsonFile(ExperimentSpec.class, tfJobReqFile);
// TFJob tfJob = (TFJob) ExperimentSpecParser.parseJob(experimentSpec);
// validateMetadata(experimentSpec.getMeta(), tfJob.getMetadata(),
// ExperimentMeta.SupportedMLFramework.TENSORFLOW.getName().toLowerCase()
// );
// // Validate ExperimentMeta without envVars. Related to SUBMARINE-534.
// experimentSpec.getMeta().setEnvVars(null);
// validateMetadata(experimentSpec.getMeta(), tfJob.getMetadata(),
// ExperimentMeta.SupportedMLFramework.TENSORFLOW.getName().toLowerCase()
// );

// validateReplicaSpec(experimentSpec, tfJob, TFJobReplicaType.Ps);
// validateReplicaSpec(experimentSpec, tfJob, TFJobReplicaType.Worker);
// }
validateReplicaSpec(experimentSpec, tfJob, TFJobReplicaType.Ps);
validateReplicaSpec(experimentSpec, tfJob, TFJobReplicaType.Worker);
}

@Test
public void testInvalidTensorFlowExperiment() throws IOException,
Expand Down Expand Up @@ -160,7 +153,8 @@ public void testInvalidPyTorchJobSpec() throws IOException,
@Test
public void testValidXGBoostExperiment() throws IOException,
URISyntaxException, InvalidSpecException {
ExperimentSpec experimentSpec = (ExperimentSpec) buildFromJsonFile(ExperimentSpec.class, xgboostJobReqFile);
ExperimentSpec experimentSpec = (ExperimentSpec) buildFromJsonFile(ExperimentSpec.class,
xgboostJobReqFile);
XGBoostJob xgboostJob = (XGBoostJob) ExperimentSpecParser.parseJob(experimentSpec);
validateMetadata(experimentSpec.getMeta(), xgboostJob.getMetadata(),
ExperimentMeta.SupportedMLFramework.XGBOOST.getName().toLowerCase()
Expand All @@ -173,7 +167,8 @@ public void testValidXGBoostExperiment() throws IOException,
@Test
public void testInvalidXGBoostExperiment() throws IOException,
URISyntaxException {
ExperimentSpec experimentSpec = (ExperimentSpec) buildFromJsonFile(ExperimentSpec.class, xgboostJobReqFile);
ExperimentSpec experimentSpec = (ExperimentSpec) buildFromJsonFile(ExperimentSpec.class,
xgboostJobReqFile);
// Case 1. Invalid framework name
experimentSpec.getMeta().setFramework("fooframework");
try {
Expand All @@ -185,7 +180,8 @@ public void testInvalidXGBoostExperiment() throws IOException,

// Case 2. Invalid XGBoost Replica name. It can only be "master" or "worker".
experimentSpec = (ExperimentSpec) buildFromJsonFile(ExperimentSpec.class, xgboostJobReqFile);
experimentSpec.getSpec().put("foo", experimentSpec.getSpec().get(XGBoostJobReplicaType.Master.getTypeName()));
experimentSpec.getSpec().put("foo", experimentSpec.getSpec().get(
XGBoostJobReplicaType.Master.getTypeName()));
experimentSpec.getSpec().remove(XGBoostJobReplicaType.Master.getTypeName());
try {
ExperimentSpecParser.parseJob(experimentSpec);
Expand Down Expand Up @@ -213,14 +209,15 @@ private void validateReplicaSpec(ExperimentSpec experimentSpec,
mlJobReplicaSpec = ((XGBoostJob) mlJob).getSpec().getReplicaSpecs().get(type);
}
Assert.assertNotNull(mlJobReplicaSpec);

ExperimentTaskSpec definedPyTorchMasterTask = experimentSpec.getSpec().
get(type.getTypeName());

// replica
int expectedMasterReplica = definedPyTorchMasterTask.getReplicas();
Assert.assertEquals(expectedMasterReplica,
(int) mlJobReplicaSpec.getReplicas());
// Image
// image
String expectedMasterImage = definedPyTorchMasterTask.getImage() == null ?
experimentSpec.getEnvironment().getImage() : definedPyTorchMasterTask.getImage();
String actualMasterImage = mlJobReplicaSpec.getContainerImageName();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"meta": {
"name": "xgboost-dist-mnist",
"namespace": "submarine",
"framework": "XGBoost",
"cmd": "python /var/mnist.py --backend gloo",
"envVars": {
"ENV_1": "ENV1"
}
},
"environment": {
"image": "apache/submarine:pytorch-dist-mnist-1.0"
},
"spec": {
"Master": {
"name": "master",
"replicas": 1,
"resources": "cpu=2,memory=2048M"
},
"Worker": {
"name": "worker",
"replicas": 2,
"resources": "cpu=1,memory=1024M"
}
}
}

0 comments on commit 301b210

Please sign in to comment.