Create
Creates a job for a cluster.
- TypeScript
- Python
import {
  cloudApi,
  decodeMessage,
  serviceClients,
  Session,
  waitForOperation,
} from "@yandex-cloud/nodejs-sdk";
const CreateJobRequest = cloudApi.dataproc.job_service.CreateJobRequest;
const Job = cloudApi.dataproc.job.Job;
(async () => {
  // Authenticate with the OAuth token taken from the environment.
  const authToken = process.env["YC_OAUTH_TOKEN"];
  const session = new Session({ oauthToken: authToken });
  const client = session.client(serviceClients.JobServiceClient);

  // Submit the job. At most one of the job specs (mapreduceJob, sparkJob,
  // pysparkJob, hiveJob) may be set in a single request.
  const operation = await client.create(
    CreateJobRequest.fromPartial({
      clusterId: "clusterId",
      // name: "name",
      // mapreduceJob: {
      //   args: ["args"],
      //   jarFileUris: ["jarFileUris"],
      //   fileUris: ["fileUris"],
      //   archiveUris: ["archiveUris"],
      //   properties: {"key": "properties"},
      //   mainJarFileUri: "mainJarFileUri",
      //   mainClass: "mainClass"
      // },
      // sparkJob: {
      //   args: ["args"],
      //   jarFileUris: ["jarFileUris"],
      //   fileUris: ["fileUris"],
      //   archiveUris: ["archiveUris"],
      //   properties: {"key": "properties"},
      //   mainJarFileUri: "mainJarFileUri",
      //   mainClass: "mainClass",
      //   packages: ["packages"],
      //   repositories: ["repositories"],
      //   excludePackages: ["excludePackages"]
      // },
      // pysparkJob: {
      //   args: ["args"],
      //   jarFileUris: ["jarFileUris"],
      //   fileUris: ["fileUris"],
      //   archiveUris: ["archiveUris"],
      //   properties: {"key": "properties"},
      //   mainPythonFileUri: "mainPythonFileUri",
      //   pythonFileUris: ["pythonFileUris"],
      //   packages: ["packages"],
      //   repositories: ["repositories"],
      //   excludePackages: ["excludePackages"]
      // },
      // hiveJob: {
      //   properties: {"key": "properties"},
      //   continueOnFailure: true,
      //   scriptVariables: {"key": "scriptVariables"},
      //   jarFileUris: ["jarFileUris"],
      //   queryFileUri: "queryFileUri",
      //   queryList: {
      //     queries: ["queries"]
      //   }
      // }
    })
  );

  // Wait for the long-running operation to finish and decode the created Job
  // resource from its response.
  const finishedOp = await waitForOperation(operation, session);
  if (finishedOp.response) {
    const result = decodeMessage<typeof Job>(finishedOp.response);
    console.log(result);
  }
})();
import os
import grpc
import yandexcloud
from yandex.cloud.dataproc.v1.job_service_pb2 import CreateJobMetadata
from yandex.cloud.dataproc.v1.job_service_pb2 import CreateJobRequest
from yandex.cloud.dataproc.v1.job_pb2 import HiveJob
from yandex.cloud.dataproc.v1.job_pb2 import Job
from yandex.cloud.dataproc.v1.job_service_pb2_grpc import JobServiceStub
from yandex.cloud.dataproc.v1.job_pb2 import MapreduceJob
from yandex.cloud.dataproc.v1.job_pb2 import PysparkJob
from yandex.cloud.dataproc.v1.job_pb2 import QueryList
from yandex.cloud.dataproc.v1.job_pb2 import SparkJob
token = os.getenv("YC_OAUTH_TOKEN")
sdk = yandexcloud.SDK(token=token)

service = sdk.client(JobServiceStub)

# Submit the job. At most one of the job specs (mapreduce_job, spark_job,
# pyspark_job, hive_job) may be set in a single request.
operation = service.Create(
    CreateJobRequest(
        cluster_id="clusterId",
        # name = "name",
        # mapreduce_job = MapreduceJob(
        #     args = ["args"],
        #     jar_file_uris = ["jarFileUris"],
        #     file_uris = ["fileUris"],
        #     archive_uris = ["archiveUris"],
        #     properties = {"key": "properties"},
        #     main_jar_file_uri = "mainJarFileUri",
        #     main_class = "mainClass"
        # ),
        # spark_job = SparkJob(
        #     args = ["args"],
        #     jar_file_uris = ["jarFileUris"],
        #     file_uris = ["fileUris"],
        #     archive_uris = ["archiveUris"],
        #     properties = {"key": "properties"},
        #     main_jar_file_uri = "mainJarFileUri",
        #     main_class = "mainClass",
        #     packages = ["packages"],
        #     repositories = ["repositories"],
        #     exclude_packages = ["excludePackages"]
        # ),
        # pyspark_job = PysparkJob(
        #     args = ["args"],
        #     jar_file_uris = ["jarFileUris"],
        #     file_uris = ["fileUris"],
        #     archive_uris = ["archiveUris"],
        #     properties = {"key": "properties"},
        #     main_python_file_uri = "mainPythonFileUri",
        #     python_file_uris = ["pythonFileUris"],
        #     packages = ["packages"],
        #     repositories = ["repositories"],
        #     exclude_packages = ["excludePackages"]
        # ),
        # hive_job = HiveJob(
        #     properties = {"key": "properties"},
        #     continue_on_failure = True,
        #     script_variables = {"key": "scriptVariables"},
        #     jar_file_uris = ["jarFileUris"],
        #     query_file_uri = "queryFileUri",
        #     query_list = QueryList(
        #         queries = ["queries"]
        #     )
        # )
    )
)

# Wait for the long-running operation to finish and get the created Job
# resource from its response.
operation_result = sdk.wait_operation_and_get_result(
    operation,
    response_type=Job,
    meta_type=CreateJobMetadata,
)
print(operation_result)
CreateJobRequest
clusterId
: string
ID of the cluster to create a job for.
name
: string
Name of the job.
One of jobSpec
Specification for the job.
mapreduceJob
: MapreduceJob
Specification for a MapReduce job.
sparkJob
: SparkJob
Specification for a Spark job.
pysparkJob
: PysparkJob
Specification for a PySpark job.
hiveJob
: HiveJob
Specification for a Hive job.
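The four job specs form a single oneof, so a request carries at most one of them; assigning a second spec replaces the first. A minimal Python sketch of this behavior follows, with a placeholder cluster ID and JAR URI:
from yandex.cloud.dataproc.v1.job_service_pb2 import CreateJobRequest

# Placeholder cluster ID; a real request needs an existing cluster.
request = CreateJobRequest(cluster_id="c9q1abcdefgh", name="example-job")
request.spark_job.main_jar_file_uri = "s3a://my-bucket/jobs/app.jar"  # selects spark_job
request.hive_job.query_list.queries.append("SELECT 1;")               # replaces spark_job
print(request.WhichOneof("job_spec"))  # -> "hive_job"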
MapreduceJob
args
: string
Optional arguments to pass to the driver.
jarFileUris
: string
JAR file URIs to add to CLASSPATH of the Data Proc driver and each task.
fileUris
: string
URIs of resource files to be copied to the working directory of Data Proc drivers and distributed Hadoop tasks.
archiveUris
: string
URIs of archives to be extracted to the working directory of Data Proc drivers and tasks.
properties
: string
Property names and values, used to configure Data Proc and MapReduce.
One of driver
mainJarFileUri
: string
HCFS URI of the .jar file containing the driver class.
mainClass
: string
The name of the driver class.
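As an illustration, a hypothetical MapreduceJob spec in Python that runs a Hadoop Streaming job; the bucket paths and property value are placeholders, not defaults:
from yandex.cloud.dataproc.v1.job_pb2 import MapreduceJob

mapreduce_job = MapreduceJob(
    # Driver oneof: main_class is set here, so main_jar_file_uri stays unset.
    main_class="org.apache.hadoop.streaming.HadoopStreaming",
    args=[
        "-mapper", "mapper.py",
        "-reducer", "reducer.py",
        "-input", "s3a://my-bucket/input",
        "-output", "s3a://my-bucket/output",
    ],
    # Scripts are copied into the working directory of the driver and tasks.
    file_uris=[
        "s3a://my-bucket/scripts/mapper.py",
        "s3a://my-bucket/scripts/reducer.py",
    ],
    properties={"yarn.app.mapreduce.am.resource.mb": "2048"},
)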
SparkJob
args
: string
Optional arguments to pass to the driver.
jarFileUris
: string
JAR file URIs to add to CLASSPATH of the Data Proc driver and each task.
fileUris
: string
URIs of resource files to be copied to the working directory of Data Proc drivers and distributed Hadoop tasks.
archiveUris
: string
URIs of archives to be extracted to the working directory of Data Proc drivers and tasks.
properties
: string
Property names and values, used to configure Data Proc and Spark.
mainJarFileUri
: string
The HCFS URI of the JAR file containing the main class for the job.
mainClass
: string
The name of the driver class.
packages
: string
List of maven coordinates of jars to include on the driver and executor classpaths.
repositories
: string
List of additional remote repositories to search for the maven coordinates given with --packages.
excludePackages
: string
List of groupId:artifactId pairs to exclude while resolving the dependencies provided in --packages, to avoid dependency conflicts.
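A hypothetical SparkJob spec in Python; packages, repositories, and exclude_packages behave like spark-submit's --packages, --repositories, and --exclude-packages. The main class, coordinates, and paths are examples only:
from yandex.cloud.dataproc.v1.job_pb2 import SparkJob

spark_job = SparkJob(
    main_jar_file_uri="s3a://my-bucket/jobs/etl-assembly.jar",   # placeholder JAR
    main_class="com.example.etl.Main",                           # hypothetical driver class
    args=["--date", "2024-01-01"],
    properties={"spark.executor.memory": "4g"},
    packages=["org.apache.spark:spark-avro_2.12:3.3.2"],         # Maven coordinates to resolve
    repositories=["https://repos.spark-packages.org"],           # extra repositories to search
    exclude_packages=["org.slf4j:slf4j-log4j12"],                # groupId:artifactId to skip
)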
PysparkJob
args
: string
Optional arguments to pass to the driver.
jarFileUris
: string
JAR file URIs to add to CLASSPATH of the Data Proc driver and each task.
fileUris
: string
URIs of resource files to be copied to the working directory of Data Proc drivers and distributed Hadoop tasks.
archiveUris
: string
URIs of archives to be extracted to the working directory of Data Proc drivers and tasks.
properties
: string
Property names and values, used to configure Data Proc and PySpark.
mainPythonFileUri
: string
URI of the file with the driver code. Must be a .py file.
pythonFileUris
: string
URIs of Python files to pass to the PySpark framework.
packages
: string
List of maven coordinates of jars to include on the driver and executor classpaths.
repositories
: string
List of additional remote repositories to search for the maven coordinates given with --packages.
excludePackages
: string
List of groupId:artifactId pairs to exclude while resolving the dependencies provided in --packages, to avoid dependency conflicts.
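A hypothetical PysparkJob spec in Python; the driver is the .py file in main_python_file_uri, helper modules go to python_file_uris, and all paths are placeholders:
from yandex.cloud.dataproc.v1.job_pb2 import PysparkJob

pyspark_job = PysparkJob(
    main_python_file_uri="s3a://my-bucket/jobs/job.py",      # driver code (.py file)
    python_file_uris=["s3a://my-bucket/jobs/helpers.py"],    # extra modules for the driver
    args=["--input", "s3a://my-bucket/input", "--output", "s3a://my-bucket/output"],
    properties={"spark.submit.deployMode": "cluster"},
)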
HiveJob
properties
: string
Property names and values, used to configure Data Proc and Hive.
continueOnFailure
: bool
Flag indicating whether a job should continue to run if a query fails.
scriptVariables
: string
Query variables and their values.
jarFileUris
: string
JAR file URIs to add to CLASSPATH of the Hive driver and each task.
One of queryType
queryFileUri
: string
URI of the script with all the necessary Hive queries.
queryList
: QueryList
List of Hive queries to be used in the job.
QueryList
queries
: string
List of Hive queries.
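A hypothetical HiveJob spec in Python that uses the queryList variant of the queryType oneof (query_file_uri is the alternative); the queries are examples only:
from yandex.cloud.dataproc.v1.job_pb2 import HiveJob, QueryList

hive_job = HiveJob(
    query_list=QueryList(
        queries=[
            "CREATE TABLE IF NOT EXISTS logs (line STRING);",
            "SELECT COUNT(*) FROM logs;",
        ]
    ),
    continue_on_failure=False,  # stop the job at the first failed query
)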
Operation
An Operation resource. For more information, see Operation.
id
: string
ID of the operation.
description
: string
Description of the operation. 0-256 characters long.
createdAt
: google.protobuf.Timestamp
Creation timestamp.
createdBy
: string
ID of the user or service account who initiated the operation.
modifiedAt
: google.protobuf.Timestamp
The time when the Operation resource was last modified.
done
: bool
If the value is false, it means the operation is still in progress.
If true, the operation is completed, and either error or response is available.
metadata
: google.protobuf.Any
Service-specific metadata associated with the operation. It typically contains the ID of the target resource that the operation is performed on. Any method that returns a long-running operation should document the metadata type, if any.
One of result
The operation result.
If done == false and there was no failure detected, neither error nor response is set.
If done == false and there was a failure detected, error is set.
If done == true, exactly one of error or response is set.
error
: google.rpc.Status
The error result of the operation in case of failure or cancellation.
response
: google.protobuf.Any
The normal response of the operation in case of success.
If the original method returns no data on success, such as Delete, the response is google.protobuf.Empty. If the original method is the standard Create/Update, the response should be the target resource of the operation. Any method that returns a long-running operation should document the response type, if any.
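For reference, the Python sketch below inspects these Operation fields by hand, assuming the operation returned by service.Create in the example above; the SDK helper wait_operation_and_get_result performs the same unpacking for you:
from yandex.cloud.dataproc.v1.job_pb2 import Job
from yandex.cloud.dataproc.v1.job_service_pb2 import CreateJobMetadata

metadata = CreateJobMetadata()
operation.metadata.Unpack(metadata)              # ID of the job being created
print("creating job", metadata.job_id)

if not operation.done:
    print("operation", operation.id, "is still in progress")
elif operation.HasField("error"):                # failure or cancellation
    print("operation failed:", operation.error.message)
else:                                            # success: response holds the Job resource
    job = Job()
    operation.response.Unpack(job)
    print("job", job.id, "status:", job.status)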