Create

Creates a job for a cluster.

import {
  cloudApi,
  decodeMessage,
  serviceClients,
  Session,
  waitForOperation,
} from "@yandex-cloud/nodejs-sdk";

const CreateJobRequest = cloudApi.dataproc.job_service.CreateJobRequest;
const Job = cloudApi.dataproc.manager_job.Job;

(async () => {
  // Authenticate with an OAuth token taken from the environment.
  const authToken = process.env["YC_OAUTH_TOKEN"];
  const session = new Session({ oauthToken: authToken });
  const client = session.client(serviceClients.JobServiceClient);

  // Only clusterId is filled in below; uncomment and fill in exactly one job spec.
  const operation = await client.create(
    CreateJobRequest.fromPartial({
      clusterId: "clusterId",
      // name: "name",
      // mapreduceJob: {
      //   args: ["args"],
      //   jarFileUris: ["jarFileUris"],
      //   fileUris: ["fileUris"],
      //   archiveUris: ["archiveUris"],
      //   properties: {"key": "properties"},
      //   mainJarFileUri: "mainJarFileUri",
      //   mainClass: "mainClass"
      // },
      // sparkJob: {
      //   args: ["args"],
      //   jarFileUris: ["jarFileUris"],
      //   fileUris: ["fileUris"],
      //   archiveUris: ["archiveUris"],
      //   properties: {"key": "properties"},
      //   mainJarFileUri: "mainJarFileUri",
      //   mainClass: "mainClass",
      //   packages: ["packages"],
      //   repositories: ["repositories"],
      //   excludePackages: ["excludePackages"]
      // },
      // pysparkJob: {
      //   args: ["args"],
      //   jarFileUris: ["jarFileUris"],
      //   fileUris: ["fileUris"],
      //   archiveUris: ["archiveUris"],
      //   properties: {"key": "properties"},
      //   mainPythonFileUri: "mainPythonFileUri",
      //   pythonFileUris: ["pythonFileUris"],
      //   packages: ["packages"],
      //   repositories: ["repositories"],
      //   excludePackages: ["excludePackages"]
      // },
      // hiveJob: {
      //   properties: {"key": "properties"},
      //   continueOnFailure: true,
      //   scriptVariables: {"key": "scriptVariables"},
      //   jarFileUris: ["jarFileUris"],
      //   queryFileUri: "queryFileUri",
      //   queryList: {
      //     queries: ["queries"]
      //   }
      // }
    })
  );

  // Wait for the long-running create operation to complete.
  const finishedOp = await waitForOperation(operation, session);

  if (finishedOp.response) {
    const result = decodeMessage<typeof Job>(finishedOp.response);
    console.log(result);
  }
})();
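While the operation is still running, its metadata already identifies the job being created. A minimal sketch of reading it, reusing operation and decodeMessage from the example above and assuming CreateJobMetadata is exported from the same job_service module as CreateJobRequest:

const CreateJobMetadata = cloudApi.dataproc.job_service.CreateJobMetadata;

// The create operation's metadata carries the IDs of the target cluster and job
// (assumed fields on the generated message: clusterId, jobId).
if (operation.metadata) {
  const meta = decodeMessage<typeof CreateJobMetadata>(operation.metadata);
  console.log(`Creating job ${meta.jobId} in cluster ${meta.clusterId}`);
}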

CreateJobRequest

clusterId : string

ID of the cluster to create a job for.

name : string

Name of the job.

One of jobSpec

Specification for the job. Set exactly one of the following fields (see the sketch after this list).

  • mapreduceJob : MapreduceJob

    Specification for a MapReduce job.

  • sparkJob : SparkJob

    Specification for a Spark job.

  • pysparkJob : PysparkJob

    Specification for a PySpark job.

  • hiveJob : HiveJob

    Specification for a Hive job.
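A minimal sketch of selecting one job spec, reusing CreateJobRequest from the example above. The cluster ID, job name, and Spark fields are placeholders; the other three spec fields are simply left unset:

const request = CreateJobRequest.fromPartial({
  clusterId: "clusterId",                            // placeholder cluster ID
  name: "example-spark-job",                         // placeholder job name
  sparkJob: {
    mainJarFileUri: "s3a://my-bucket/jobs/app.jar",  // placeholder JAR URI
    mainClass: "com.example.App",                    // placeholder driver class
    args: ["--mode", "batch"],
  },
  // mapreduceJob, pysparkJob, and hiveJob stay unset: one spec per request.
});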

MapreduceJob

args : string[]

Optional arguments to pass to the driver.

jarFileUris : string[]

JAR file URIs to add to CLASSPATH of the Data Proc driver and each task.

fileUris : string[]

URIs of resource files to be copied to the working directory of Data Proc drivers and distributed Hadoop tasks.

archiveUris : string[]

URIs of archives to be extracted to the working directory of Data Proc drivers and tasks.

properties : { [key: string]: string }

Property names and values, used to configure Data Proc and MapReduce.

One of driver

  • mainJarFileUri : string

    HCFS URI of the .jar file containing the driver class.

  • mainClass : string

    The name of the driver class.
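For illustration, a MapreduceJob spec using the mainClass driver option (mainClass and mainJarFileUri are mutually exclusive). Every URI, class name, and property below is a placeholder; the object is passed as the mapreduceJob field of CreateJobRequest.fromPartial:

const mapreduceJob = {
  mainClass: "org.apache.hadoop.streaming.HadoopStreaming", // example driver class
  args: [
    "-mapper", "mapper.py",
    "-reducer", "reducer.py",
    "-input", "s3a://my-bucket/input",     // placeholder bucket paths
    "-output", "s3a://my-bucket/output",
  ],
  fileUris: [
    "s3a://my-bucket/scripts/mapper.py",
    "s3a://my-bucket/scripts/reducer.py",
  ],
  properties: { "yarn.app.mapreduce.am.resource.mb": "2048" }, // example Hadoop property
};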

SparkJob

args : string[]

Optional arguments to pass to the driver.

jarFileUris : string[]

JAR file URIs to add to CLASSPATH of the Data Proc driver and each task.

fileUris : string[]

URIs of resource files to be copied to the working directory of Data Proc drivers and distributed Hadoop tasks.

archiveUris : string[]

URIs of archives to be extracted to the working directory of Data Proc drivers and tasks.

properties : { [key: string]: string }

Property names and values, used to configure Data Proc and Spark.

mainJarFileUri : string

The HCFS URI of the JAR file containing the main class for the job.

mainClass : string

The name of the driver class.

packages : string[]

List of Maven coordinates of JAR files to include on the driver and executor classpaths.

repositories : string[]

List of additional remote repositories to search for the Maven coordinates given in packages.

excludePackages : string[]

List of groupId:artifactId pairs to exclude while resolving the dependencies given in packages, to avoid dependency conflicts.
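A sketch of a SparkJob spec that pulls extra dependencies by Maven coordinates; all URIs, coordinates, and properties are placeholders:

const sparkJob = {
  mainJarFileUri: "s3a://my-bucket/jobs/etl.jar",        // placeholder application JAR
  mainClass: "com.example.Etl",                          // placeholder driver class
  packages: ["org.apache.spark:spark-avro_2.12:3.2.1"],  // example Maven coordinate
  repositories: ["https://repo.example.com/maven2"],     // hypothetical extra repository
  excludePackages: ["com.example:conflicting-lib"],      // example exclusion
  properties: { "spark.executor.memory": "4g" },         // example Spark property
};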

PysparkJob

args : string[]

Optional arguments to pass to the driver.

jarFileUris : string[]

JAR file URIs to add to CLASSPATH of the Data Proc driver and each task.

fileUris : string[]

URIs of resource files to be copied to the working directory of Data Proc drivers and distributed Hadoop tasks.

archiveUris : string[]

URIs of archives to be extracted to the working directory of Data Proc drivers and tasks.

properties : { [key: string]: string }

Property names and values, used to configure Data Proc and PySpark.

mainPythonFileUri : string

URI of the file with the driver code. Must be a .py file.

pythonFileUris : string[]

URIs of Python files to pass to the PySpark framework.

packages : string[]

List of Maven coordinates of JAR files to include on the driver and executor classpaths.

repositories : string[]

List of additional remote repositories to search for the Maven coordinates given in packages.

excludePackages : string[]

List of groupId:artifactId pairs to exclude while resolving the dependencies given in packages, to avoid dependency conflicts.
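A sketch of a PysparkJob spec: the driver must be a .py file, and additional Python modules go in pythonFileUris. All values are placeholders:

const pysparkJob = {
  mainPythonFileUri: "s3a://my-bucket/jobs/main.py",     // placeholder driver script
  pythonFileUris: ["s3a://my-bucket/jobs/helpers.py"],   // extra Python modules for the job
  jarFileUris: ["s3a://my-bucket/lib/custom-udf.jar"],   // placeholder JAR with UDFs
  args: ["--partitions", "16"],
  properties: { "spark.submit.deployMode": "cluster" },  // example Spark property
};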

HiveJob

properties : { [key: string]: string }

Property names and values, used to configure Data Proc and Hive.

continueOnFailure : bool

Flag indicating whether a job should continue to run if a query fails.

scriptVariables : { [key: string]: string }

Query variables and their values.

jarFileUris : string[]

JAR file URIs to add to CLASSPATH of the Hive driver and each task.

One of queryType

  • queryFileUri : string

    URI of the script with all the necessary Hive queries.

  • queryList : QueryList

    List of Hive queries to be used in the job.

QueryList

queries : string[]

List of Hive queries.
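A sketch of a HiveJob spec using an inline query list (queryFileUri and queryList are mutually exclusive). The ${hivevar:...} reference assumes standard Hive variable substitution for scriptVariables; all names and values are placeholders:

const hiveJob = {
  continueOnFailure: false,
  scriptVariables: { "CITY": "Moscow" },  // placeholder script variable
  queryList: {
    queries: [
      "CREATE TABLE IF NOT EXISTS visits (city STRING, total BIGINT);",
      "SELECT * FROM visits WHERE city = '${hivevar:CITY}';", // assumed hivevar substitution
    ],
  },
};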

Operation

An Operation resource. For more information, see Operation.

id : string

ID of the operation.

description : string

Description of the operation. 0-256 characters long.

createdAt : google.protobuf.Timestamp

Creation timestamp.

createdBy : string

ID of the user or service account who initiated the operation.

modifiedAt : google.protobuf.Timestamp

The time when the Operation resource was last modified.

done : bool

If the value is false, it means the operation is still in progress. If true, the operation is completed, and either error or response is available.

metadata : google.protobuf.Any

Service-specific metadata associated with the operation. It typically contains the ID of the target resource that the operation is performed on. Any method that returns a long-running operation should document the metadata type, if any.

One of result

The operation result. If done == false and there was no failure detected, neither error nor response is set. If done == false and there was a failure detected, error is set. If done == true, exactly one of error or response is set.

  • error : google.rpc.Status

    The error result of the operation in case of failure or cancellation.

  • response : google.protobuf.Any

    The normal response of the operation in case of success.

    If the original method returns no data on success, such as Delete, the response is google.protobuf.Empty. If the original method is the standard Create/Update, the response should be the target resource of the operation. Any method that returns a long-running operation should document the response type, if any.
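A sketch of consuming the result oneof, as a variant of the tail of the example at the top of the page (it reuses operation, session, Job, and decodeMessage). Depending on SDK behavior, waitForOperation may also throw on a failed operation, so the explicit error branch is shown for illustration; the printed Job fields (id, status) are assumptions about the generated type:

const finishedOp = await waitForOperation(operation, session);

if (finishedOp.error) {
  // google.rpc.Status: a numeric code plus a human-readable message.
  console.error(`Job creation failed: ${finishedOp.error.code} ${finishedOp.error.message}`);
} else if (finishedOp.response) {
  // For Create, the response is the target resource, i.e. the Job.
  const job = decodeMessage<typeof Job>(finishedOp.response);
  console.log(`Created job ${job.id} with status ${job.status}`);
}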