Create
Creates a cluster in the specified folder.
- TypeScript
- Python
import {
  cloudApi,
  decodeMessage,
  serviceClients,
  Session,
  waitForOperation,
} from "@yandex-cloud/nodejs-sdk";

const Cluster = cloudApi.dataproc.cluster.Cluster;
const CreateClusterRequest =
  cloudApi.dataproc.cluster_service.CreateClusterRequest;
const HadoopConfig_Service = cloudApi.dataproc.cluster.HadoopConfig_Service;
const Role = cloudApi.dataproc.subcluster.Role;

(async () => {
  const authToken = process.env["YC_OAUTH_TOKEN"];
  const session = new Session({ oauthToken: authToken });
  const client = session.client(serviceClients.ClusterServiceClient);

  const operation = await client.create(
    CreateClusterRequest.fromPartial({
      folderId: "folderId",
      // name: "name",
      // description: "description",
      // labels: {"key": "labels"},
      configSpec: {
        // versionId: "versionId",
        // hadoop: {
        //   services: [HadoopConfig_Service.HDFS],
        //   properties: {"key": "properties"},
        //   sshPublicKeys: ["sshPublicKeys"],
        //   initializationActions: [{
        //     uri: "uri",
        //     args: ["args"],
        //     timeout: 0
        //   }]
        // },
        // subclustersSpec: [{
        //   name: "name",
        //   role: Role.MASTERNODE,
        //   resources: {
        //     resourcePresetId: "resourcePresetId",
        //     diskTypeId: "diskTypeId",
        //     diskSize: 0
        //   },
        //   subnetId: "subnetId",
        //   hostsCount: 0,
        //   assignPublicIp: true,
        //   autoscalingConfig: {
        //     maxHostsCount: 0,
        //     preemptible: true,
        //     measurementDuration: {
        //       seconds: 0,
        //       nanos: 0
        //     },
        //     warmupDuration: {
        //       seconds: 0,
        //       nanos: 0
        //     },
        //     stabilizationDuration: {
        //       seconds: 0,
        //       nanos: 0
        //     },
        //     cpuUtilizationTarget: 0,
        //     decommissionTimeout: 0
        //   }
        // }]
      },
      zoneId: "zoneId",
      serviceAccountId: "serviceAccountId",
      // bucket: "bucket",
      // uiProxy: true,
      // securityGroupIds: ["securityGroupIds"],
      // hostGroupIds: ["hostGroupIds"],
      // deletionProtection: true,
      // logGroupId: "logGroupId"
    })
  );
  const finishedOp = await waitForOperation(operation, session);

  if (finishedOp.response) {
    const result = decodeMessage<typeof Cluster>(finishedOp.response);
    console.log(result);
  }
})();
import os
import grpc
import yandexcloud
from yandex.cloud.dataproc.v1.subcluster_pb2 import AutoscalingConfig
from yandex.cloud.dataproc.v1.cluster_pb2 import Cluster
from yandex.cloud.dataproc.v1.cluster_service_pb2_grpc import ClusterServiceStub
from yandex.cloud.dataproc.v1.cluster_service_pb2 import CreateClusterConfigSpec
from yandex.cloud.dataproc.v1.cluster_service_pb2 import CreateClusterMetadata
from yandex.cloud.dataproc.v1.cluster_service_pb2 import CreateClusterRequest
from yandex.cloud.dataproc.v1.cluster_service_pb2 import CreateSubclusterConfigSpec
from yandex.cloud.dataproc.v1.cluster_pb2 import HadoopConfig
from yandex.cloud.dataproc.v1.cluster_pb2 import InitializationAction
from yandex.cloud.dataproc.v1.common_pb2 import Resources
from yandex.cloud.dataproc.v1.subcluster_pb2 import Role
from google.protobuf.duration_pb2 import Duration  # needed if the commented-out autoscaling durations below are used
token = os.getenv("YC_OAUTH_TOKEN")
sdk = yandexcloud.SDK(token=token)
service = sdk.client(ClusterServiceStub)
operation = service.Create(
    CreateClusterRequest(
        folder_id="folderId",
        # name = "name",
        # description = "description",
        # labels = {"key": "labels"},
        config_spec=CreateClusterConfigSpec(
            # version_id = "versionId",
            # hadoop = HadoopConfig(
            #     services = [HadoopConfig.Service.HDFS],
            #     properties = {"key": "properties"},
            #     ssh_public_keys = ["sshPublicKeys"],
            #     initialization_actions = [InitializationAction(
            #         uri = "uri",
            #         args = ["args"],
            #         timeout = 0
            #     )]
            # ),
            # subclusters_spec = [CreateSubclusterConfigSpec(
            #     name = "name",
            #     role = Role.MASTERNODE,
            #     resources = Resources(
            #         resource_preset_id = "resourcePresetId",
            #         disk_type_id = "diskTypeId",
            #         disk_size = 0
            #     ),
            #     subnet_id = "subnetId",
            #     hosts_count = 0,
            #     assign_public_ip = True,
            #     autoscaling_config = AutoscalingConfig(
            #         max_hosts_count = 0,
            #         preemptible = True,
            #         measurement_duration = Duration(
            #             seconds = 0,
            #             nanos = 0
            #         ),
            #         warmup_duration = Duration(
            #             seconds = 0,
            #             nanos = 0
            #         ),
            #         stabilization_duration = Duration(
            #             seconds = 0,
            #             nanos = 0
            #         ),
            #         cpu_utilization_target = 0,
            #         decommission_timeout = 0
            #     )
            # )]
        ),
        zone_id="zoneId",
        service_account_id="serviceAccountId",
        # bucket = "bucket",
        # ui_proxy = True,
        # security_group_ids = ["securityGroupIds"],
        # host_group_ids = ["hostGroupIds"],
        # deletion_protection = True,
        # log_group_id = "logGroupId"
    )
)
operation_result = sdk.wait_operation_and_get_result(
    operation,
    response_type=Cluster,
    meta_type=CreateClusterMetadata,
)
print(operation_result)
CreateClusterRequest
folderId
: string
ID of the folder to create a cluster in.
To get a folder ID, make a yandex.cloud.resourcemanager.v1.FolderService.List request.
name
: string
Name of the cluster. The name must be unique within the folder. The name can't be changed after the Data Proc cluster is created.
description
: string
Description of the cluster.
labels
: string
Cluster labels as key:value pairs.
configSpec
: CreateClusterConfigSpec
Configuration and resources for hosts that should be created with the cluster.
zoneId
: string
ID of the availability zone where the cluster should be placed.
To get the list of available zones, make a yandex.cloud.compute.v1.ZoneService.List request.
serviceAccountId
: string
ID of the service account to be used by the Data Proc manager agent.
bucket
: string
Name of the Object Storage bucket to use for Data Proc jobs.
uiProxy
: bool
Enables the UI Proxy feature.
securityGroupIds
: string
User security groups.
hostGroupIds
: string
Host groups to place the cluster's VMs on.
deletionProtection
: bool
Deletion protection inhibits deletion of the cluster.
logGroupId
: string
ID of the Cloud Logging log group to write logs to. If not set, logs will not be sent to the logging service.
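For orientation, here is a minimal Python sketch of the request that keeps only the fields left uncommented in the examples above (folder_id, config_spec, zone_id and service_account_id). All IDs are placeholders, and the empty config_spec is only to keep the sketch short; in practice it carries the hadoop and subclusters_spec settings described below.
from yandex.cloud.dataproc.v1.cluster_service_pb2 import (
    CreateClusterConfigSpec,
    CreateClusterRequest,
)

# Placeholder IDs; fill config_spec with the settings described in the following sections.
request = CreateClusterRequest(
    folder_id="folderId",
    config_spec=CreateClusterConfigSpec(),
    zone_id="zoneId",
    service_account_id="serviceAccountId",
)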
CreateClusterConfigSpec
versionId
: string
Version of the image for cluster provisioning.
All available versions are listed in the documentation.
hadoop
: HadoopConfig
Data Proc specific options.
subclustersSpec
: CreateSubclusterConfigSpec
Specification for creating subclusters.
HadoopConfig
Hadoop configuration that describes services installed in a cluster, their properties and settings.
Service
SERVICE_UNSPECIFIED
HDFS
YARN
MAPREDUCE
HIVE
TEZ
ZOOKEEPER
HBASE
SQOOP
FLUME
SPARK
ZEPPELIN
OOZIE
LIVY
services
: Service
Set of services used in the cluster (if empty, the default set is used).
properties
: string
Properties set for all hosts in *-site.xml configurations. The key should indicate the service and the property. For example, use the key 'hdfs:dfs.replication' to set the dfs.replication property in the file /etc/hadoop/conf/hdfs-site.xml.
sshPublicKeys
: string
List of public SSH keys to access the cluster hosts.
initializationActions
: InitializationAction
Set of initialization actions.
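To make the property-key convention concrete, here is a short Python sketch of a HadoopConfig. The chosen services, the dfs.replication value, the SSH key and the init-action URI are illustrative assumptions, not defaults.
from yandex.cloud.dataproc.v1.cluster_pb2 import HadoopConfig, InitializationAction

hadoop = HadoopConfig(
    # Explicit service set; leave it empty to get the default set.
    services=[
        HadoopConfig.Service.HDFS,
        HadoopConfig.Service.YARN,
        HadoopConfig.Service.SPARK,
    ],
    # '<service>:<property>' keys land in the matching *-site.xml file,
    # here dfs.replication in /etc/hadoop/conf/hdfs-site.xml.
    properties={"hdfs:dfs.replication": "1"},
    ssh_public_keys=["ssh-ed25519 AAAA... user@example"],  # illustrative key
    initialization_actions=[
        InitializationAction(
            uri="s3a://my-bucket/init.sh",  # assumed script location
            args=["--sample-arg"],
            timeout=300,
        )
    ],
)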
CreateSubclusterConfigSpec
name
: string
Name of the subcluster.
role
: Role
Role of the subcluster in the Data Proc cluster.
resources
: Resources
Resource configuration for hosts in the subcluster.
subnetId
: string
ID of the VPC subnet used for hosts in the subcluster.
hostsCount
: int64
Number of hosts in the subcluster.
assignPublicIp
: bool
Assign public IP addresses to all hosts in the subcluster.
autoscalingConfig
: AutoscalingConfig
Configuration for instance-group-based subclusters.
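As a sketch of a typical layout, the Python snippet below defines one master subcluster and one data subcluster, assuming the standard MASTERNODE and DATANODE roles; the preset and subnet IDs are placeholders and the sizes are arbitrary.
from yandex.cloud.dataproc.v1.cluster_service_pb2 import CreateSubclusterConfigSpec
from yandex.cloud.dataproc.v1.common_pb2 import Resources
from yandex.cloud.dataproc.v1.subcluster_pb2 import Role

subclusters_spec = [
    CreateSubclusterConfigSpec(
        name="master",
        role=Role.MASTERNODE,
        resources=Resources(
            resource_preset_id="resourcePresetId",  # placeholder preset ID
            disk_type_id="network-ssd",
            disk_size=20 * 2**30,  # 20 GiB, in bytes
        ),
        subnet_id="subnetId",
        hosts_count=1,
        assign_public_ip=True,
    ),
    CreateSubclusterConfigSpec(
        name="data",
        role=Role.DATANODE,
        resources=Resources(
            resource_preset_id="resourcePresetId",
            disk_type_id="network-hdd",
            disk_size=100 * 2**30,  # 100 GiB, in bytes
        ),
        subnet_id="subnetId",
        hosts_count=2,
    ),
]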
InitializationAction
uri
: string
URI of the executable file.
args
: string
Arguments to the initialization action.
timeout
: int64
Execution timeout.
Resources
resourcePresetId
: string
ID of the resource preset for computational resources available to a host (CPU, memory etc.). All available presets are listed in the documentation.
diskTypeId
: string
Type of the storage environment for the host. Possible values:
- network-hdd - network HDD drive,
- network-ssd - network SSD drive.
diskSize
: int64
Volume of the storage available to a host, in bytes.
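Since diskSize is expressed in bytes, a small Python sketch of the conversion (the preset ID is a placeholder; the disk types are the two listed above):
from yandex.cloud.dataproc.v1.common_pb2 import Resources

GIB = 2**30  # diskSize is given in bytes

resources = Resources(
    resource_preset_id="resourcePresetId",  # see the documentation for available presets
    disk_type_id="network-ssd",             # or "network-hdd"
    disk_size=64 * GIB,
)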
AutoscalingConfig
maxHostsCount
: int64
Upper limit for the total number of instances in the subcluster.
preemptible
: bool
Preemptible instances are stopped at least once every 24 hours, and can be stopped at any time if their resources are needed by Compute. For more information, see Preemptible Virtual Machines.
measurementDuration
: google.protobuf.Duration
Time in seconds allotted for averaging metrics.
warmupDuration
: google.protobuf.Duration
The warmup time of the instance in seconds. During this time, traffic is sent to the instance, but instance metrics are not collected.
stabilizationDuration
: google.protobuf.Duration
Minimum amount of time in seconds allotted for monitoring before Instance Groups can reduce the number of instances in the group. During this time, the group size doesn't decrease, even if the new metric values indicate that it should.
cpuUtilizationTarget
: double
Defines an autoscaling rule based on the average CPU utilization of the instance group.
decommissionTimeout
: int64
Timeout to gracefully decommission nodes during downscaling, in seconds. Default value: 120.
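The three duration fields are google.protobuf.Duration messages rather than plain integers; a Python sketch with arbitrary illustrative values:
from google.protobuf.duration_pb2 import Duration
from yandex.cloud.dataproc.v1.subcluster_pb2 import AutoscalingConfig

autoscaling_config = AutoscalingConfig(
    max_hosts_count=10,
    preemptible=True,
    measurement_duration=Duration(seconds=60),
    warmup_duration=Duration(seconds=120),
    stabilization_duration=Duration(seconds=120),
    cpu_utilization_target=66.0,
    decommission_timeout=120,  # seconds; 120 is the documented default
)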
Operation
An Operation resource. For more information, see Operation.
id
: string
ID of the operation.
description
: string
Description of the operation. 0-256 characters long.
createdAt
: google.protobuf.Timestamp
Creation timestamp.
createdBy
: string
ID of the user or service account who initiated the operation.
modifiedAt
: google.protobuf.Timestamp
The time when the Operation resource was last modified.
done
: bool
If the value is false, it means the operation is still in progress.
If true, the operation is completed, and either error or response is available.
metadata
: google.protobuf.Any
Service-specific metadata associated with the operation. It typically contains the ID of the target resource that the operation is performed on. Any method that returns a long-running operation should document the metadata type, if any.
One of result
The operation result.
If done == false and there was no failure detected, neither error nor response is set.
If done == false and there was a failure detected, error is set.
If done == true, exactly one of error or response is set.
error
: google.rpc.Status
The error result of the operation in case of failure or cancellation.
response
: google.protobuf.Any
The normal response of the operation in case of success.
If the original method returns no data on success, such as Delete, the response is google.protobuf.Empty. If the original method is the standard Create/Update, the response should be the target resource of the operation. Any method that returns a long-running operation should document the response type, if any.
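If you poll the operation yourself instead of using the wait helpers from the examples above, checking done, error and response could look like the Python sketch below. The yandex.cloud.operation module paths are assumed from the standard package layout, and the operation ID is a placeholder for the one returned by Create.
import os

import yandexcloud
from yandex.cloud.dataproc.v1.cluster_pb2 import Cluster
from yandex.cloud.operation.operation_service_pb2 import GetOperationRequest
from yandex.cloud.operation.operation_service_pb2_grpc import OperationServiceStub

sdk = yandexcloud.SDK(token=os.getenv("YC_OAUTH_TOKEN"))
operation_service = sdk.client(OperationServiceStub)

# "operationId" stands in for the id of the operation returned by the Create call.
op = operation_service.Get(GetOperationRequest(operation_id="operationId"))

if op.done:
    if op.HasField("error"):
        print("operation failed:", op.error.code, op.error.message)
    else:
        cluster = Cluster()
        op.response.Unpack(cluster)  # response is a google.protobuf.Any
        print("created cluster", cluster.id)
else:
    print("operation is still in progress")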