Create
Creates a new Apache Kafka® cluster in the specified folder.
- TypeScript
- Python
import {
  cloudApi,
  decodeMessage,
  serviceClients,
  Session,
  waitForOperation,
} from "@yandex-cloud/nodejs-sdk";

const Cluster = cloudApi.mdb.kafka_cluster.Cluster;
const Cluster_Environment = cloudApi.mdb.kafka_cluster.Cluster_Environment;
const CreateClusterRequest =
  cloudApi.mdb.kafka_cluster_service.CreateClusterRequest;

(async () => {
  const authToken = process.env["YC_OAUTH_TOKEN"];
  const session = new Session({ oauthToken: authToken });
  const client = session.client(serviceClients.KafkaClusterServiceClient);

  const operation = await client.create(
    CreateClusterRequest.fromPartial({
      folderId: "folderId",
      // name: "name",
      // description: "description",
      // labels: {"key": "labels"},
      environment: Cluster_Environment.PRODUCTION,
      configSpec: {
        // version: "3.5",
        // kafka: {
        //   resources: {
        //     resourcePresetId: "resourcePresetId",
        //     diskSize: 0,
        //     diskTypeId: "diskTypeId"
        //   }
        // },
        // zookeeper: {
        //   resources: {
        //     resourcePresetId: "resourcePresetId",
        //     diskSize: 0,
        //     diskTypeId: "diskTypeId"
        //   }
        // },
        // zoneId: ["zoneId"],
        // brokersCount: 1,
        // assignPublicIp: true
      },
      // topicSpecs: [{
      //   name: "name",
      //   partitions: 0,
      //   replicationFactor: 0
      // }],
      // userSpecs: [{
      //   name: "name",
      //   password: "password"
      // }],
      networkId: "networkId",
      // subnetId: ["subnetId"],
      // securityGroupIds: ["securityGroupIds"],
      // hostGroupIds: ["hostGroupIds"],
      // deletionProtection: true
    })
  );
  const finishedOp = await waitForOperation(operation, session);
  if (finishedOp.response) {
    const result = decodeMessage<typeof Cluster>(finishedOp.response);
    console.log(result);
  }
})();
import os

import grpc
import yandexcloud

from google.protobuf.wrappers_pb2 import Int64Value
from yandex.cloud.mdb.kafka.v1.cluster_pb2 import Cluster
from yandex.cloud.mdb.kafka.v1.cluster_pb2 import ConfigSpec
from yandex.cloud.mdb.kafka.v1.cluster_pb2 import Resources
from yandex.cloud.mdb.kafka.v1.cluster_service_pb2 import CreateClusterMetadata
from yandex.cloud.mdb.kafka.v1.cluster_service_pb2 import CreateClusterRequest
from yandex.cloud.mdb.kafka.v1.cluster_service_pb2_grpc import ClusterServiceStub

token = os.getenv("YC_OAUTH_TOKEN")
sdk = yandexcloud.SDK(token=token)

service = sdk.client(ClusterServiceStub)
operation = service.Create(
    CreateClusterRequest(
        folder_id="folderId",
        # name="name",
        # description="description",
        # labels={"key": "labels"},
        environment=Cluster.Environment.PRODUCTION,
        config_spec=ConfigSpec(
            # version="3.5",
            kafka=ConfigSpec.Kafka(
                resources=Resources(
                    resource_preset_id="resourcePresetId",
                    disk_size=0,
                    disk_type_id="diskTypeId",
                ),
            ),
            # zookeeper=ConfigSpec.Zookeeper(
            #     resources=Resources(...),
            # ),
            zone_id=["zoneId"],
            # brokers_count=Int64Value(value=1),
            # assign_public_ip=True,
        ),
        # topic_specs=[TopicSpec(...)],
        # user_specs=[UserSpec(...)],
        network_id="networkId",
        # subnet_id=["subnetId"],
        # security_group_ids=["securityGroupIds"],
        # host_group_ids=["hostGroupIds"],
        # deletion_protection=True,
        # maintenance_window=MaintenanceWindow(...),
    )
)

operation_result = sdk.wait_operation_and_get_result(
    operation,
    response_type=Cluster,
    meta_type=CreateClusterMetadata,
)
print(operation_result)
CreateClusterRequest
folderId
: string
ID of the folder to create the Apache Kafka® cluster in.
To get the folder ID, make a yandex.cloud.resourcemanager.v1.FolderService.List request.
name
: string
Name of the Apache Kafka® cluster. The name must be unique within the folder.
description
: string
Description of the Apache Kafka® cluster.
labels
: string
Custom labels for the Apache Kafka® cluster as key:value pairs.
For example, "project": "mvp" or "source": "dictionary".
environment
: Cluster.Environment
Deployment environment of the Apache Kafka® cluster.
configSpec
: ConfigSpec
Kafka and hosts configuration for the Apache Kafka® cluster.
topicSpecs
: TopicSpec
One or more configurations of topics to be created in the Apache Kafka® cluster.
userSpecs
: UserSpec
Configurations of accounts to be created in the Apache Kafka® cluster.
networkId
: string
ID of the network to create the Apache Kafka® cluster in.
subnetId
: string
IDs of subnets to create brokers in.
securityGroupIds
: string
User security groups.
hostGroupIds
: string
Host groups to place VMs of the cluster on.
deletionProtection
: bool
Deletion Protection inhibits deletion of the cluster.
maintenanceWindow
: MaintenanceWindow
Window of maintenance operations.
ConfigSpec
Kafka
resources
: Resources
Resources allocated to Kafka brokers.
One of kafkaConfig
Kafka broker configuration.
kafkaConfig_2_8
: KafkaConfig2_8
kafkaConfig_3
: KafkaConfig3
Zookeeper
resources
: Resources
Resources allocated to ZooKeeper hosts.
KRaft
resources
: Resources
Resources allocated to KRaft controller hosts.
RestAPIConfig
enabled
: bool
Is REST API enabled for this cluster.
version
: string
Version of Apache Kafka® used in the cluster. Possible values: 2.8, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6.
kafka
: Kafka
Configuration and resource allocation for Kafka brokers.
zookeeper
: Zookeeper
Configuration and resource allocation for ZooKeeper hosts.
zoneId
: string
IDs of availability zones where Kafka brokers reside.
brokersCount
: google.protobuf.Int64Value
The number of Kafka brokers deployed in each availability zone.
assignPublicIp
: bool
The flag that defines whether a public IP address is assigned to the cluster.
If the value is true, then the Apache Kafka® cluster is available on the Internet via its public IP address.
unmanagedTopics
: bool
Allows topic management via the Admin API. Deprecated: this feature is now permanently enabled.
schemaRegistry
: bool
Enables managed Schema Registry on the cluster.
access
: Access
Access policy for external services.
restApiConfig
: RestAPIConfig
Configuration of REST API.
diskSizeAutoscaling
: DiskSizeAutoscaling
Disk size autoscaling settings.
kraft
: KRaft
Configuration and resource allocation for KRaft-controller hosts.
TopicSpec
name
: string
Name of the topic.
partitions
: google.protobuf.Int64Value
The number of the topic's partitions.
replicationFactor
: google.protobuf.Int64Value
The number of copies (replicas) of the topic's data kept in the cluster.
One of topicConfig
User-defined settings for the topic.
topicConfig_2_8
: TopicConfig2_8
topicConfig_3
: TopicConfig3
UserSpec
name
: string
Name of the Kafka user.
password
: string
Password of the Kafka user.
permissions
: Permission
Set of permissions granted to the user.
MaintenanceWindow
One of policy
anytime
: AnytimeMaintenanceWindow
weeklyMaintenanceWindow
: WeeklyMaintenanceWindow
Resources
resourcePresetId
: string
ID of the preset for computational resources available to a host (CPU, memory, etc.). All available presets are listed in the documentation.
diskSize
: int64
Volume of the storage available to a host, in bytes. Must be greater than 2 × (partition segment size in bytes) × (partitions count), so that each partition can have one active segment file and one closed segment file that can be deleted; see the sketch after this message.
diskTypeId
: string
Type of the storage environment for the host.
KafkaConfig2_8
Kafka version 2.8 broker configuration.
compressionType
: CompressionType
Cluster topics compression type.
logFlushIntervalMessages
: google.protobuf.Int64Value
The number of messages accumulated on a log partition before messages are flushed to disk.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig2_8.flush_messages setting.
logFlushIntervalMs
: google.protobuf.Int64Value
The maximum time (in milliseconds) that a message in any topic is kept in memory before flushed to disk. If not set, the value of log_flush_scheduler_interval_ms is used.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig2_8.flush_ms setting.
logFlushSchedulerIntervalMs
: google.protobuf.Int64Value
The frequency of checks (in milliseconds) for any logs that need to be flushed to disk. This check is done by the log flusher.
logRetentionBytes
: google.protobuf.Int64Value
Partition size limit; Kafka will discard old log segments to free up space if the delete TopicConfig2_8.cleanup_policy is in effect.
This setting is helpful if you need to control the size of a log due to limited disk space.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig2_8.retention_bytes setting.
logRetentionHours
: google.protobuf.Int64Value
The number of hours to keep a log segment file before deleting it.
logRetentionMinutes
: google.protobuf.Int64Value
The number of minutes to keep a log segment file before deleting it.
If not set, the value of log_retention_hours is used.
logRetentionMs
: google.protobuf.Int64Value
The number of milliseconds to keep a log segment file before deleting it.
If not set, the value of log_retention_minutes is used.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig2_8.retention_ms setting.
logSegmentBytes
: google.protobuf.Int64Value
The maximum size of a single log file.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig2_8.segment_bytes setting.
logPreallocate
: google.protobuf.BoolValue
Whether to preallocate the file when creating a new segment.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig2_8.preallocate setting.
socketSendBufferBytes
: google.protobuf.Int64Value
The SO_SNDBUF buffer of the socket server sockets. If the value is -1, the OS default will be used.
socketReceiveBufferBytes
: google.protobuf.Int64Value
The SO_RCVBUF buffer of the socket server sockets. If the value is -1, the OS default will be used.
autoCreateTopicsEnable
: google.protobuf.BoolValue
Enables automatic topic creation on the server.
numPartitions
: google.protobuf.Int64Value
Default number of partitions per topic across the whole cluster.
defaultReplicationFactor
: google.protobuf.Int64Value
Default replication factor for topics across the whole cluster.
messageMaxBytes
: google.protobuf.Int64Value
The largest record batch size allowed by Kafka. Default value: 1048588.
replicaFetchMaxBytes
: google.protobuf.Int64Value
The number of bytes of messages to attempt to fetch for each partition. Default value: 1048576.
sslCipherSuites
: string
A list of cipher suites.
offsetsRetentionMinutes
: google.protobuf.Int64Value
Offset storage time after a consumer group loses all its consumers. Default: 10080.
saslEnabledMechanisms
: SaslMechanism
The list of SASL mechanisms enabled in the Kafka server. Default: SCRAM_SHA_512.
KafkaConfig3
Kafka version 3.x broker configuration.
compressionType
: CompressionType
Cluster topics compression type.
logFlushIntervalMessages
: google.protobuf.Int64Value
The number of messages accumulated on a log partition before messages are flushed to disk.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig3.flush_messages setting.
logFlushIntervalMs
: google.protobuf.Int64Value
The maximum time (in milliseconds) that a message in any topic is kept in memory before flushed to disk. If not set, the value of log_flush_scheduler_interval_ms is used.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig3.flush_ms setting.
logFlushSchedulerIntervalMs
: google.protobuf.Int64Value
The frequency of checks (in milliseconds) for any logs that need to be flushed to disk. This check is done by the log flusher.
logRetentionBytes
: google.protobuf.Int64Value
Partition size limit; Kafka will discard old log segments to free up space if the delete TopicConfig3.cleanup_policy is in effect.
This setting is helpful if you need to control the size of a log due to limited disk space.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig3.retention_bytes setting.
logRetentionHours
: google.protobuf.Int64Value
The number of hours to keep a log segment file before deleting it.
logRetentionMinutes
: google.protobuf.Int64Value
The number of minutes to keep a log segment file before deleting it.
If not set, the value of log_retention_hours is used.
logRetentionMs
: google.protobuf.Int64Value
The number of milliseconds to keep a log segment file before deleting it.
If not set, the value of log_retention_minutes is used.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig3.retention_ms setting.
logSegmentBytes
: google.protobuf.Int64Value
The maximum size of a single log file.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig3.segment_bytes setting.
logPreallocate
: google.protobuf.BoolValue
Whether to preallocate the file when creating a new segment.
This is the global cluster-level setting that can be overridden on a topic level by using the TopicConfig3.preallocate setting.
socketSendBufferBytes
: google.protobuf.Int64Value
The SO_SNDBUF buffer of the socket server sockets. If the value is -1, the OS default will be used.
socketReceiveBufferBytes
: google.protobuf.Int64Value
The SO_RCVBUF buffer of the socket server sockets. If the value is -1, the OS default will be used.
autoCreateTopicsEnable
: google.protobuf.BoolValue
Enables automatic topic creation on the server.
numPartitions
: google.protobuf.Int64Value
Default number of partitions per topic across the whole cluster.
defaultReplicationFactor
: google.protobuf.Int64Value
Default replication factor for topics across the whole cluster.
messageMaxBytes
: google.protobuf.Int64Value
The largest record batch size allowed by Kafka. Default value: 1048588.
replicaFetchMaxBytes
: google.protobuf.Int64Value
The number of bytes of messages to attempt to fetch for each partition. Default value: 1048576.
sslCipherSuites
: string
A list of cipher suites.
offsetsRetentionMinutes
: google.protobuf.Int64Value
Offset storage time after a consumer group loses all its consumers. Default: 10080.
saslEnabledMechanisms
: SaslMechanism
The list of SASL mechanisms enabled in the Kafka server. Default: SCRAM_SHA_512.
Kafka
resources
: Resources
Resources allocated to Kafka brokers.
kafkaConfig_2_8
: KafkaConfig2_8
kafkaConfig_3
: KafkaConfig3
Zookeeper
resources
: Resources
Resources allocated to ZooKeeper hosts.
Access
dataTransfer
: bool
Allow access for DataTransfer.
RestAPIConfig
enabled
: bool
Is REST API enabled for this cluster.
DiskSizeAutoscaling
plannedUsageThreshold
: int64
Threshold of storage usage (in percent) that triggers automatic scaling of the storage during the maintenance window. Zero value means disabled threshold.
emergencyUsageThreshold
: int64
Threshold of storage usage (in percent) that triggers immediate automatic scaling of the storage. Zero value means disabled threshold.
diskSizeLimit
: int64
New storage size (in bytes) that is set when one of the thresholds is achieved.
KRaft
resources
: Resources
Resources allocated to KRaft controller hosts.
TopicConfig2_8
Topic settings for Apache Kafka® version 2.8.
CleanupPolicy
CLEANUP_POLICY_UNSPECIFIED
CLEANUP_POLICY_DELETE
This policy discards log segments when either their retention time or log size limit is reached. See also: KafkaConfig2_8.log_retention_ms and other similar parameters.
CLEANUP_POLICY_COMPACT
This policy compacts messages in the log.
CLEANUP_POLICY_COMPACT_AND_DELETE
This policy uses both compaction and deletion for messages and log segments.
cleanupPolicy
: CleanupPolicy
Retention policy to use on old log messages.
compressionType
: CompressionType
The compression type for a given topic.
deleteRetentionMs
: google.protobuf.Int64Value
The amount of time in milliseconds to retain delete tombstone markers for log compacted topics.
fileDeleteDelayMs
: google.protobuf.Int64Value
The time to wait before deleting a file from the filesystem.
flushMessages
: google.protobuf.Int64Value
The number of messages accumulated on a log partition before messages are flushed to disk.
This setting overrides the cluster-level KafkaConfig2_8.log_flush_interval_messages setting on the topic level.
flushMs
: google.protobuf.Int64Value
The maximum time in milliseconds that a message in the topic is kept in memory before flushed to disk.
This setting overrides the cluster-level KafkaConfig2_8.log_flush_interval_ms setting on the topic level.
minCompactionLagMs
: google.protobuf.Int64Value
The minimum time in milliseconds a message will remain uncompacted in the log.
retentionBytes
: google.protobuf.Int64Value
The maximum size a partition can grow to before Kafka will discard old log segments to free up space if the delete cleanup_policy is in effect.
It is helpful if you need to control the size of a log due to limited disk space.
This setting overrides the cluster-level KafkaConfig2_8.log_retention_bytes setting on the topic level.
retentionMs
: google.protobuf.Int64Value
The number of milliseconds to keep a log segment's file before deleting it.
This setting overrides the cluster-level KafkaConfig2_8.log_retention_ms setting on the topic level.
maxMessageBytes
: google.protobuf.Int64Value
The largest record batch size allowed in the topic.
minInsyncReplicas
: google.protobuf.Int64Value
This configuration specifies the minimum number of replicas that must acknowledge a write to the topic for the write to be considered successful (when a producer sets acks to "all").
segmentBytes
: google.protobuf.Int64Value
This configuration controls the segment file size for the log. Retention and cleaning is always done a file at a time so a larger segment size means fewer files but less granular control over retention.
This setting overrides the cluster-level KafkaConfig2_8.log_segment_bytes setting on the topic level.
preallocate
: google.protobuf.BoolValue
True if we should preallocate the file on disk when creating a new log segment.
This setting overrides the cluster-level KafkaConfig2_8.log_preallocate setting on the topic level.
TopicConfig3
Topic settings for Apache Kafka® version 3.x.
CleanupPolicy
CLEANUP_POLICY_UNSPECIFIED
CLEANUP_POLICY_DELETE
This policy discards log segments when either their retention time or log size limit is reached. See also: KafkaConfig3.log_retention_ms and other similar parameters.
CLEANUP_POLICY_COMPACT
This policy compacts messages in the log.
CLEANUP_POLICY_COMPACT_AND_DELETE
This policy uses both compaction and deletion for messages and log segments.
cleanupPolicy
: CleanupPolicy
Retention policy to use on old log messages.
compressionType
: CompressionType
The compression type for a given topic.
deleteRetentionMs
: google.protobuf.Int64Value
The amount of time in milliseconds to retain delete tombstone markers for log compacted topics.
fileDeleteDelayMs
: google.protobuf.Int64Value
The time to wait before deleting a file from the filesystem.
flushMessages
: google.protobuf.Int64Value
The number of messages accumulated on a log partition before messages are flushed to disk.
This setting overrides the cluster-level KafkaConfig3.log_flush_interval_messages setting on the topic level.
flushMs
: google.protobuf.Int64Value
The maximum time in milliseconds that a message in the topic is kept in memory before flushed to disk.
This setting overrides the cluster-level KafkaConfig3.log_flush_interval_ms setting on the topic level.
minCompactionLagMs
: google.protobuf.Int64Value
The minimum time in milliseconds a message will remain uncompacted in the log.
retentionBytes
: google.protobuf.Int64Value
The maximum size a partition can grow to before Kafka will discard old log segments to free up space if the delete cleanup_policy is in effect.
It is helpful if you need to control the size of a log due to limited disk space.
This setting overrides the cluster-level KafkaConfig3.log_retention_bytes setting on the topic level.
retentionMs
: google.protobuf.Int64Value
The number of milliseconds to keep a log segment's file before deleting it.
This setting overrides the cluster-level KafkaConfig3.log_retention_ms setting on the topic level.
maxMessageBytes
: google.protobuf.Int64Value
The largest record batch size allowed in the topic.
minInsyncReplicas
: google.protobuf.Int64Value
This configuration specifies the minimum number of replicas that must acknowledge a write to the topic for the write to be considered successful (when a producer sets acks to "all").
segmentBytes
: google.protobuf.Int64Value
This configuration controls the segment file size for the log. Retention and cleaning is always done a file at a time so a larger segment size means fewer files but less granular control over retention.
This setting overrides the cluster-level KafkaConfig3.log_segment_bytes setting on the topic level.
preallocate
: google.protobuf.BoolValue
True if we should preallocate the file on disk when creating a new log segment.
This setting overrides the cluster-level KafkaConfig3.log_preallocate setting on the topic level.
Permission
AccessRole
ACCESS_ROLE_UNSPECIFIED
ACCESS_ROLE_PRODUCER
Producer role for the user.
ACCESS_ROLE_CONSUMER
Consumer role for the user.
ACCESS_ROLE_ADMIN
Admin role for the user.
topicName
: string
Name or prefix-pattern with wildcard for the topic that the permission grants access to.
To get the topic name, make a TopicService.List request.
role
: AccessRole
Access role type to grant to the user.
allowHosts
: string
Lists hosts allowed for this permission. Only IP addresses are allowed as the value of a single host. When not defined, access from any host is allowed.
Bear in mind that the same host might appear in multiple permissions at the same time, so removing an individual permission doesn't automatically restrict access from the allow_hosts of that permission. If the same host(s) are listed for another permission of the same principal/topic, the host(s) remain allowed. The sketch below shows a permission restricted to one host.
AnytimeMaintenanceWindow
WeeklyMaintenanceWindow
WeekDay
WEEK_DAY_UNSPECIFIED
MON
TUE
WED
THU
FRI
SAT
SUN
day
: WeekDay
Day of the week.
hour
: int64
Hour of the day in UTC.
Operation
An Operation resource. For more information, see Operation.
id
: string
ID of the operation.
description
: string
Description of the operation. 0-256 characters long.
createdAt
: google.protobuf.Timestamp
Creation timestamp.
createdBy
: string
ID of the user or service account who initiated the operation.
modifiedAt
: google.protobuf.Timestamp
The time when the Operation resource was last modified.
done
: bool
If the value is false, it means the operation is still in progress. If true, the operation is completed, and either error or response is available.
metadata
: google.protobuf.Any
Service-specific metadata associated with the operation. It typically contains the ID of the target resource that the operation is performed on. Any method that returns a long-running operation should document the metadata type, if any.
One of result
The operation result.
If done == false and there was no failure detected, neither error nor response is set.
If done == false and there was a failure detected, error is set.
If done == true, exactly one of error or response is set.
error
: google.rpc.Status
The error result of the operation in case of failure or cancellation.
response
: google.protobuf.Any
The normal response of the operation in case of success.
If the original method returns no data on success, such as Delete, the response is google.protobuf.Empty. If the original method is the standard Create/Update, the response should be the target resource of the operation. Any method that returns a long-running operation should document the response type, if any.