Connections
azureBlobStorage
Commentary
added in 0.6.1
Connects to Azure Blob storage.
Credentials are read through access keys embedded in a `connectionString` [1]. Consider using `env` to avoid putting credentials into your configuration file.
The target container must exist prior to writing data.
A new blob will be created following the default batch rate, which can be overridden by time, elements, or serialized bytes [2].
Blobs are created with the key name `<key-prefix>-<ulid>.<file-suffix>`, where `ulid` is a monotonically increasing ULID. This means all blobs in the container are sortable by key name.
You can choose from a range of serialization formats and compression types [3].
Examples
Configuring the connection
Always set a `connectionString` to target the right storage account.
{
"connections": {
"azure": {
"kind": "azureBlobStorage",
"connectionConfigs": {
"connectionString": "xxxxxx"
}
}
}
}
Set the batch rate
By default, a new blob will be created every `500` ms or `5000` elements, whichever happens first. You can also optionally create a new blob after a certain number of serialized bytes has accumulated.
To override these:
- use `lingerMs` to set the limit on time
- use `batchElements` to set it on number of events
- use `batchBytes` to set it on size
{
"connections": {
"azure": {
"kind": "azureBlobStorage",
"connectionConfigs": {
"connectionString": "xxxxxx"
},
"batchConfigs": {
"lingerMs": 2000,
"batchElements": 10000,
"batchBytes": 5242880
}
}
}
}
Set blob content
Use `container` to set the container, `containerConfigs` to set the blob format, and `data` to set the content.
{
"generators": [
{
"container": "sandbox",
"containerConfigs": {
"keyPrefix": "foo-",
"format": "jsonl"
},
"data": {
"a": {
"_gen": "uuid"
},
"b": {
"_gen": "boolean"
}
},
"localConfigs": {
"throttleMs": 200
}
}
],
"connections": {
"azure": {
"kind": "azureBlobStorage",
"connectionConfigs": {
"connectionString": "xxxxxx"
}
}
}
}
Set the key and format
`format` can be one of `json`, `jsonl`, or `parquet`. Additionally, setting `pretty` to `true` will cause `json` output to be pretty-printed.
`compression` can optionally be set to `gzip`.
{
"generators": [
{
"container": "sandbox",
"containerConfigs": {
"keyPrefix": "foo-",
"format": "json",
"pretty": true,
"compression": "gzip"
},
"data": {
"a": {
"_gen": "boolean"
}
}
}
]
}
Specification
Connection JSON schema
{
"type": "object",
"properties": {
"kind": {
"type": "string",
"const": "azureBlobStorage"
},
"batchConfigs": {
"type": "object",
"properties": {
"lingerMs": {
"type": "integer",
"minimum": 0
},
"batchElements": {
"type": "integer",
"minimum": 1
},
"batchBytes": {
"type": "integer",
"minimum": 1
}
}
},
"connectionConfigs": {
"type": "object",
"properties": {
"connectionString": {
"type": "string"
}
},
"required": [
"connectionString"
]
}
},
"required": [
"connectionConfigs"
]
}
Generator JSON schema
{
"type": "object",
"properties": {
"connection": {
"type": "string"
},
"name": {
"type": "string"
},
"container": {
"type": "string"
},
"data": {
"type": "object"
},
"localConfigs": {
"type": "object",
"properties": {
"throttleMs": {
"oneOf": [
{
"type": "number",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
},
"maxEvents": {
"oneOf": [
{
"type": "integer",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
},
"kafkaKeyProtobufHint": {
"type": "object",
"properties": {
"schemaFile": {
"type": "string"
},
"message": {
"type": "string"
}
},
"required": [
"schemaFile",
"message"
]
},
"jsonSchemaHint": {
"type": "object"
},
"maxBytes": {
"type": "integer",
"minimum": 1
},
"discard": {
"type": "object",
"properties": {
"rate": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": [
"rate"
]
},
"repeat": {
"type": "object",
"properties": {
"rate": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"times": {
"oneOf": [
{
"type": "integer",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
}
},
"required": [
"rate",
"times"
]
},
"protobufSchemaHint": {
"type": "object",
"patternProperties": {
"^.*$": {
"type": "object",
"properties": {
"schemaFile": {
"type": "string"
},
"message": {
"type": "string"
}
},
"required": [
"schemaFile",
"message"
]
}
}
},
"maxHistoryEvents": {
"type": "integer",
"minimum": 0
},
"maxMs": {
"type": "integer",
"minimum": 0
},
"time": {
"type": "integer"
},
"events": {
"type": "object",
"properties": {
"exactly": {
"oneOf": [
{
"type": "integer",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
}
}
},
"delay": {
"type": "object",
"properties": {
"rate": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"ms": {
"oneOf": [
{
"type": "integer",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
}
},
"required": [
"rate",
"ms"
]
},
"history": {
"type": "object",
"properties": {
"events": {
"type": "object",
"properties": {
"max": {
"type": "integer",
"minimum": 0
}
}
}
}
},
"avroSchemaHint": {
"type": "object"
},
"throttle": {
"type": "object",
"properties": {
"ms": {
"oneOf": [
{
"type": "number",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
}
}
},
"throughput": {
"oneOf": [
{
"type": "integer",
"minimum": 1
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
},
"timeMultiplier": {
"oneOf": [
{
"type": "number"
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
},
"kafkaValueProtobufHint": {
"type": "object",
"properties": {
"schemaFile": {
"type": "string"
},
"message": {
"type": "string"
}
},
"required": [
"schemaFile",
"message"
]
}
}
},
"containerConfigs": {
"type": "object",
"properties": {
"keyPrefix": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
},
"format": {
"type": "string",
"enum": [
"json",
"jsonl",
"parquet"
]
},
"pretty": {
"type": "boolean"
},
"compression": {
"type": "string",
"enum": [
"gzip"
]
}
},
"required": [
"keyPrefix",
"format"
]
}
},
"required": [
"container",
"data",
"containerConfigs"
]
}