Skip to main content

Connections

googleCloudStorage

Commentary

added in 0.9.0

Connects to Google Cloud Storage.

You must set the environment variable GOOGLE_APPLICATION_CREDENTIALS to the location of a credential configuration file within your container. You can set Docker environment variables with either -e or --env-file, similar to how the license environment variables are passed.

The target bucket must exist prior to writing data.

A new blob will be created every 500 ms or 5000 elements, whichever happens first. You can override object size/write timing with batchConfigs. 1

Blobs are created with the key name <blob-prefix>-<ulid>.<file-suffix>, where ulid is a monotonically increasing ULID. This means all blobs in the bucket are sortable by key name.

You can also connect to GCS-compatible services like fake-gcs-server 2.


Examples

Configuring the connection

Set projectId to configure where the data is written.

{
"connections": {
"gcs": {
"kind": "googleCloudStorage",
"connectionConfigs": {
"projectId": "myProject"
}
}
}
}

Set the batch rate

Use lingerMs to control how long to wait before a new blob is written. Likewise, use batchElements to control how many events are generated before a new blob is written.

{
"connections": {
"gcs": {
"kind": "googleCloudStorage",
"connectionConfigs": {
"projectId": "myProject"
},
"batchConfigs": {
"lingerMs": 2000,
"batchElements": 10000
}
}
}
}

Set blob content

Use bucket to set the bucket, bucketConfigs to set the blob prefix and format, and data to set the content.

{
"generators": [
{
"bucket": "sandbox",
"bucketConfigs": {
"blobPrefix": "foo-",
"format": "jsonl"
},
"data": {
"a": {
"_gen": "uuid"
},
"b": {
"_gen": "boolean"
}
}
}
],
"connections": {
"gcs": {
"kind": "googleCloudStorage",
"connectionConfigs": {
"projectId": "myProject"
}
}
}
}

Overriding the host

Set host to override the endpoint, perhaps to use fake-gcs-server for local testing.

{
"connections": {
"gcs": {
"kind": "googleCloudStorage",
"connectionConfigs": {
"projectId": "myProject",
"host": "https://0.0.0.0:4443/storage/v1/b"
}
}
}
}

Serializing with Parquet

Set format to parquet. By default, ShadowTraffic will attempt to guess your Parquet schema. But if it can't, specify it manually with avroSchemaHint.

{
"generators": [
{
"bucket": "sandbox",
"bucketConfigs": {
"format": "parquet",
"blobPrefix": "part-"
},
"data": {
"luckyNumber": {
"_gen": "oneOf",
"choices": [
1,
2,
3
]
}
},
"localConfigs": {
"avroSchemaHint": {
"data": {
"type": "record",
"name": "MyRecordName",
"fields": [
{
"name": "luckyNumber",
"type": "int"
}
]
}
}
}
}
],
"connections": {
"gcs": {
"kind": "googleCloudStorage",
"connectionConfigs": {
"projectId": "myProject"
}
}
}
}

Specification

Connection JSON schema

{
"type": "object",
"properties": {
"kind": {
"type": "string",
"const": "googleCloudStorage"
},
"batchConfigs": {
"type": "object",
"properties": {
"lingerMs": {
"type": "integer",
"minimum": 0
},
"batchElements": {
"type": "integer",
"minimum": 1
}
}
},
"connectionConfigs": {
"type": "object",
"properties": {
"host": {
"type": "string"
},
"projectId": {
"type": "string"
}
},
"required": [
"projectId"
]
}
}
}

Generator JSON schema

{
"type": "object",
"properties": {
"connection": {
"type": "string"
},
"name": {
"type": "string"
},
"bucket": {
"type": "string"
},
"data": {
"type": "object"
},
"localConfigs": {
"type": "object",
"properties": {
"throttleMs": {
"oneOf": [
{
"type": "number",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
},
"maxEvents": {
"oneOf": [
{
"type": "integer",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
},
"kafkaKeyProtobufHint": {
"type": "object",
"properties": {
"schemaFile": {
"type": "string"
},
"message": {
"type": "string"
}
},
"required": [
"schemaFile",
"message"
]
},
"maxBytes": {
"type": "integer",
"minimum": 1
},
"discard": {
"type": "object",
"properties": {
"rate": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": [
"rate"
]
},
"repeat": {
"type": "object",
"properties": {
"rate": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"times": {
"oneOf": [
{
"type": "integer",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
}
},
"required": [
"rate",
"times"
]
},
"maxHistoryEvents": {
"type": "integer",
"minimum": 0
},
"maxMs": {
"type": "integer",
"minimum": 0
},
"time": {
"type": "integer"
},
"events": {
"type": "object",
"properties": {
"exactly": {
"oneOf": [
{
"type": "integer",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
}
}
},
"delay": {
"type": "object",
"properties": {
"rate": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"ms": {
"oneOf": [
{
"type": "integer",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
}
},
"required": [
"rate",
"ms"
]
},
"history": {
"type": "object",
"properties": {
"events": {
"type": "object",
"properties": {
"max": {
"type": "integer",
"minimum": 0
}
}
}
}
},
"avroSchemaHint": {
"type": "object"
},
"throttle": {
"type": "object",
"properties": {
"ms": {
"oneOf": [
{
"type": "number",
"minimum": 0
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
}
}
},
"throughput": {
"oneOf": [
{
"type": "integer",
"minimum": 1
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
},
"timeMultiplier": {
"oneOf": [
{
"type": "number"
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
},
"kafkaValueProtobufHint": {
"type": "object",
"properties": {
"schemaFile": {
"type": "string"
},
"message": {
"type": "string"
}
},
"required": [
"schemaFile",
"message"
]
}
}
},
"bucketConfigs": {
"type": "object",
"properties": {
"blobPrefix": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"properties": {
"_gen": {
"type": "string"
}
},
"required": [
"_gen"
]
}
]
},
"format": {
"type": "string",
"enum": [
"jsonl",
"parquet"
]
}
},
"required": [
"blobPrefix",
"format"
]
}
},
"required": [
"bucket",
"data",
"bucketConfigs"
]
}