Here are some complete Benthos usage examples:
1. Basic configuration example
The YAML configuration file:
```yaml
# config.yaml
input:
  gcp_pubsub:
    project: my-project
    subscription: my-subscription

pipeline:
  processors:
    - bloblang: |
        # Transform the data
        root = {
          "id": this.id,
          "processed_at": now(),
          "data": this.body.uppercase()
        }

output:
  file:
    path: "./output/${! timestamp_unix() }.json"
    codec: lines
```
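To make the mapping concrete, here is an illustrative before/after (the field values are hypothetical and `processed_at` depends on the wall clock):

```yaml
# Incoming Pub/Sub message:
#   {"id": "abc", "body": "hello"}
# Document written to the output file:
#   {"id": "abc", "processed_at": "2024-01-01T00:00:00Z", "data": "HELLO"}
```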
2. CSV batch processing example

```yaml
# batch_csv.yaml
input:
  csv:
    paths:
      - ./data/*.csv
    parse_header_row: true

pipeline:
  processors:
    # 1. Coerce the parsed CSV columns into proper types
    - bloblang: |
        root.id = this.id.number()
        root.name = this.name
        root.value = this.value.number()
    # 2. Stamp each record with a batch id and timestamp
    - bloblang: |
        root = this
        root.batch_id = uuid_v4()
        root.processed_at = now()

output:
  sql_raw:
    driver: postgres
    dsn: postgres://user:pass@localhost:5432/db
    query: |
      INSERT INTO records (id, name, value, batch_id, processed_at)
      VALUES ($1, $2, $3, $4, $5)
    args_mapping: |
      [ this.id, this.name, this.value, this.batch_id, this.processed_at ]
    # 3. Write in batches of 100, or every second, whichever comes first
    batching:
      count: 100
      period: 1s
```
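Illustratively, a single CSV row flows through like this (values are hypothetical):

```yaml
# Row in ./data/example.csv (header: id,name,value):
#   42,widget,3.14
# Document handed to sql_raw:
#   {"id": 42, "name": "widget", "value": 3.14,
#    "batch_id": "7f1c...", "processed_at": "2024-01-01T00:00:00Z"}
```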
3. Embedding Benthos in Go code

```go
package main

import (
	"context"
	"fmt"

	"github.com/benthosdev/benthos/v4/public/service"

	// Import all standard components so the YAML below can reference them
	_ "github.com/benthosdev/benthos/v4/public/components/all"
)

func main() {
	// Create a stream builder
	builder := service.NewStreamBuilder()

	// Configure the stream; batching happens at the output, where each
	// batch is archived into lines and gzip-compressed before sending
	err := builder.SetYAML(`
input:
  generate:
    mapping: |
      root.id = uuid_v4()
      root.timestamp = timestamp_unix()
    interval: 1s
    count: 100

output:
  http_client:
    url: http://localhost:8080/api/batch
    verb: POST
    headers:
      Content-Type: application/json
    batching:
      count: 10
      processors:
        - archive:
            format: lines
        - compress:
            algorithm: gzip
`)
	if err != nil {
		panic(err)
	}

	// Add a custom processor written in Go
	err = builder.AddProcessorFunc(func(ctx context.Context, m *service.Message) (service.MessageBatch, error) {
		// Custom processing logic
		body, err := m.AsBytes()
		if err != nil {
			return nil, err
		}
		fmt.Printf("Processing: %s\n", string(body))
		return service.MessageBatch{m}, nil
	})
	if err != nil {
		panic(err)
	}

	// Build and run the stream
	stream, err := builder.Build()
	if err != nil {
		panic(err)
	}
	if err := stream.Run(context.Background()); err != nil {
		panic(err)
	}
}
```
4. Kafka to Elasticsearch batch processing

```yaml
# kafka_to_es.yaml
input:
  kafka:
    addresses:
      - localhost:9092
    topics:
      - logs
    consumer_group: batch-processor
    batching:
      count: 1000   # 1000 records per batch
      period: 30s   # or every 30 seconds

pipeline:
  threads: 4  # process batches in parallel
  processors:
    # Parse and enrich each JSON log line
    - bloblang: |
        root = this
        root.processed = now()
        root.host = this.host.lowercase()
    # Drop DEBUG logs
    - mapping: |
        root = if this.level == "DEBUG" { deleted() }
    # Compute the target index per document
    - mapping: |
        root = this
        root.index = "logs-" + now().ts_format("2006-01-02")

output:
  elasticsearch:
    urls:
      - http://localhost:9200
    index: ${! json("index") }
    id: ${! json("id") }
    action: index
    max_in_flight: 10   # concurrent requests
    batching:
      count: 500   # output batch size
      period: 10s
```
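As a sanity check, a single log record is transformed and routed roughly like this (values are illustrative; the index name depends on the current date):

```yaml
# Record consumed from the logs topic:
#   {"id": "log-1", "level": "INFO", "host": "Web-01", "msg": "ok"}
# Document indexed into Elasticsearch under logs-2024-01-01:
#   {"id": "log-1", "level": "INFO", "host": "web-01", "msg": "ok",
#    "processed": "2024-01-01T00:00:00Z", "index": "logs-2024-01-01"}
```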
5. HTTP API batch processing service

```go
// main.go
package main

import (
	"context"
	"net/http"

	"github.com/benthosdev/benthos/v4/public/service"

	// Import all standard components used by the config below
	_ "github.com/benthosdev/benthos/v4/public/components/all"
)

func main() {
	// A stream that accepts HTTP POSTs, processes the payload as a batch
	// of items, and fans the results out
	httpStream := `
input:
  http_server:
    address: 0.0.0.0:8080
    path: /ingest
    allowed_verbs: [POST]

pipeline:
  processors:
    # Each POST body is expected to be a JSON array of items
    - try:
        - bloblang: |
            root.items = this.map_each(item -> {
              "id": item.id,
              "processed": now(),
              "status": "pending"
            })
    - catch:
        - bloblang: |
            root.error = "batch processing failed"
            root.items = []
    # Split the items array into individual messages
    - mapping: |
        root = this.items
    - unarchive:
        format: json_array
    - mapping: |
        root = this
        root.batch_size = batch_size()
    # Count processed items
    - metric:
        type: counter
        name: ingested_items
        labels:
          source: http_ingest

output:
  broker:
    pattern: fan_out
    outputs:
      - redis_streams:
          url: tcp://localhost:6379
          stream: processed_items
          max_length: 1000
      - http_client:
          url: http://monitor:9090/metrics
          verb: POST
          headers:
            Content-Type: application/json
          batching:
            count: 100
`

	builder := service.NewStreamBuilder()

	// Configure from YAML
	if err := builder.SetYAML(httpStream); err != nil {
		panic(err)
	}

	// Register a custom health endpoint; with a custom mux set we must
	// serve it ourselves alongside the stream's registered endpoints
	mux := http.NewServeMux()
	mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		w.Write([]byte(`{"status":"healthy"}`))
	})
	builder.SetHTTPMux(mux)

	go func() {
		if err := http.ListenAndServe(":4195", mux); err != nil {
			panic(err)
		}
	}()

	stream, err := builder.Build()
	if err != nil {
		panic(err)
	}
	if err := stream.Run(context.Background()); err != nil {
		panic(err)
	}
}
```
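Assuming a client POSTs a JSON array to /ingest, each element becomes its own message on the Redis stream; illustratively:

```yaml
# POST /ingest with body:
#   [{"id": 1}, {"id": 2}]
# Messages appended to the processed_items stream:
#   {"id": 1, "processed": "...", "status": "pending", "batch_size": 2}
#   {"id": 2, "processed": "...", "status": "pending", "batch_size": 2}
```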
6. Error handling and retries

```yaml
# error_handling.yaml
input:
  kafka:
    addresses: [localhost:9092]
    topics: [orders]
    batching:
      count: 100

cache_resources:
  - label: redis_cache
    redis:
      url: redis://localhost:6379

pipeline:
  processors:
    - try:
        # Parse JSON, falling back to the raw payload on failure
        - bloblang: |
            root = content().string().parse_json().catch(content().string())
        # Retry the cache write with exponential backoff
        - retry:
            max_retries: 3
            backoff:
              initial_interval: 1s
              max_interval: 30s
            processors:
              - cache:
                  resource: redis_cache
                  operator: set
                  key: '${! json("order_id") }'
                  value: '${! content() }'
    - catch:
        - bloblang: |
            root.error = error()
            root.original = content().string()

output:
  fallback:
    # Primary output
    - sql_raw:
        driver: postgres
        dsn: postgres://user:pass@localhost/db
        query: "INSERT INTO orders VALUES ($1, $2, $3)"
        args_mapping: "[ this.id, this.amount, this.timestamp ]"
    # Failed writes fall through to a dead letter queue
    - kafka:
        addresses: [localhost:9092]
        topic: dead_letter_queue
        key: '${! meta("kafka_key") }'
```
7. Dynamic batching configuration

Batch policy fields such as `count` are static, so varying the batch size by content is done by classifying messages and routing them to differently configured outputs:

```yaml
# dynamic_batching.yaml
input:
  generate:
    count: 1000
    interval: 100ms
    mapping: 'root = {"value": random_int()}'

pipeline:
  processors:
    # Classify each message so the output can choose a batch policy
    - mapping: |
        root = this
        root.batch_class = if this.value > 1000 { "small" } else { "large" }

output:
  switch:
    cases:
      # High-value messages are flushed in small batches
      - check: this.batch_class == "small"
        output:
          http_client:
            url: http://api:8080/process-batch
            verb: POST
            batching:
              count: 20
      # Everything else is flushed in large batches
      - output:
          http_client:
            url: http://api:8080/process-batch
            verb: POST
            batching:
              count: 100
```
8. Running and monitoring

```bash
# 1. Install Benthos
go install github.com/benthosdev/benthos/v4/cmd/benthos@latest

# 2. Run a config file
benthos -c config.yaml

# 3. Watch the config and reload on changes
benthos -w -c config.yaml

# 4. Lint and unit-test a config
benthos lint config.yaml
benthos test config.yaml

# 5. Inspect runtime metrics
curl http://localhost:4195/stats
```
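`benthos test` runs unit tests defined either inside the config itself or in a sibling test file. A minimal sketch, assuming the section 1 config is saved as ./config.yaml (the file name config_test.yaml and the sample values are hypothetical):

```yaml
# config_test.yaml
tests:
  - name: uppercases the body
    target_processors: './config.yaml#/pipeline/processors'
    input_batch:
      - content: '{"id": "abc", "body": "hello"}'
    output_batches:
      - - bloblang: 'this.data == "HELLO" && this.id == "abc"'
```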
9. Practical tips
```go
// A custom processor plugin, registered under the name "batch_transform"
package main

import (
	"context"

	"github.com/benthosdev/benthos/v4/public/service"
)

// prefixProcessor prefixes every message body with a configured string
type prefixProcessor struct {
	prefix string
}

func (p *prefixProcessor) Process(ctx context.Context, msg *service.Message) (service.MessageBatch, error) {
	body, err := msg.AsBytes()
	if err != nil {
		return nil, err
	}
	msg.SetStructured(map[string]interface{}{
		"prefixed": p.prefix + string(body),
	})
	return service.MessageBatch{msg}, nil
}

func (p *prefixProcessor) Close(ctx context.Context) error { return nil }

func init() {
	err := service.RegisterProcessor(
		"batch_transform",
		service.NewConfigSpec().
			Field(service.NewStringField("prefix")),
		func(conf *service.ParsedConfig, mgr *service.Resources) (service.Processor, error) {
			prefix, err := conf.FieldString("prefix")
			if err != nil {
				return nil, err
			}
			return &prefixProcessor{prefix: prefix}, nil
		},
	)
	if err != nil {
		panic(err)
	}
}
```
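Once compiled into your binary, the registered processor can be referenced from YAML like any built-in (a sketch; the `prefix` value is illustrative):

```yaml
pipeline:
  processors:
    - batch_transform:
        prefix: "order-"
```

These examples demonstrate the flexibility and power of Benthos in batch processing scenarios. You can combine these components as needed to fit your actual requirements.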