Manage Shards of the Stream

Sharding in HStreamDB

A stream is a logical concept for producers and consumers. Under the hood, the data passing through a stream is stored in the shards of that stream in an append-only fashion.

A shard is essentially the primary storage unit, which contains all the records corresponding to certain partition keys. Every stream contains multiple shards spread across multiple server nodes. Since we believe that a stream on its own is a sufficiently concise and powerful abstraction, the sharding logic is minimally visible to the user. For example, during writing or consumption, each stream appears to be managed as a single entity as far as the user is concerned.

However, for cases where the user needs more fine-grained control and better flexibility, we offer interfaces for inspecting the shards of a stream, as well as other interfaces, such as Reader, for working with shards.

Specify the Number of Shards When Creating a Stream

To set the number of shards a stream should have, specify the shardCount attribute when creating the stream.

List Shards

The following examples list all the shards of a stream.

  1. // ListShardsExample.java
  2. package docs.code.examples;
  3. import io.hstream.HStreamClient;
  4. import io.hstream.Shard;
  5. import java.util.List;
  6. public class ListShardsExample {
  7. public static void main(String[] args) throws Exception {
  8. // TODO(developer): Replace these variables before running the sample.
  9. String serviceUrl = "127.0.0.1:6570";
  10. if (System.getenv("serviceUrl") != null) {
  11. serviceUrl = System.getenv("serviceUrl");
  12. }
  13. String streamName = "your_h_records_stream_name";
  14. HStreamClient client = HStreamClient.builder().serviceUrl(serviceUrl).build();
  15. listShardsExample(client, streamName);
  16. client.close();
  17. }
  18. public static void listShardsExample(HStreamClient client, String streamName) {
  19. List<Shard> shards = client.listShards(streamName);
  20. for (Shard shard : shards) {
  21. System.out.println(shard.getStreamName());
  22. }
  23. }
  24. }
  1. // ExampleListShards.go
  2. package examples
  3. import (
  4. "fmt"
  5. "github.com/hstreamdb/hstreamdb-go/hstream"
  6. "log"
  7. )
  8. func ExampleListShards() error {
  9. client, err := hstream.NewHStreamClient(YourHStreamServiceUrl)
  10. if err != nil {
  11. log.Fatalf("Create client error: %s", err)
  12. }
  13. defer client.Close()
  14. streamName := "testStream"
  15. shards, err := client.ListShards(streamName)
  16. if err != nil {
  17. log.Fatalf("Liste shards error: %s", err)
  18. }
  19. for _, shard := range shards {
  20. fmt.Printf("%+v\n", shard)
  21. }
  22. return nil
  23. }
  # https://github.com/hstreamdb/hstreamdb-py/blob/main/examples/snippets/guides.py
  import asyncio
  import hstreamdb
  import os

  # NOTE: Replace with your own host and port
  host = os.getenv("GUIDE_HOST", "127.0.0.1")
  # Environment variables are strings; normalize so port is always an int.
  port = int(os.getenv("GUIDE_PORT", 6570))
  stream_name = "your_stream"
  subscription = "your_subscription"


  # Run: asyncio.run(main(your_async_function))
  async def main(*funcs):
      """Open an insecure client to host:port and await each of funcs with it, in order."""
      async with await hstreamdb.insecure_client(host=host, port=port) as client:
          for f in funcs:
              await f(client)


  async def list_shards(client):
      """Print the shards of the stream named by stream_name as a list."""
      # The client call must be awaited, like the other client calls in this
      # snippet; otherwise list() would be handed a coroutine instead of shards.
      shards = await client.list_shards(stream_name)
      print(list(shards))