
Fast Deploy NebulaGraph Cluster with Docker Swarm

This article was written by Henson Wu from SeeYII, a financial services technology company. It was originally published on the NebulaGraph forum: https://discuss.nebula-graph.io/t/fast-deploy-nebula-graph-cluster-with-docker-swarm/693

This post describes in detail how to deploy a NebulaGraph cluster with Docker Swarm.

2. Deploy a NebulaGraph Cluster

2.1 Environment Preparation

Prepare hosts as below.

IP             Memory (GB)   CPU (# of Cores)
192.168.1.166  16            4
192.168.1.167  16            4
192.168.1.168  16            4

Please make sure that Docker has been installed on all the machines.
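
Before going further, you can quickly confirm that each host is ready (a minimal sketch; the exact commands may vary with your distribution). Also note the hostname of each host, because the placement constraints in the stack file below pin each service to a specific hostname (KF2-DATA-166, KF2-DATA-167, and KF2-DATA-168 in this article).

# Check that the Docker engine is installed and the daemon is reachable
docker --version
docker info
# Print the hostname that will be used in the placement constraints
hostname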

2.2 Initialize the Swarm Cluster

Execute the command below on host 192.168.1.166:

$ docker swarm init --advertise-addr 192.168.1.166
Swarm initialized: current node (dxn1zf6l61qsb1josjja83ngz) is now a manager.
To add a worker to this swarm, run the following command:
 docker swarm join \
 --token SWMTKN-1-49nj1cmql0jkz5s954yi3oex3nedyz0fb0xx14ie39trti4wxv-8vxv8rssmk743ojnwacrr2e7c \
 192.168.1.166:2377

To add a manager to this swarm, run 'docker swarm join-token manager' and follow the instructions.

2.3 Add a Worker Node

Add the Swarm worker nodes using the join command printed in the output of the init command. Execute the following command on 192.168.1.167 and 192.168.1.168 respectively.

docker swarm join \
 --token SWMTKN-1-49nj1cmql0jkz5s954yi3oex3nedyz0fb0xx14ie39trti4wxv-8vxv8rssmk743ojnwacrr2e7c \
 192.168.1.166:2377

2.4 Verify the Cluster

docker node ls

ID                            HOSTNAME            STATUS              AVAILABILITY        MANAGER STATUS      ENGINE VERSION
h0az2wzqetpwhl9ybu76yxaen *   KF2-DATA-166        Ready               Active              Reachable           18.06.1-ce
q6jripaolxsl7xqv3cmv5pxji     KF2-DATA-167        Ready               Active              Leader              18.06.1-ce
h1iql1uvm7123h3gon9so69dy     KF2-DATA-168        Ready               Active                                  18.06.1-ce
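
In the sample output above, more than one node reports a manager status, which gives the Swarm control plane its own high availability. If your docker node ls output only shows workers besides the initial manager, you can promote the other nodes from the manager node (an optional step, not required for the deployment below; hostnames as listed above):

# Promote the two workers so that the Swarm control plane can tolerate the loss of one manager
docker node promote KF2-DATA-167 KF2-DATA-168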

2.5 Configure Docker Stack

vi docker-stack.yml

Configure Docker Stack as below:

version: '3.6'
services:
  metad0:
    image: vesoft/nebula-metad:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --local_ip=192.168.1.166
      - --ws_ip=192.168.1.166
      - --port=45500
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-166
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.166:11000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 11000
        published: 11000
        protocol: tcp
        mode: host
      - target: 11002
        published: 11002
        protocol: tcp
        mode: host
      - target: 45500
        published: 45500
        protocol: tcp
        mode: host
    volumes:
      - data-metad0:/data/meta
      - logs-metad0:/logs
    networks:
      - nebula-net

  metad1:
    image: vesoft/nebula-metad:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --local_ip=192.168.1.167
      - --ws_ip=192.168.1.167
      - --port=45500
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-167
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.167:11000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 11000
        published: 11000
        protocol: tcp
        mode: host
      - target: 11002
        published: 11002
        protocol: tcp
        mode: host
      - target: 45500
        published: 45500
        protocol: tcp
        mode: host
    volumes:
      - data-metad1:/data/meta
      - logs-metad1:/logs
    networks:
      - nebula-net

  metad2:
    image: vesoft/nebula-metad:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --local_ip=192.168.1.168
      - --ws_ip=192.168.1.168
      - --port=45500
      - --data_path=/data/meta
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-168
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.168:11000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 11000
        published: 11000
        protocol: tcp
        mode: host
      - target: 11002
        published: 11002
        protocol: tcp
        mode: host
      - target: 45500
        published: 45500
        protocol: tcp
        mode: host
    volumes:
      - data-metad2:/data/meta
      - logs-metad2:/logs
    networks:
      - nebula-net

  storaged0:
    image: vesoft/nebula-storaged:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --local_ip=192.168.1.166
      - --ws_ip=192.168.1.166
      - --port=44500
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-166
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.166:12000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 12000
        published: 12000
        protocol: tcp
        mode: host
      - target: 12002
        published: 12002
        protocol: tcp
        mode: host
    volumes:
      - data-storaged0:/data/storage
      - logs-storaged0:/logs
    networks:
      - nebula-net
  storaged1:
    image: vesoft/nebula-storaged:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --local_ip=192.168.1.167
      - --ws_ip=192.168.1.167
      - --port=44500
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-167
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.167:12000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 12000
        published: 12000
        protocol: tcp
        mode: host
      - target: 12002
        published: 12004
        protocol: tcp
        mode: host
    volumes:
      - data-storaged1:/data/storage
      - logs-storaged1:/logs
    networks:
      - nebula-net

  storaged2:
    image: vesoft/nebula-storaged:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --local_ip=192.168.1.168
      - --ws_ip=192.168.1.168
      - --port=44500
      - --data_path=/data/storage
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-168
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.168:12000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 12000
        published: 12000
        protocol: tcp
        mode: host
      - target: 12002
        published: 12006
        protocol: tcp
        mode: host
    volumes:
      - data-storaged2:/data/storage
      - logs-storaged2:/logs
    networks:
      - nebula-net
  graphd1:
    image: vesoft/nebula-graphd:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --port=3699
      - --ws_ip=192.168.1.166
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-166
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.166:13000/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 3699
        published: 3699
        protocol: tcp
        mode: host
      - target: 13000
        published: 13000
        protocol: tcp
        mode: host
      - target: 13002
        published: 13002
        protocol: tcp
        mode: host
    volumes:
      - logs-graphd:/logs
    networks:
      - nebula-net

  graphd2:
    image: vesoft/nebula-graphd:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --port=3699
      - --ws_ip=192.168.1.167
      - --log_dir=/logs
      - --v=2
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-167
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.167:13001/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 3699
        published: 3640
        protocol: tcp
        mode: host
      - target: 13000
        published: 13001
        protocol: tcp
        mode: host
      - target: 13002
        published: 13003
        protocol: tcp
        mode: host
    volumes:
      - logs-graphd2:/logs
    networks:
      - nebula-net
  graphd3:
    image: vesoft/nebula-graphd:nightly
    env_file:
      - ./nebula.env
    command:
      - --meta_server_addrs=192.168.1.166:45500,192.168.1.167:45500,192.168.1.168:45500
      - --port=3699
      - --ws_ip=192.168.1.168
      - --log_dir=/logs
      - --v=0
      - --minloglevel=2
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
      placement:
        constraints:
          - node.hostname == KF2-DATA-168
    depends_on:
      - metad0
      - metad1
      - metad2
    healthcheck:
      test: ["CMD", "curl", "-f", "http://192.168.1.168:13002/status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    ports:
      - target: 3699
        published: 3641
        protocol: tcp
        mode: host
      - target: 13000
        published: 13002
        protocol: tcp
        mode: host
      - target: 13002
        published: 13004
        protocol: tcp
        mode: host
    volumes:
      - logs-graphd3:/logs
    networks:
      - nebula-net
networks:
  nebula-net:
    external: true
    attachable: true
    name: host
volumes:
  data-metad0:
  logs-metad0:
  data-metad1:
  logs-metad1:
  data-metad2:
  logs-metad2:
  data-storaged0:
  logs-storaged0:
  data-storaged1:
  logs-storaged1:
  data-storaged2:
  logs-storaged2:
  logs-graphd:
  logs-graphd2:
  logs-graphd3:

Edit the nebula.env file by adding the items below:

TZ=UTC
USER=root

2.6 Start the NebulaGraph Cluster

docker stack deploy nebula -c docker-stack.yml
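
After the stack is deployed, you can check that every service has been scheduled and its replica is running (service names are prefixed with the stack name nebula):

# List the services in the stack and their replica counts
docker service ls
# Show the node and current state of every task in the stack
docker stack ps nebula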

3. Cluster Configuration for Load Balancing and High Availability

The NebulaGraph clients (1.x) do not currently provide load balancing; they randomly select a graphd service to connect to the database. Therefore, you need to configure load balancing and high availability on your own if you want to use NebulaGraph in production.

[Figure: Cluster configuration for load balancing and high availability]

As shown in the figure above, the entire deployment can be divided into three layers: the data layer, the load balancing layer, and the high availability (HA) layer.

The load balancing layer distributes client requests across the graphd services in the data layer.

The HA layer, realized with Keepalived, keeps the load balancing service itself available so that the entire cluster remains reachable.

3.1 Load Balancing Configuration

HAProxy is configured and started with Docker Compose. Create the three files below:

  1. Dockerfile
FROM haproxy:1.7
COPY haproxy.cfg /usr/local/etc/haproxy/haproxy.cfg
EXPOSE 3640
  2. docker-compose.yml
version: "3.2"
services:
  haproxy:
    container_name: haproxy
    build: .
    volumes:
      - ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg
    ports:
      - 3640:3640
    restart: always
    networks:
      - app_net
networks:
  app_net:
    external: true
  3. haproxy.cfg
global
    daemon
    maxconn 30000
    log 127.0.0.1 local0 info
    log 127.0.0.1 local1 warning

defaults
    log-format %hr\ %ST\ %B\ %Ts
    log  global
    mode http
    option http-keep-alive
    timeout connect 5000ms
    timeout client 10000ms
    timeout server 50000ms
    timeout http-request 20000ms

# Customize your own frontends && backends && listen conf
# CUSTOM

listen graphd-cluster
    bind *:3640
    mode tcp
    maxconn 300
    balance roundrobin
    server server1 192.168.1.166:3699 maxconn 300 check
    server server2 192.168.1.167:3699 maxconn 300 check
    server server3 192.168.1.168:3699 maxconn 300 check

listen stats
    bind *:1080
    stats refresh 30s
    stats uri /stats

3.2 Start HAProxy

docker-compose up -d
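
Note that docker-compose.yml above declares app_net as an external network; if it does not exist yet, create it before starting HAProxy. Once the container is up, you can check that the graphd frontend is listening (a minimal sketch, assuming netcat is installed; the stats page on port 1080 is only reachable from the host if you also publish that port in docker-compose.yml):

# Create the external network referenced by docker-compose.yml (only needed once)
docker network create app_net
# Confirm the HAProxy container is running
docker ps --filter name=haproxy
# Check that the graphd frontend on port 3640 accepts TCP connections
nc -zv 127.0.0.1 3640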

3.3 HA Configuration

Follow the configuration steps on 192.168.1.166, 192.168.1.167, and 192.168.1.168.

  1. Install Keepalived
apt-get update && apt-get -y upgrade && apt-get -y install keepalived
  2. Modify Keepalived's configuration file, /etc/keepalived/keepalived.conf. Note that the priority item must be set to a different value on each of the three hosts, which determines their failover order.

Note: A virtual IP (VIP) is required to configure Keepalived. In the configurations below, 192.168.1.99 is the virtual IP.

a. Configuration on 192.168.1.166

global_defs {
    router_id lb01 #An identifier;
}
vrrp_script chk_haproxy {
    script "killall -0 haproxy"    interval 2
}
vrrp_instance VI_1 {
    state MASTER
    interface ens160
    virtual_router_id 52
    priority 999
    # Set the interval (in seconds) for sync check between MASTER and BACKUP load balancers.
    advert_int 1
    # Configure the authentication type and password
    authentication {
    # Configure the authentication type, mainly PASS and AH
        auth_type PASS
    # Set the authentication password. The MASTER and BACKUP load balancers must use the same password within the same vrrp_instance to communicate properly.
        auth_pass amber1
    }
    virtual_ipaddress {
        # The virtual IP is 192.168.1.99/24; the bound interface is ens160; the alias for both MASTER and BACKUP load balancers is ens160:1
        192.168.1.99/24 dev ens160 label ens160:1
    }
    track_script {
        chk_haproxy
    }
}

b. Configuration on 192.168.1.167

global_defs {
    router_id lb01 #An identifier;
}
vrrp_script chk_haproxy {
    script "killall -0 haproxy"    interval 2
}
vrrp_instance VI_1 {
    state BACKUP
    interface ens160
    virtual_router_id 52
    priority 888
    # Set the interval (in seconds) for sync check between MASTER and BACKUP load balancers.
    advert_int 1
    # Configure the authentication type and password
    authentication {
    # Configure the authentication type, mainly PASS and AH
        auth_type PASS
    # Set the authentication password. The MASTER and BACKUP load balancers must use the same password within the same vrrp_instance to communicate properly.
        auth_pass amber1
    }
    virtual_ipaddress {
        # The virtual IP is 192.168.1.99/24; the bound interface is ens160; the alias for both MASTER and BACKUP load balancers is ens160:1
        192.168.1.99/24 dev ens160 label ens160:1
    }
    track_script {
        chk_haproxy
    }
}

c. Configuration on 192.168.1.168

global_defs {
    router_id lb01 #An identifier;
}
vrrp_script chk_haproxy {
    script "killall -0 haproxy"    interval 2
}
vrrp_instance VI_1 {
    state BACKUP
    interface ens160
    virtual_router_id 52
    priority 777
    # Set the interval (in seconds) for sync check between MASTER and BACKUP load balancers.
    advert_int 1
   # Configure the authentication type and password
    authentication {
    # Configure the authentication type, mainly PASS and AH
        auth_type PASS
    # Set the authentication password. The MASTER and BACKUP load balancers must use the same password within the same vrrp_instance to communicate properly.
        auth_pass amber1
    }
    virtual_ipaddress {
        # The virtual IP is 192.168.1.99/24; the bound interface is ens160; the alias for both MASTER and BACKUP load balancers is ens160:1
        192.168.1.99/24 dev ens160 label ens160:1
    }
    track_script {
        chk_haproxy
    }
}

Relevant commands used in Keepalived:

# Start Keepalived
systemctl start keepalived
# Enable Keepalived to start at boot
systemctl enable keepalived
# Restart Keepalived
systemctl restart keepalived
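
After Keepalived is started on all three hosts, you can check which host currently holds the virtual IP and roughly test a failover (a simple sketch, assuming the interface is ens160 as in the configurations above; adjust if yours differs):

# On the MASTER (192.168.1.166), 192.168.1.99 should be bound to ens160
ip addr show ens160
# Simulate an HAProxy failure on the MASTER; the chk_haproxy check will fail and the VIP should move to the host with the next-highest priority
docker stop haproxy
ip addr show ens160   # run this on 192.168.1.167 after a few seconds

Clients can then reach the graphd cluster through the virtual IP and the HAProxy port, i.e. 192.168.1.99:3640.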

4. Deploy the Cluster Offline

How do you deploy the NebulaGraph cluster with Docker Swarm offline? The answer is simple: push the Docker images to your private image registry and point the image fields in docker-stack.yml at it.
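
For example, assuming a private registry is reachable at registry.example.com:5000 (a hypothetical address, shown here only for illustration), you can mirror the public images into it on a machine with internet access and then reference them in docker-stack.yml:

# Pull the public image, retag it for the private registry, and push it; repeat for nebula-metad and nebula-storaged
docker pull vesoft/nebula-graphd:nightly
docker tag vesoft/nebula-graphd:nightly registry.example.com:5000/vesoft/nebula-graphd:nightly
docker push registry.example.com:5000/vesoft/nebula-graphd:nightly

Then change the image lines in docker-stack.yml accordingly, for example image: registry.example.com:5000/vesoft/nebula-graphd:nightly.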

If you have any questions regarding deploying NebulaGraph with Docker Swarm, please leave your comments in this thread: https://discuss.nebula-graph.io/t/fast-deploy-nebula-graph-cluster-with-docker-swarm/693

You might also like:

  1. NebulaGraph Architecture — A Bird’s Eye View
  2. An Introduction to NebulaGraph's Storage Engine
  3. An Introduction to NebulaGraph’s Query Engine