Nat support and fix monitoring issues
This commit is contained in:
63
README.md
Normal file
63
README.md
Normal file
@@ -0,0 +1,63 @@
|
||||
# GoCast
|
||||
|
||||
GoCast is a tool that performs controlled BGP route advertisements from a host. It runs custom-defined healthchecks and announces or withdraws routes (most commonly VIPs, or Virtual IPs) to a BGP peer.
|
||||
The most common use case for this is anycast (VIP) based load balancing for infrastructure services such as DNS, Syslog, etc., where several instances in geographically diverse regions announce the same anycast VIP, and clients are then sent to the closest instance.
|
||||
|
||||
## Installation
|
||||
Use the docker container at mayuresh82/gocast or compile from source:
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install)
|
||||
2. [Setup your GOPATH](https://golang.org/doc/code.html#GOPATH)
|
||||
3. Run `go get -d github.com/mayuresh82/gocast`
|
||||
4. Run `cd $GOPATH/src/github.com/mayuresh82/gocast`
|
||||
5. Run `make`
|
||||
|
||||
## Design
|
||||
|
||||
GoCast uses [GoBGP](https://github.com/osrg/gobgp) as a library to peer with remote neighbors and announce/withdraw prefixes. It really is just a healthcheck based wrapper around GoBGP. Remote peers can be autodiscovered or statically configured. A peer will most commonly be a Top-Of-Rack (TOR) switch.
|
||||
|
||||
Typically you would run GoCast on the same hosts as the service that needs to be monitored.
|
||||
Once an application "registers" with GoCast, GoCast runs the predefined health monitors/checks (e.g. whether a service is listening on a specific port) and, if they fail, the routes are withdrawn, thereby taking the node out of service.
|
||||
|
||||
GoCast uses a config file to define agent parameters (http addr, consul server addr, timers etc) and BGP parameters (local/peer ASN, peer IP, origin/communities). See example config.yaml.
|
||||
|
||||
### Registration
|
||||
An application can register with the GoCast instance running on the same host using one of the following methods:
|
||||
1. http call : Make an http get call with the required parameters. For example:
|
||||
```
|
||||
http://gocast-addr/register?name=<appName>&vip=<addr/mask>&monitor=port:tcp:5000
|
||||
```
|
||||
Multiple monitors can be defined and the healthcheck succeeds only when all the monitors pass.
|
||||
|
||||
2. Custom defined apps in config.yaml. See the example config.yaml for syntax examples (not supported yet)
|
||||
|
||||
3. Consul based auto-discovery (see below)
|
||||
|
||||
## Monitors
|
||||
A health monitor can be a port monitor, an exec monitor or a consul monitor. Port monitors are specified as *port:protocol:portnum*. Exec monitors run a script or arbitrary command and pass on successful exit (status code 0); they are specified as *exec:command*. Consul monitors use consul's own healthchecks and are specified simply as *consul*.
|
||||
|
||||
## Consul Integration
|
||||
GoCast supports consul for automatic service discovery and healthchecking. For this to work, the following needs to be set up:
|
||||
- The host running GoCast needs to have the environment variable **CONSUL_NODE** set to the hostname in consul
|
||||
|
||||
- The following tags must be set in consul for autodiscovery to work:
|
||||
`enable_gocast` : required
|
||||
`gocast_vip=<addr/mask>`: required
|
||||
`gocast_monitor=monitor:params`: optional
|
||||
|
||||
If `gocast_monitor=consul` is specified, then GoCast uses the defined healthchecks in consul as the health monitors for the service.
|
||||
|
||||
## Docker support
|
||||
The docker image at mayuresh82/gocast can be used to run GoCast inside a container. In order for GoCast to manipulate the host network stack correctly, the container needs to run with the NET_ADMIN capability and host mode networking. For example:
|
||||
```
|
||||
docker run -d --cap-add=NET_ADMIN --net=host mayuresh82/gocast --config=/path/to/config.yaml
|
||||
```
|
||||
|
||||
**Caveats and workarounds**
|
||||
|
||||
The service to be monitored can also be run inside a container, provided the published service ports are set to listen on 0.0.0.0 (not a specific IP.)
|
||||
Certain orchestration solutions such as Nomad run the docker containers with published ports listening only on the physical IP address. This will cause all requests to the app to fail, because the host no longer listens on the loopback interface (which GoCast uses and assigns the VIP to). To work around this, there are two options:
|
||||
|
||||
- Start the service container in host networking mode OR
|
||||
|
||||
- Register NAT rules for your service with GoCast for the required protocol/port(s). GoCast will then create iptables NAT rules that map traffic destined to the assigned VIP to the physical IP address. This is achieved by adding the `gocast_nat=protocol:port` tag(s) in consul or the `nat=protocol:port` parameter(s) in the http query.
|
||||
@@ -52,6 +52,7 @@ type App struct {
|
||||
Name string
|
||||
Vip *net.IPNet
|
||||
Monitors Monitors
|
||||
Nats []string
|
||||
}
|
||||
|
||||
func (a *App) Equal(other *App) bool {
|
||||
@@ -66,42 +67,37 @@ func (a *App) Equal(other *App) bool {
|
||||
return a.Name == other.Name && a.Vip.String() == other.Vip.String()
|
||||
}
|
||||
|
||||
// needsNatRule reports whether the app has a monitor of type Monitor_CONSUL
// with a non-empty Port. It returns true together with the first such
// monitor, or (false, nil) when no monitor matches.
func (a *App) needsNatRule() (bool, *Monitor) {
|
||||
for _, m := range a.Monitors {
|
||||
if m.Type == Monitor_CONSUL && m.Port != "" {
|
||||
return true, m
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func NewApp(appName, vip string, monitors []string) (*App, error) {
|
||||
func NewApp(appName, vip string, monitors []string, nats []string) (*App, error) {
|
||||
if appName == "" {
|
||||
return nil, fmt.Errorf("Invalid app name")
|
||||
}
|
||||
app := &App{Name: appName}
|
||||
app := &App{Name: appName, Nats: nats}
|
||||
_, ipnet, err := net.ParseCIDR(vip)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Invalid VIP specified, need ip/mask")
|
||||
}
|
||||
app.Vip = ipnet
|
||||
for _, m := range monitors {
|
||||
// valid monitor formats:
|
||||
// "port:tcp:123" , "exec:/local/check.sh", "consul"
|
||||
parts := strings.Split(m, ":")
|
||||
if len(parts) != 2 && len(parts) != 3 {
|
||||
glog.Errorf("Invalid monitor specified, ignoring")
|
||||
continue
|
||||
}
|
||||
mon := &Monitor{Type: MonitorMap[parts[0]]}
|
||||
switch mon.Type.String() {
|
||||
case "port":
|
||||
if len(parts) != 3 {
|
||||
return nil, fmt.Errorf("Invalid port monitor, must specify proto:port")
|
||||
}
|
||||
mon.Protocol = parts[1]
|
||||
mon.Port = parts[2]
|
||||
case "exec":
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("Invalid exec monitor, must specify command")
|
||||
}
|
||||
mon.Cmd = parts[1]
|
||||
case "consul":
|
||||
glog.V(2).Infof("Using consul health monitor")
|
||||
glog.V(2).Infof("Will use consul healthcheck monitor")
|
||||
default:
|
||||
glog.V(2).Infof("No monitor specified")
|
||||
glog.V(2).Infof("Invalid monitor specified")
|
||||
}
|
||||
app.Monitors = append(app.Monitors, mon)
|
||||
}
|
||||
|
||||
@@ -65,6 +65,7 @@ func (c *ConsulMon) queryServices() ([]*App, error) {
|
||||
var (
|
||||
vip string
|
||||
monitors []string
|
||||
nats []string
|
||||
)
|
||||
for _, tag := range service.Tags {
|
||||
// try to find the requires tags. Only vip is mandatory
|
||||
@@ -73,17 +74,19 @@ func (c *ConsulMon) queryServices() ([]*App, error) {
|
||||
continue
|
||||
}
|
||||
switch parts[0] {
|
||||
case "vip":
|
||||
case "gocast_vip":
|
||||
vip = parts[1]
|
||||
case "monitor":
|
||||
case "gocast_monitor":
|
||||
monitors = append(monitors, parts[1])
|
||||
case "gocast_nat":
|
||||
nats = append(nats, parts[1])
|
||||
}
|
||||
}
|
||||
if vip == "" {
|
||||
glog.Errorf("No vip Tag found in matched service :%s", service.Service)
|
||||
continue
|
||||
}
|
||||
app, err := NewApp(service.Service, vip, monitors)
|
||||
app, err := NewApp(service.Service, vip, monitors, nats)
|
||||
if err != nil {
|
||||
glog.Errorf("Unable to add consul app: %v", err)
|
||||
continue
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
api "github.com/osrg/gobgp/api"
|
||||
"net"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
@@ -137,6 +138,7 @@ func (m *MonitorMgr) Add(app *App) {
|
||||
defer m.Unlock()
|
||||
for _, appMon := range m.monitors {
|
||||
if appMon.app.Equal(app) && appMon.checkOn {
|
||||
glog.V(2).Infof("App %s already exists", app.Name)
|
||||
return
|
||||
}
|
||||
if appMon.app.Vip.String() == app.Vip.String() && appMon.app.Name != app.Name {
|
||||
@@ -161,16 +163,24 @@ func (m *MonitorMgr) Remove(appName string) {
|
||||
glog.Errorf("Failed to withdraw route: %v", err)
|
||||
}
|
||||
}
|
||||
deleteLoopback(a.app.Vip)
|
||||
if ok, mon := a.app.needsNatRule(); ok {
|
||||
natRule("D", a.app.Vip.IP, m.ctrl.localIP, mon.Port, mon.Protocol)
|
||||
if err := deleteLoopback(a.app.Vip); err != nil {
|
||||
glog.Errorf("Failed to remove app: %s: %v", a.app.Name, err)
|
||||
}
|
||||
for _, nat := range a.app.Nats {
|
||||
parts := strings.Split(nat, ":")
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
if err := natRule("D", a.app.Vip.IP, m.ctrl.localIP, parts[0], parts[1]); err != nil {
|
||||
glog.Errorf("Failed to remove app: %s: %v", a.app.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
delete(m.monitors, appName)
|
||||
}
|
||||
func (m *MonitorMgr) runMonitors(app *App) bool {
|
||||
var check bool
|
||||
for _, mon := range app.Monitors {
|
||||
var check bool
|
||||
switch mon.Type {
|
||||
case Monitor_PORT:
|
||||
check = portMonitor(mon.Protocol, mon.Port)
|
||||
@@ -201,8 +211,12 @@ func (m *MonitorMgr) checkCond(am *appMon) error {
|
||||
if err := addLoopback(app.Name, app.Vip); err != nil {
|
||||
return err
|
||||
}
|
||||
if ok, mon := app.needsNatRule(); ok {
|
||||
if err := natRule("A", app.Vip.IP, m.ctrl.localIP, mon.Port, mon.Protocol); err != nil {
|
||||
for _, nat := range app.Nats {
|
||||
parts := strings.Split(nat, ":")
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
if err := natRule("A", app.Vip.IP, m.ctrl.localIP, parts[0], parts[1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -258,8 +272,12 @@ func (m *MonitorMgr) CloseAll() {
|
||||
am.done <- true
|
||||
}
|
||||
deleteLoopback(am.app.Vip)
|
||||
if ok, mon := am.app.needsNatRule(); ok {
|
||||
natRule("D", am.app.Vip.IP, m.ctrl.localIP, mon.Port, mon.Protocol)
|
||||
for _, nat := range am.app.Nats {
|
||||
parts := strings.Split(nat, ":")
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
natRule("D", am.app.Vip.IP, m.ctrl.localIP, parts[0], parts[1])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ func localAddress(gw net.IP) (net.IP, error) {
|
||||
}
|
||||
|
||||
func addLoopback(name string, addr *net.IPNet) error {
|
||||
deleteLoopback(addr)
|
||||
prefixLen, _ := addr.Mask.Size()
|
||||
label := fmt.Sprintf("lo:%s", name)
|
||||
// linux kernel limits labels to 15 chars
|
||||
@@ -57,7 +58,7 @@ func deleteLoopback(addr *net.IPNet) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func natRule(op string, vip, localAddr net.IP, port, protocol string) error {
|
||||
func natRule(op string, vip, localAddr net.IP, protocol, port string) error {
|
||||
cmd := fmt.Sprintf(
|
||||
"iptables -t nat -%s PREROUTING -p %s -d %s --dport %s -j DNAT --to-destination %s:%s",
|
||||
op, protocol, vip.String(), port, localAddr.String(), port,
|
||||
|
||||
@@ -46,7 +46,7 @@ func (s *Server) Serve(ctx context.Context) {
|
||||
|
||||
func (s *Server) registerHandler(w http.ResponseWriter, r *http.Request) {
|
||||
queries := r.URL.Query()
|
||||
app, err := controller.NewApp(queries["name"][0], queries["vip"][0], queries["monitor"])
|
||||
app, err := controller.NewApp(queries["name"][0], queries["vip"][0], queries["monitor"], queries["nat"])
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("Invalid request: %v", err), http.StatusBadRequest)
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user