From 1d4fe095f182ce38d5bd080273a3514ff02abee8 Mon Sep 17 00:00:00 2001 From: Mayuresh Gaitonde Date: Fri, 26 Oct 2018 17:09:45 -0700 Subject: [PATCH] Nat support and fix monitoring issues --- README.md | 63 +++++++++++++++++++++++++++++++++++++++++++ controller/app.go | 30 +++++++++------------ controller/consul.go | 9 ++++--- controller/monitor.go | 34 +++++++++++++++++------ controller/system.go | 3 ++- server/server.go | 2 +- 6 files changed, 111 insertions(+), 30 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..66ad2aa --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +# GoCast + +GoCast is a tool that performs controlled BGP route advertisements from a host. It runs custom defined healthchecks and announces or withdraws routes (most commonly VIPs or Virtual IPs) to a BGP peer. +The most common use case for this is anycast (vip) based load balancing for infrastructure services such as DNS, Syslog etc where several instances are available in geographically diverse regions that announce the same anycast VIP, and clients then get sent to the closest instance. + +## Installation +Use the docker container at mayuresh82/gocast or compile from source: + +1. [Install Go](https://golang.org/doc/install) +2. [Setup your GOPATH](https://golang.org/doc/code.html#GOPATH) +3. Run `go get -d github.com/mayuresh82/gocast` +4. Run `cd $GOPATH/src/github.com/mayuresh82/gocast` +5. Run `make` + +## Design + +GoCast uses [GoBGP](https://github.com/osrg/gobgp) as a library to peer with remote neighbors and announce/withdraw prefixes. It really is just a healthcheck based wrapper around GoBGP. Remote peers can be autodiscovered or statically configured. A peer will most commonly be a Top-Of-Rack (TOR) switch. + +Typically you would run GoCast on the same hosts as the service that needs to be monitored. 
+Once an application "registers" with GoCast, GoCast then runs the predefined health monitors/checks and if they fail (e.g. a service listening on a specific port), the routes are withdrawn thereby taking the node out of service. + +GoCast uses a config file to define agent parameters (http addr, consul server addr, timers etc) and BGP parameters (local/peer ASN, peer IP, origin/communities). See example config.yaml. + +### Registration +An application can register with the GoCast instance running on the same host using one of the following methods: +1. http call : Make an http get call with the required parameters. For example: +``` +http://gocast-addr/register?name=&vip=&monitor=port:tcp:5000 +``` +Multiple monitors can be defined and the healthcheck succeeds only when all the monitors pass. + +2. Custom defined apps in config.yaml. See the example config.yaml for syntax examples (not supported yet) + +3. Consul based auto-discovery (see below) + +## Monitors +A health monitor can either be a port monitor, an exec monitor or consul. Port monitors are specified as *port:protocol:portnum* , exec monitors run a script or arbitrary command and pass on successful exit (status code 0), specified as *exec:command* and consul monitors use consul's own healthchecks, specified simply as *consul* + +## Consul Integration +GoCast supports consul for automatic service discovery and healthchecking. For this to work, the following need to be set up: +- The host running GoCast needs to have the environment variable **CONSUL_NODE** set to the hostname in consul + +- The following tags must be set in consul for autodiscovery to work: +`enable_gocast` : required +`gocast_vip=`: required +`gocast_monitor=monitor:params`: optional + +If `gocast_monitor=consul` is specified, then GoCast uses the defined healthchecks in consul as the health monitors for the service. + +## Docker support +The docker image at mayuresh82/gocast can be used to run GoCast inside a container. 
In order for GoCast to manipulate the host network stack correctly, the container needs to run with NET_ADMIN capability and host mode networking. For example: +``` +docker run -d --cap-add=NET_ADMIN --net=host mayuresh82/gocast --config=/path/to/config.yaml +``` + +**Caveats and workarounds** + +The service to be monitored can also be run inside a container, provided the published service ports are set to listen on 0.0.0.0 (not a specific IP.) +Certain orchestration solutions such as Nomad run the docker containers with published ports listening only on the physical IP address. This will cause all requests to the app to fail, because the host does not listen on the loopback interface any more (which GoCast uses and assigns the VIP IP to). To work around this, there are 2 options: + +- Start the service container in host networking mode OR + +- Register NAT rules for your service with GoCast for the required protocol/port(s). GoCast will then create iptables NAT rules that map traffic destined to the assigned VIP to the physical IP address. This is achieved by adding the `gocast_nat=protocol:port` tag(s) in consul or the `nat=protocol:port` parameter(s) in the http query. 
\ No newline at end of file diff --git a/controller/app.go b/controller/app.go index f783f36..5c0dce7 100644 --- a/controller/app.go +++ b/controller/app.go @@ -52,6 +52,7 @@ type App struct { Name string Vip *net.IPNet Monitors Monitors + Nats []string } func (a *App) Equal(other *App) bool { @@ -66,42 +67,37 @@ func (a *App) Equal(other *App) bool { return a.Name == other.Name && a.Vip.String() == other.Vip.String() } -func (a *App) needsNatRule() (bool, *Monitor) { - for _, m := range a.Monitors { - if m.Type == Monitor_CONSUL && m.Port != "" { - return true, m - } - } - return false, nil -} - -func NewApp(appName, vip string, monitors []string) (*App, error) { +func NewApp(appName, vip string, monitors []string, nats []string) (*App, error) { if appName == "" { return nil, fmt.Errorf("Invalid app name") } - app := &App{Name: appName} + app := &App{Name: appName, Nats: nats} _, ipnet, err := net.ParseCIDR(vip) if err != nil { return nil, fmt.Errorf("Invalid VIP specified, need ip/mask") } app.Vip = ipnet for _, m := range monitors { + // valid monitor formats: + // "port:tcp:123" , "exec:/local/check.sh", "consul" parts := strings.Split(m, ":") - if len(parts) != 2 && len(parts) != 3 { - glog.Errorf("Invalid monitor specified, ignoring") - continue - } mon := &Monitor{Type: MonitorMap[parts[0]]} switch mon.Type.String() { case "port": + if len(parts) != 3 { + return nil, fmt.Errorf("Invalid port monitor, must specify proto:port") + } mon.Protocol = parts[1] mon.Port = parts[2] case "exec": + if len(parts) != 2 { + return nil, fmt.Errorf("Invalid exec monitor, must specify command") + } mon.Cmd = parts[1] case "consul": - glog.V(2).Infof("Using consul health monitor") + glog.V(2).Infof("Will use consul healthcheck monitor") default: - glog.V(2).Infof("No monitor specified") + glog.V(2).Infof("Invalid monitor specified") } app.Monitors = append(app.Monitors, mon) } diff --git a/controller/consul.go b/controller/consul.go index 1b425e3..e67dba6 100644 --- 
a/controller/consul.go +++ b/controller/consul.go @@ -65,6 +65,7 @@ func (c *ConsulMon) queryServices() ([]*App, error) { var ( vip string monitors []string + nats []string ) for _, tag := range service.Tags { // try to find the requires tags. Only vip is mandatory @@ -73,17 +74,19 @@ func (c *ConsulMon) queryServices() ([]*App, error) { continue } switch parts[0] { - case "vip": + case "gocast_vip": vip = parts[1] - case "monitor": + case "gocast_monitor": monitors = append(monitors, parts[1]) + case "gocast_nat": + nats = append(nats, parts[1]) } } if vip == "" { glog.Errorf("No vip Tag found in matched service :%s", service.Service) continue } - app, err := NewApp(service.Service, vip, monitors) + app, err := NewApp(service.Service, vip, monitors, nats) if err != nil { glog.Errorf("Unable to add consul app: %v", err) continue diff --git a/controller/monitor.go b/controller/monitor.go index fb0f04f..03028ff 100644 --- a/controller/monitor.go +++ b/controller/monitor.go @@ -7,6 +7,7 @@ import ( api "github.com/osrg/gobgp/api" "net" "os/exec" + "strings" "sync" "time" ) @@ -137,6 +138,7 @@ func (m *MonitorMgr) Add(app *App) { defer m.Unlock() for _, appMon := range m.monitors { if appMon.app.Equal(app) && appMon.checkOn { + glog.V(2).Infof("App %s already exists", app.Name) return } if appMon.app.Vip.String() == app.Vip.String() && appMon.app.Name != app.Name { @@ -161,16 +163,24 @@ func (m *MonitorMgr) Remove(appName string) { glog.Errorf("Failed to withdraw route: %v", err) } } - deleteLoopback(a.app.Vip) - if ok, mon := a.app.needsNatRule(); ok { - natRule("D", a.app.Vip.IP, m.ctrl.localIP, mon.Port, mon.Protocol) + if err := deleteLoopback(a.app.Vip); err != nil { + glog.Errorf("Failed to remove app: %s: %v", a.app.Name, err) + } + for _, nat := range a.app.Nats { + parts := strings.Split(nat, ":") + if len(parts) != 2 { + continue + } + if err := natRule("D", a.app.Vip.IP, m.ctrl.localIP, parts[0], parts[1]); err != nil { + glog.Errorf("Failed to remove app: 
%s: %v", a.app.Name, err) + } } } delete(m.monitors, appName) } func (m *MonitorMgr) runMonitors(app *App) bool { - var check bool for _, mon := range app.Monitors { + var check bool switch mon.Type { case Monitor_PORT: check = portMonitor(mon.Protocol, mon.Port) @@ -201,8 +211,12 @@ func (m *MonitorMgr) checkCond(am *appMon) error { if err := addLoopback(app.Name, app.Vip); err != nil { return err } - if ok, mon := app.needsNatRule(); ok { - if err := natRule("A", app.Vip.IP, m.ctrl.localIP, mon.Port, mon.Protocol); err != nil { + for _, nat := range app.Nats { + parts := strings.Split(nat, ":") + if len(parts) != 2 { + continue + } + if err := natRule("A", app.Vip.IP, m.ctrl.localIP, parts[0], parts[1]); err != nil { return err } } @@ -258,8 +272,12 @@ func (m *MonitorMgr) CloseAll() { am.done <- true } deleteLoopback(am.app.Vip) - if ok, mon := am.app.needsNatRule(); ok { - natRule("D", am.app.Vip.IP, m.ctrl.localIP, mon.Port, mon.Protocol) + for _, nat := range am.app.Nats { + parts := strings.Split(nat, ":") + if len(parts) != 2 { + continue + } + natRule("D", am.app.Vip.IP, m.ctrl.localIP, parts[0], parts[1]) } } } diff --git a/controller/system.go b/controller/system.go index 1af51b4..bafd4bf 100644 --- a/controller/system.go +++ b/controller/system.go @@ -33,6 +33,7 @@ func localAddress(gw net.IP) (net.IP, error) { } func addLoopback(name string, addr *net.IPNet) error { + deleteLoopback(addr) prefixLen, _ := addr.Mask.Size() label := fmt.Sprintf("lo:%s", name) // linux kernel limits labels to 15 chars @@ -57,7 +58,7 @@ func deleteLoopback(addr *net.IPNet) error { return nil } -func natRule(op string, vip, localAddr net.IP, port, protocol string) error { +func natRule(op string, vip, localAddr net.IP, protocol, port string) error { cmd := fmt.Sprintf( "iptables -t nat -%s PREROUTING -p %s -d %s --dport %s -j DNAT --to-destination %s:%s", op, protocol, vip.String(), port, localAddr.String(), port, diff --git a/server/server.go b/server/server.go index 
e559e14..aa08872 100644 --- a/server/server.go +++ b/server/server.go @@ -46,7 +46,7 @@ func (s *Server) Serve(ctx context.Context) { func (s *Server) registerHandler(w http.ResponseWriter, r *http.Request) { queries := r.URL.Query() - app, err := controller.NewApp(queries["name"][0], queries["vip"][0], queries["monitor"]) + app, err := controller.NewApp(queries["name"][0], queries["vip"][0], queries["monitor"], queries["nat"]) if err != nil { http.Error(w, fmt.Sprintf("Invalid request: %v", err), http.StatusBadRequest) return