8 Commits

Author SHA1 Message Date
mayuresh82
a38dc2f48f Merge pull request #12 from mayuresh82/race_fix
attempt to fix a race condition
2022-03-09 12:28:28 -08:00
Mayuresh Gaitonde
04159185c9 Fix 2021-12-15 17:10:28 -08:00
Mayuresh Gaitonde
c860f3c50e checks to Add new app 2021-12-15 16:44:13 -08:00
Mayuresh Gaitonde
62db2e5af7 attempt to fix a race condition 2021-12-15 15:36:51 -08:00
mayuresh82
d39e46096f Merge pull request #11 from mayuresh82/deadlock_fix
fix the deadlock
2021-10-20 11:23:51 -07:00
Mayuresh Gaitonde
a99f92e9a5 fix 2021-10-18 18:21:01 -07:00
Mayuresh Gaitonde
5ac02c373b dockerfile fix 2021-10-18 18:16:22 -07:00
Mayuresh Gaitonde
6fdff28716 fix the deadlock 2021-10-18 15:50:13 -07:00
3 changed files with 43 additions and 28 deletions

View File

@@ -1,4 +1,4 @@
FROM golang:alpine as builder FROM golang:1.14-alpine as builder
RUN apk update && \ RUN apk update && \
apk upgrade && \ apk upgrade && \
apk add --no-cache git && \ apk add --no-cache git && \

View File

@@ -58,7 +58,7 @@ type appMon struct {
app *App app *App
done chan bool done chan bool
announced bool announced bool
checkOn bool runLoopOn bool
} }
// MonitorMgr manages the lifecycle of registered apps // MonitorMgr manages the lifecycle of registered apps
@@ -69,7 +69,8 @@ type MonitorMgr struct {
ctrl *Controller ctrl *Controller
consul *ConsulMon consul *ConsulMon
sync.Mutex monMu sync.Mutex
clMu sync.Mutex
} }
func NewMonitor(config *c.Config) *MonitorMgr { func NewMonitor(config *c.Config) *MonitorMgr {
@@ -123,7 +124,7 @@ func (m *MonitorMgr) consulMon() {
} }
// remove currently running apps that are not discovered in this pass // remove currently running apps that are not discovered in this pass
var toRemove []string var toRemove []string
m.Lock() m.monMu.Lock()
for name, mon := range m.monitors { for name, mon := range m.monitors {
if mon.app.Source != "consul" { if mon.app.Source != "consul" {
continue continue
@@ -140,10 +141,10 @@ func (m *MonitorMgr) consulMon() {
toRemove = append(toRemove, name) toRemove = append(toRemove, name)
} }
} }
m.monMu.Unlock()
for _, tr := range toRemove { for _, tr := range toRemove {
m.Remove(tr) m.Remove(tr)
} }
m.Unlock()
} }
<-time.After(m.config.Agent.ConsulQueryInterval) <-time.After(m.config.Agent.ConsulQueryInterval)
} }
@@ -152,31 +153,44 @@ func (m *MonitorMgr) consulMon() {
// Add adds a new app into monitor manager // Add adds a new app into monitor manager
func (m *MonitorMgr) Add(app *App) { func (m *MonitorMgr) Add(app *App) {
// check if already running // check if already running
m.Lock() m.monMu.Lock()
defer m.Unlock() var existing *appMon
for _, appMon := range m.monitors { for _, appMon := range m.monitors {
if appMon.app.Equal(app) && appMon.checkOn { if appMon.app.Equal(app) {
glog.V(2).Infof("App %s already exists", app.Name) glog.Infof("App %s already exists", app.Name)
return existing = appMon
break
} }
if appMon.app.Vip.Net.String() == app.Vip.Net.String() && appMon.app.Name != app.Name { if appMon.app.Vip.Net.String() == app.Vip.Net.String() && appMon.app.Name != app.Name {
glog.Errorf("Error: Vip %s is already being announced by app: %s", app.Vip.Net.String(), appMon.app.Name) glog.Errorf("Error: Vip %s is already being announced by app: %s", app.Vip.Net.String(), appMon.app.Name)
m.monMu.Unlock()
return return
} }
} }
m.Remove(app.Name) m.monMu.Unlock()
// if the same app already exists but its run loop is not running,
// then just restart the run loop
if existing != nil {
if !existing.runLoopOn {
go m.runLoop(existing)
}
} else {
// else add a new app and start its run loop
appMon := &appMon{app: app, done: make(chan bool)} appMon := &appMon{app: app, done: make(chan bool)}
m.monitors[app.Name] = appMon m.monitors[app.Name] = appMon
go m.runLoop(appMon) go m.runLoop(appMon)
glog.Infof("Registered a new app: %v", app.String()) glog.Infof("Registered a new app: %v", app.String())
} }
}
// Remove removes an app from monitor manager, stops BGP // Remove removes an app from monitor manager, stops BGP
/// announcement and cleans up state /// announcement and cleans up state
func (m *MonitorMgr) Remove(appName string) { func (m *MonitorMgr) Remove(appName string) {
m.monMu.Lock()
defer m.monMu.Unlock()
if a, ok := m.monitors[appName]; ok { if a, ok := m.monitors[appName]; ok {
if a.checkOn { if a.runLoopOn {
a.done <- true close(a.done)
} }
if a.announced { if a.announced {
if err := m.ctrl.Withdraw(a.app.Vip); err != nil { if err := m.ctrl.Withdraw(a.app.Vip); err != nil {
@@ -198,6 +212,7 @@ func (m *MonitorMgr) Remove(appName string) {
} }
delete(m.monitors, appName) delete(m.monitors, appName)
} }
func (m *MonitorMgr) runMonitors(app *App) bool { func (m *MonitorMgr) runMonitors(app *App) bool {
for _, mon := range app.Monitors { for _, mon := range app.Monitors {
var check bool var check bool
@@ -223,8 +238,8 @@ func (m *MonitorMgr) runMonitors(app *App) bool {
func (m *MonitorMgr) checkCond(am *appMon) error { func (m *MonitorMgr) checkCond(am *appMon) error {
app := am.app app := am.app
m.Lock() m.clMu.Lock()
defer m.Unlock() defer m.clMu.Unlock()
if m.runMonitors(app) { if m.runMonitors(app) {
glog.V(2).Infof("All Monitors for app: %s succeeded", app.Name) glog.V(2).Infof("All Monitors for app: %s succeeded", app.Name)
if !am.announced { if !am.announced {
@@ -245,7 +260,8 @@ func (m *MonitorMgr) checkCond(am *appMon) error {
} }
am.announced = true am.announced = true
if exit, ok := m.cleanups[app.Name]; ok { if exit, ok := m.cleanups[app.Name]; ok {
exit <- true close(exit)
delete(m.cleanups, app.Name)
} }
} }
} else { } else {
@@ -265,7 +281,8 @@ func (m *MonitorMgr) checkCond(am *appMon) error {
// runLoop periodically checks if an app passes healthchecks // runLoop periodically checks if an app passes healthchecks
// and needs VIP announcement // and needs VIP announcement
func (m *MonitorMgr) runLoop(am *appMon) { func (m *MonitorMgr) runLoop(am *appMon) {
am.checkOn = true glog.Infof("Starting run-loop for app %s", am.app.Name)
am.runLoopOn = true
if err := m.checkCond(am); err != nil { if err := m.checkCond(am); err != nil {
glog.Errorln(err) glog.Errorln(err)
} }
@@ -278,7 +295,8 @@ func (m *MonitorMgr) runLoop(am *appMon) {
glog.Errorln(err) glog.Errorln(err)
} }
case <-am.done: case <-am.done:
glog.V(2).Infof("Exit run-loop for app: %s", am.app.Name) glog.Infof("Exit run-loop for app: %s", am.app.Name)
am.runLoopOn = false
return return
} }
} }
@@ -291,8 +309,8 @@ func (m *MonitorMgr) CloseAll() {
glog.Errorf("Failed to shut-down BGP: %v", err) glog.Errorf("Failed to shut-down BGP: %v", err)
} }
for _, am := range m.monitors { for _, am := range m.monitors {
if am.checkOn { if am.runLoopOn {
am.done <- true close(am.done)
} }
deleteLoopback(am.app.Vip.Net) deleteLoopback(am.app.Vip.Net)
for _, nat := range am.app.Nats { for _, nat := range am.app.Nats {
@@ -313,9 +331,8 @@ func (m *MonitorMgr) Cleanup(app string, exit chan bool) {
select { select {
case <-t.C: case <-t.C:
glog.Infof("Cleaning up app %s", app) glog.Infof("Cleaning up app %s", app)
m.Lock()
m.Remove(app) m.Remove(app)
m.Unlock() return
case <-exit: case <-exit:
return return
} }

View File

@@ -68,9 +68,7 @@ func (s *Server) unregisterHandler(w http.ResponseWriter, r *http.Request) {
http.Error(w, "Invalid request, need app name specified", http.StatusBadRequest) http.Error(w, "Invalid request, need app name specified", http.StatusBadRequest)
return return
} }
s.mon.Lock()
s.mon.Remove(appName[0]) s.mon.Remove(appName[0])
s.mon.Unlock()
} }
func (s *Server) infoHandler(w http.ResponseWriter, r *http.Request) { func (s *Server) infoHandler(w http.ResponseWriter, r *http.Request) {