diff --git a/pkg/metricmonitor/influxdb/controller.go b/pkg/metricmonitor/influxdb/controller.go index da48f3b44d74fc7b17c1e7a192b607698d11bd03..d01a5a75cfd5834fe74c3ffbd375b13d63891356 100644 --- a/pkg/metricmonitor/influxdb/controller.go +++ b/pkg/metricmonitor/influxdb/controller.go @@ -26,6 +26,8 @@ const ( const ( queryDuration int64 = 10 * 1e9 pingTimeOutSecond = 3 * time.Second + maxRetries = 3 + retryInterval = 1 * time.Second ) type Controller struct { @@ -198,14 +200,28 @@ func (c *Controller) GetTopUsedServerSessionOfInstance(log *logger.FMLogger, gro return instances } -// todo 考虑instance较多的场景进行分批 -func (c *Controller) GetInstance(log *logger.FMLogger) ([]InstanceInfo, error) { +func (c *Controller) queryInstanceFromInfluxdb(log *logger.FMLogger) (*influx.Response, error) { command := "SELECT server_session_count,instance_id,fleet_id,scaling_group_id FROM process WHERE time > now() - 10s " log.Info("influxDB query command of getting instance list: [%s]", command) q := influx.NewQuery(command, c.database, c.timePrecision) - resp, err := c.client.Query(q) - if err != nil || resp.Error() != nil { - return nil, err + return c.client.Query(q) +} + +// todo 考虑instance较多的场景进行分批 +func (c *Controller) GetInstance(log *logger.FMLogger) ([]InstanceInfo, error) { + var resp *influx.Response + var err error + for i := 0; i <= maxRetries; i++ { + resp, err = c.queryInstanceFromInfluxdb(log) + if err != nil || resp.Error() != nil { + log.Error("query influxdb error, err %+v, resp error %v", err, resp.Error()) + return nil, err + } + if resp.Results != nil && resp.Results[0].Series != nil { + break + } + log.Error("get instance result from influxdb is nil, try again...") + time.Sleep(retryInterval) } var instanceList []InstanceInfo if resp.Results == nil || resp.Results[0].Series == nil {