// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package summary maintains the summary of aggregated minute, hour, and day stats.
//
// For a container running for more than a day, the amount of tracked data can go up to
// 40 KB when cpu and memory are tracked. We'll start by enabling collection for the
// node, followed by docker, and then all containers as we understand the usage pattern
// better.
//
// TODO(rjnagal): Optimize the size if we start running it for every container.
package summary

import (
	"fmt"
	"sync"
	"time"

	"github.com/google/cadvisor/info/v1"
	info "github.com/google/cadvisor/info/v2"
)

// secondSample holds the usage fields we track for generating percentiles.
// One value is recorded per AddSample call.
type secondSample struct {
	Timestamp time.Time // time when the sample was recorded (copied from the container stat).
	Cpu       uint64    // cpu usage; left at zero when cpu is not being tracked.
	Memory    uint64    // memory usage (working set); left at zero when memory is not being tracked.
}

// availableResources records which resources are tracked for a container.
// New fills it in from the container spec's HasCpu and HasMemory flags.
type availableResources struct {
	Cpu    bool
	Memory bool
}

// StatsSummary accumulates per-container usage samples and exposes the
// latest instant, minute, hour, and day usage derived from them.
type StatsSummary struct {
	// Resources being tracked for this container.
	available availableResources
	// List of second samples. When a new minute sample is generated the list is
	// truncated to just the most recent sample, which is kept for continuity.
	secondSamples []*secondSample
	// Minute percentiles. We track 24 * 60 maximum samples.
	// NOTE(review): New currently sizes this buffer at 60 entries (one hour),
	// while day usage requests 60*24 samples -- confirm the intended capacity.
	minuteSamples *SamplesBuffer
	// Latest derived instant, minute, hour, and day stats. The instant usage is
	// refreshed on every AddSample call; the others are refreshed whenever a new
	// minute sample is generated.
	derivedStats info.DerivedStats // Guarded by dataLock.
	// dataLock guards derivedStats only.
	// NOTE(review): secondSamples and minuteSamples are accessed without it,
	// presumably because samples are added from a single goroutine -- confirm.
	dataLock sync.RWMutex
}

// Adds a new seconds sample.
// If enough seconds samples are collected, a minute sample is generated and derived // stats are updated. func (s *StatsSummary) AddSample(stat v1.ContainerStats) error { sample := secondSample{} sample.Timestamp = stat.Timestamp if s.available.Cpu { sample.Cpu = stat.Cpu.Usage.Total } if s.available.Memory { sample.Memory = stat.Memory.WorkingSet } s.secondSamples = append(s.secondSamples, &sample) s.updateLatestUsage() // TODO(jnagal): Use 'available' to avoid unnecessary computation. numSamples := len(s.secondSamples) elapsed := time.Nanosecond if numSamples > 1 { start := s.secondSamples[0].Timestamp end := s.secondSamples[numSamples-1].Timestamp elapsed = end.Sub(start) } if elapsed > 60*time.Second { // Make a minute sample. This works with dynamic housekeeping as long // as we keep max dynamic houskeeping period close to a minute. minuteSample := GetMinutePercentiles(s.secondSamples) // Clear seconds samples. Keep the latest sample for continuity. // Copying and resizing helps avoid slice re-allocation. s.secondSamples[0] = s.secondSamples[numSamples-1] s.secondSamples = s.secondSamples[:1] s.minuteSamples.Add(minuteSample) err := s.updateDerivedStats() if err != nil { return err } } return nil } func (s *StatsSummary) updateLatestUsage() { usage := info.InstantUsage{} numStats := len(s.secondSamples) if numStats < 1 { return } latest := s.secondSamples[numStats-1] usage.Memory = latest.Memory if numStats > 1 { previous := s.secondSamples[numStats-2] cpu, err := getCpuRate(*latest, *previous) if err == nil { usage.Cpu = cpu } } s.dataLock.Lock() defer s.dataLock.Unlock() s.derivedStats.LatestUsage = usage s.derivedStats.Timestamp = latest.Timestamp return } // Generate new derived stats based on current minute stats samples. 
func (s *StatsSummary) updateDerivedStats() error { derived := info.DerivedStats{} derived.Timestamp = time.Now() minuteSamples := s.minuteSamples.RecentStats(1) if len(minuteSamples) != 1 { return fmt.Errorf("failed to retrieve minute stats") } derived.MinuteUsage = *minuteSamples[0] hourUsage, err := s.getDerivedUsage(60) if err != nil { return fmt.Errorf("failed to compute hour stats: %v", err) } dayUsage, err := s.getDerivedUsage(60 * 24) if err != nil { return fmt.Errorf("failed to compute day usage: %v", err) } derived.HourUsage = hourUsage derived.DayUsage = dayUsage s.dataLock.Lock() defer s.dataLock.Unlock() derived.LatestUsage = s.derivedStats.LatestUsage s.derivedStats = derived return nil } // helper method to get hour and daily derived stats func (s *StatsSummary) getDerivedUsage(n int) (info.Usage, error) { if n < 1 { return info.Usage{}, fmt.Errorf("invalid number of samples requested: %d", n) } samples := s.minuteSamples.RecentStats(n) numSamples := len(samples) if numSamples < 1 { return info.Usage{}, fmt.Errorf("failed to retrieve any minute stats.") } // We generate derived stats even with partial data. usage := GetDerivedPercentiles(samples) // Assumes we have equally placed minute samples. usage.PercentComplete = int32(numSamples * 100 / n) return usage, nil } // Return the latest calculated derived stats. func (s *StatsSummary) DerivedStats() (info.DerivedStats, error) { s.dataLock.RLock() defer s.dataLock.RUnlock() return s.derivedStats, nil } func New(spec v1.ContainerSpec) (*StatsSummary, error) { summary := StatsSummary{} if spec.HasCpu { summary.available.Cpu = true } if spec.HasMemory { summary.available.Memory = true } if !summary.available.Cpu && !summary.available.Memory { return nil, fmt.Errorf("none of the resources are being tracked.") } summary.minuteSamples = NewSamplesBuffer(60 /* one hour */) return &summary, nil }