CSS 390: Notes from Lecture 4 (DRAFT)

Concurrency

"Don't communicate by sharing memory; share memory by communicating"

Quality Assurance

Key questions:

Employ a range of techniques: each method has strengths and weaknesses.

Combinatorial explosion: number of execution paths increases exponentially in the number of branches. We cannot test all possible execution paths. We aim for the more tractable objective of code coverage: execute every statement at least once.

Unit Testing

Component-level testing: confidence that a module works well in isolation

Various support packages for various languages: JUnit (Java), PyUnit (Python), etc.

Dependency Injection/Mocking: replace component not under test with fake component

Unit Testing in Go

testing package works with go test command

Source files in the package directory ending with _test.go are assumed to be tests. Functions beginning with Test and taking a testing.T pointer argument are run as test cases.

Test-Driven Development

  1. write the test first
  2. test should fail: this validates that something is actually being tested
  3. write just enough code to make the test pass
  4. refactor
  5. repeat

Live Demo

Sysstats Server

Imagine we have a datacenter containing 100 machines. We wish to monitor disk usage, so we write a server program to show the output of the df command.

package main

import (
	"fmt"
	"net/http"
	"os/exec"
)

// stats handles requests for /stats by running "df -h" on the local
// machine and writing its output back as plain text.  If the command
// cannot be run, or exits with an error, the client receives a 500
// response carrying the error text rather than an empty 200.
func stats(response http.ResponseWriter, request *http.Request) {
	output, err := exec.Command("df", "-h").Output()
	if err != nil {
		http.Error(response, err.Error(), http.StatusInternalServerError)
		return
	}
	// Set stores the value under the canonical key "Content-Type".
	// Assigning to the header map directly under "content-type" would
	// leave a key that Header.Get and Header.Set never look at.
	response.Header().Set("Content-Type", "text/plain")
	response.Write(output)
}

// main registers the stats handler under /stats and serves HTTP on
// port 8080.  Whatever ListenAndServe returns is reported on standard
// output before the program ends.
func main() {
	http.HandleFunc("/stats", stats)
	fmt.Printf("Server fail: %s\n", http.ListenAndServe(":8080", nil))
}

Sample output:

Filesystem      Size  Used Avail Use% Mounted on
/dev/sda5       887G  831G   11G  99% /
none            4.0K     0  4.0K   0% /sys/fs/cgroup
udev            3.7G  4.0K  3.7G   1% /dev
tmpfs           759M  1.5M  757M   1% /run
none            5.0M     0  5.0M   0% /run/lock
none            3.8G  3.0M  3.7G   1% /run/shm
none            100M   36K  100M   1% /run/user

The jaded senior programmer noticed a flaw on code review. Calls to external commands or remote servers should always have timeouts for error recovery. The revised version looks like this (but that's not the point of today's exercise):

package main

import (
	"fmt"
	"net/http"
	"os/exec"
	"time"
)

const (
	// timeout bounds how long a request waits for df to finish.
	timeout = 2 * time.Second
)

// stats handles requests for /stats by running "df -h" and writing its
// output back as plain text.  The command runs in its own goroutine so
// the handler can give up after timeout; on a timeout or a command
// failure the client receives a 500 response with the error text.
func stats(response http.ResponseWriter, request *http.Request) {
	// done is buffered so the goroutine below can always deliver its
	// result and exit, even when the handler has already timed out and
	// nobody is receiving any more.
	done := make(chan error, 1)
	command := exec.Command("df", "-h")

	var (
		output []byte
		err    error
	)

	// Execute the command in a separate goroutine so the handler can
	// time out gracefully.  output is only read after a receive from
	// done, which orders the read after this write.
	go func() {
		var err error
		output, err = command.Output()
		done <- err
	}()

	// Start the clock.
	timer := time.NewTimer(timeout)

	// Wait for the command to complete or time out.
	select {
	case err = <-done:
		// The goroutine completed or failed.  The timer is no longer
		// needed.
		timer.Stop()
	case <-timer.C:
		// Too late.  Kill the process so the goroutine's Output call
		// returns.  Process is nil if the command never started.
		if command.Process != nil {
			// Best effort: a failure here normally means the
			// process has already exited on its own.
			_ = command.Process.Kill()
		}
		err = fmt.Errorf("timeout")
	}

	// Did it work?
	if err != nil {
		http.Error(response, err.Error(), http.StatusInternalServerError)
		return
	}
	// Set stores the value under the canonical key "Content-Type".
	response.Header().Set("Content-Type", "text/plain")
	response.Write(output)
}

// main registers the stats handler under /stats and serves HTTP on
// port 8080.  Whatever ListenAndServe returns is reported on standard
// output before the program ends.
func main() {
	http.HandleFunc("/stats", stats)
	fmt.Printf("Server fail: %s\n", http.ListenAndServe(":8080", nil))
}

System Monitor

To monitor disk usage we write a program that will periodically poll the stats server on each machine and fire off an alert if the utilization exceeds an alerting threshold.

To make this manageable for class demonstration, we'll make some simplifications

  1. hardcode the list of disk devices
  2. an alert is simply a message printed to standard output

The program will terminate if it receives too many error responses. Consider whether that is desirable behavior in a monitoring program (hint: it isn't).

package main

import (
	"bufio"
	"fmt"
	"io"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"
)

const (
	// maxConsecutiveErrors is the error count main tolerates; once its
	// count exceeds this value the program exits.
	maxConsecutiveErrors = 10
	// remote is the host:port of the stats server that main polls.
	remote               = "localhost:8080"
	//interval = 5 * time.Minute
	// interval is the period of the ticker that paces the polling loop.
	// NOTE(review): the commented-out 5 minute value above is presumably
	// the intended non-demo setting; confirm.
	interval = 10 * time.Second
)

// Utilization percentages above which main tags a disk YELLOW or RED.
const (
	thresholdYellow = 90.0
	thresholdRed    = 95.0
)

var (
	// disks lists the device names (first column of df output) whose
	// utilization is monitored.
	disks = []string{"/dev/sda5"}
)

// Utilization scans df-style output from reader and returns the
// percentage used (fifth column) of every monitored device it finds,
// keyed by device name.
//
// A line is considered only when its first field is exactly one of
// the names in disks, so "/dev/sda5" does not also pick up
// "/dev/sda51".  Lines for a monitored device that are too short or
// whose percentage does not parse are reported on standard error and
// skipped, as is a read error from reader.
func Utilization(reader io.Reader) map[string]float64 {
	utilization := make(map[string]float64)
	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		line := scanner.Text()
		fields := strings.Fields(line)
		if len(fields) == 0 || !monitored(fields[0]) {
			continue
		}
		// The percentage is the fifth column; a truncated line must
		// not be indexed past its end.
		if len(fields) < 5 {
			fmt.Fprintf(os.Stderr, "unable to parse line: \"%s\"\n\ttoo few fields\n", line)
			continue
		}
		// Strip the trailing "%" only when it is actually there.
		percent, err := strconv.ParseFloat(strings.TrimSuffix(fields[4], "%"), 64)
		if err != nil {
			fmt.Fprintf(os.Stderr, "unable to parse line: \"%s\"\n\t%s\n", line, err)
			continue
		}
		utilization[fields[0]] = percent
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "reading stats: %s\n", err)
	}
	return utilization
}

// monitored reports whether device is one of the configured disks.
func monitored(device string) bool {
	for _, disk := range disks {
		if disk == device {
			return true
		}
	}
	return false
}

// main polls the stats server at remote once per interval and prints
// one line per monitored disk to standard output, tagged RED or YELLOW
// when its utilization is above the matching threshold.  Progress and
// errors go to standard error.  The program exits with status 1 once
// more than maxConsecutiveErrors requests in a row have failed.
func main() {
	errorCount := 0
	// Bound every request: a stats server that accepts the connection
	// but never answers must not stall the polling loop indefinitely.
	client := http.Client{Timeout: interval}
	ticker := time.Tick(interval)
	url := fmt.Sprintf("http://%s/stats", remote)
	for {
		start := time.Now()
		fmt.Fprintf(os.Stderr, "%s: request\n", start)
		response, err := client.Get(url)
		now := time.Now()
		fmt.Fprintf(os.Stderr, "%s: response received (%s elapsed) \n", now, now.Sub(start))
		switch {
		case err != nil:
			fmt.Fprintf(os.Stderr, "%s: request stats from client: %s\n", now, err)
			if errorCount++; errorCount > maxConsecutiveErrors {
				fmt.Fprintf(os.Stderr, "Too many errors.  Terminating\n")
				os.Exit(1)
			}
		case response.StatusCode != http.StatusOK:
			fmt.Fprintf(os.Stderr, "%s: %s\n", now, response.Status)
			// Close explicitly: a defer would not run until main
			// returns, which this loop never does.
			response.Body.Close()
		default:
			// A good response ends the run of consecutive errors.
			errorCount = 0
			utilization := Utilization(response.Body)
			response.Body.Close()
			for theDisk, theUtilization := range utilization {
				if theUtilization > thresholdRed {
					fmt.Fprintf(os.Stdout, "RED   ")
				} else if theUtilization > thresholdYellow {
					fmt.Fprintf(os.Stdout, "YELLOW")
				} else {
					fmt.Fprintf(os.Stdout, "      ")
				}
				fmt.Fprintf(os.Stdout, "\t%15s\t%f\n", theDisk, theUtilization)
			}
		}
		<-ticker
	}
}

The big question is, how the heck do we test this?

System Monitor, Second Try

To write something testable, we have to think about the useful abstractions contained in the monitor:

The first draft of the revised monitor program will have 3 modules and a skeleton main:

package main

import (
	"fmt"
	"os"
	"uwb-css490/monitor2/manager"
)

var (
	// disks names the devices handed to manager.Monitor.
	disks = []string{"/dev/disk0s2"}
)

// main runs the monitor over disks.  If the monitor gives up, its
// error is printed to standard error and the process exits with
// status 1.
func main() {
	err := manager.Monitor(disks)
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "%s\n", err)
	os.Exit(1)
}
package manager

import (
	"fmt"
	"os"
	"time"
	"uwb-css490/monitor2/alert"
	"uwb-css490/monitor2/collect"
)

const (
	// maxConsecutiveErrors is the number of failed collections in a row
	// at which Monitor gives up and returns an error.
	maxConsecutiveErrors = 10
	// remote is the host:port of the stats server passed to collect.Get.
	remote               = "localhost:8080"
	//interval = 5 * time.Minute
	//interval = 10 * time.Second
	// interval is the period of the ticker that paces Monitor's loop.
	interval = 5 * time.Second
)

// Utilization percentages above which Monitor raises a Yellow or Red
// alert for a disk.
const (
	thresholdYellow = 90.0
	thresholdRed    = 95.0
)

var (
	// alerts holds the active alert for each disk, keyed by disk name.
	// A missing or nil entry means no alert is active for that disk.
	alerts = make(map[string]alert.Alert)
)

// statsFromList returns a fresh map holding one entry per name in
// disks, each with a utilization of zero.
func statsFromList(disks []string) map[string]float64 {
	zeroed := make(map[string]float64, len(disks))
	for i := range disks {
		zeroed[disks[i]] = 0
	}
	return zeroed
}

func Monitor(disks []string) error {
	errorCount := 0
	ticker := time.Tick(interval)
	for {
		stats := statsFromList(disks)
		err := collect.Get(stats, remote)
		if err != nil {
			errorCount++
			if errorCount >= maxConsecutiveErrors {
				err := fmt.Errorf("Too many errors\n")
				return err
			}
		} else {
			errorCount = 0
			for disk, utilization := range stats {
				fmt.Fprintf(os.Stderr, "utilization: %s\t%f\n", disk, utilization)
				if utilization > thresholdRed {
					if thisAlert := alerts[disk]; thisAlert != nil {
						if thisAlert.Level() != alert.Red {
							thisAlert.Reset(alert.Red, fmt.Sprintf("%s: over %5.1f% (%.1f%%)",
								disk, thresholdRed, utilization))
						}
					} else {
						alerts[disk] = alert.New(alert.Red, fmt.Sprintf("%s: over %.1f%% (%.1f%%)",
							disk, thresholdRed, utilization))
					}
				} else if utilization > thresholdYellow {
					if thisAlert := alerts[disk]; thisAlert != nil {
						if thisAlert.Level() != alert.Yellow {
							thisAlert.Reset(alert.Yellow, fmt.Sprintf("%s: over %.1f%% (%.1f%%)",
								disk, thresholdYellow, utilization))
						}
					} else {
						alerts[disk] = alert.New(alert.Yellow, fmt.Sprintf("%s: over %.1f%% (%.1f%%)",
							disk, thresholdYellow, utilization))
					}
				} else {
					if thisAlert := alerts[disk]; thisAlert != nil {
						thisAlert.Reset(alert.Clear, "")
						alerts[disk] = nil
					}
				}
			}
		}
		<-ticker
	}
}
package collect

import (
	"bufio"
	"fmt"
	"io"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"
)

// Getter is the single operation this package needs from an HTTP
// client: issuing a GET request for a URL.
type Getter interface {
	Get(url string) (*http.Response, error)
}

// Client is the Getter used by Get.  It defaults to a real
// *http.Client; being a package-level variable of interface type, it
// can be replaced with another Getter implementation.
var Client Getter = &http.Client{}

// Utilization scans df-style output from reader and, for every line
// whose first field is a key of disks, stores the percentage found in
// the fifth column as that key's value.  Keys with no matching line
// keep their current value.
//
// Matching is on the whole device name, so a line for "/dev/disk0s20"
// never overwrites the entry for "/dev/disk0s2".  An error is returned
// if a matching line has fewer than five fields, if its percentage
// does not parse, or if reading from reader fails.
func Utilization(reader io.Reader, disks map[string]float64) error {
	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		line := scanner.Text()
		fields := strings.Fields(line)
		if len(fields) == 0 {
			continue
		}
		device := fields[0]
		if _, wanted := disks[device]; !wanted {
			continue
		}
		// A truncated line must be reported, not indexed past its end.
		if len(fields) < 5 {
			return fmt.Errorf("parse error processing line %q: expected at least 5 fields, found %d", line, len(fields))
		}
		// Strip the trailing "%" only when it is actually there.
		percent, err := strconv.ParseFloat(strings.TrimSuffix(fields[4], "%"), 64)
		if err != nil {
			return fmt.Errorf("parse error processing line %q: %s", line, err)
		}
		disks[device] = percent
	}
	return scanner.Err()
}

// Get requests http://<source>/stats through Client and stores the
// utilization reported for each key of utilization in that map.
//
// It returns an error when the request fails, when the server answers
// with a status other than 200, or when the body cannot be parsed.
// Request timing and every failure are also logged to standard error.
func Get(utilization map[string]float64, source string) error {
	request := fmt.Sprintf("http://%s/stats", source)
	start := time.Now()
	fmt.Fprintf(os.Stderr, "%s: request %s\n", start, request)
	response, err := Client.Get(request)
	now := time.Now()
	fmt.Fprintf(os.Stderr, "%s: elapsed time %s\n", now, now.Sub(start))
	if err != nil {
		fmt.Fprintf(os.Stderr, "%s: request stats from client: %s\n", now, err)
		return err
	}
	// Get is called on every polling cycle, so an unclosed body would
	// leak one connection per call.
	defer response.Body.Close()
	if response.StatusCode != http.StatusOK {
		err = fmt.Errorf("received status %s", response.Status)
		fmt.Fprintf(os.Stderr, "%s\t%s\n", now, err)
		return err
	}
	if err = Utilization(response.Body, utilization); err != nil {
		fmt.Fprintf(os.Stderr, "%s\tparsing data: %s\n", time.Now(), err)
		return err
	}
	return nil
}
package alert

import (
	"fmt"
	"os"
)

// Alert levels, from no alert (Clear) up to the most severe (Red).
const (
	Clear = iota
	Green
	Yellow
	Red
)

// Level is the severity of an alert.
type Level int

// Alert is a raised condition whose severity can be read, changed, or
// cleared.
type Alert interface {
	// Level returns the alert's current severity.
	Level() Level
	// Reset replaces the alert's message and, if newLevel differs from
	// the current level, moves the alert to newLevel.
	Reset(newLevel Level, newMessage string)
	// Clear sets the alert's level to Clear.
	Clear()
}

// LevelName maps each level to the label used when printing alerts.
var LevelName = map[Level]string{
	Clear:  "OK",
	Green:  "GREEN",
	Yellow: "YELLOW",
	Red:    "RED",
}

// alert is the Alert implementation handed out by New.
type alert struct {
	id      int
	level   Level
	message string
}

// lastID is the id given to the most recently created alert.
var lastID int

// print writes the alert's level label, id and message to standard
// output as one tab-separated line.
func (a *alert) print() {
	fmt.Fprintf(os.Stdout, "%s\t%d\t%s\n", LevelName[a.level], a.id, a.message)
}

// Level returns the alert's current severity.
func (a *alert) Level() Level {
	return a.level
}

// Reset stores newMessage and, when newLevel differs from the current
// level, prints the transition to standard output before adopting
// newLevel.
func (a *alert) Reset(newLevel Level, newMessage string) {
	a.message = newMessage
	if newLevel == a.level {
		return
	}
	fmt.Fprintf(os.Stdout, "%s->%s\t%d\t%s\n",
		LevelName[a.level], LevelName[newLevel],
		a.id,
		a.message)
	a.level = newLevel
}

// Clear sets the level to Clear without printing anything.
func (a *alert) Clear() {
	a.level = Clear
}

// New creates an alert with the next id, prints it, and returns it.
// It is a variable holding a function, so it can be reassigned.
var New = func(level Level, message string) Alert {
	lastID++
	created := &alert{id: lastID, level: level, message: message}
	created.print()
	return created
}

This makes the code longer, but gives us several places we can insert useful tests.

Firstly, the code that parses the response and calculates disk utilization has no external dependencies, so it can be tested easily by supplying canned input and evaluating the response.

package collect

import (
	//"fmt"
	"strings"
	"testing"
)

// check reports a test error if m has no entry for disk, and another
// if the value stored for disk (zero when missing) differs from
// expected.
func check(t *testing.T, m map[string]float64, disk string, expected float64) {
	actual, found := m[disk]
	if !found {
		t.Errorf("missing %s", disk)
	}
	if actual == expected {
		return
	}
	t.Errorf("disk %s: expected %f, got %f", disk, expected, actual)
}

// TestUtilization feeds canned df output for two disks to Utilization
// together with a map that tracks three, and verifies that the two
// reported disks pick up their percentages while the third keeps its
// initial value.
func TestUtilization(t *testing.T) {
	const sample = `
/dev/disk0s2    250G   114G   136G    46% 27795469 33273971   46%   /
/dev/disk0s3    250G   114G   136G    99% 27795469 33273971   88%   /mnt
`
	out := map[string]float64{
		"/dev/disk0s2": 0.0,
		"/dev/disk0s3": 0.0,
		"/dev/disk0s4": 0.0,
	}
	if err := Utilization(strings.NewReader(sample), out); err != nil {
		t.Errorf("parse error %s", err)
	}
	if n := len(out); n != 3 {
		t.Errorf("expected 3 values, received %d", n)
	}
	expectations := []struct {
		disk    string
		percent float64
	}{
		{"/dev/disk0s2", 46.0},
		{"/dev/disk0s3", 99.0},
		{"/dev/disk0s4", 0.0},
	}
	for _, want := range expectations {
		check(t, out, want.disk, want.percent)
	}
}

More interestingly, note the lines in the collect package.

// Getter is the single operation this package needs from an HTTP
// client: issuing a GET request for a URL.
type Getter interface {
	Get(url string) (*http.Response, error)
}

// Client defaults to a real *http.Client; being a package-level
// variable of interface type, it can be replaced with another Getter.
var Client Getter = &http.Client{}

collect.Client is a variable that holds an http.Client object, through which an "HTTP GET" request is made. Since http.Client has a Get method with the matching signature, it automatically implements the collect.Getter interface, so the collect.Client variable may be declared as type collect.Getter.

By declaring collect.Client this way, we can inject a dependency into the program by replacing its value with a mock client in the test's init function.

To mock out the http.Client object, we also need a ReadCloser object to provide a value for the response.Body field.

Once we've mocked out the http client, we can run the test without requiring a working sysstat server!

Note that this is an incomplete test, just a demonstration of the capability.

package manager

import (
	"fmt"
	"io"
	"net/http"
	"strings"
	"testing"
	"uwb-css490/monitor2/collect"
)

// mockClient is a stand-in for the HTTP client: each call to Get
// serves the next canned body from responses.
type mockClient struct {
	index     int      // position of the next response to serve
	responses []string // canned response bodies, served in order
}

// ReadCloser wraps an io.Reader and adds a Close method that does
// nothing, so it can be used as an http.Response body.
type ReadCloser struct {
	reader io.Reader
}

// Done receives a value from mockClient.Get once the canned responses
// have run out.
var Done chan bool

// init creates the Done channel and replaces collect.Client with a
// mock that serves a fixed series of df lines for /dev/disk0s2.  The
// lines differ only in the utilization column.
func init() {
	Done = make(chan bool)

	percentages := []int{46, 80, 92, 96, 92, 80, 90}
	canned := make([]string, len(percentages))
	for i, percent := range percentages {
		canned[i] = fmt.Sprintf("/dev/disk0s2    250G   114G   136G    %d%% 27795469 33273971   46%%   /\n", percent)
	}
	collect.Client = &mockClient{responses: canned}
}

// Get serves the next canned response as a 200 whose body is that
// response's text.  Once every response has been served it sends on
// Done and returns an error instead; the url argument is ignored.
func (m *mockClient) Get(url string) (*http.Response, error) {
	if m.index < len(m.responses) {
		body := &ReadCloser{reader: strings.NewReader(m.responses[m.index])}
		m.index++
		return &http.Response{StatusCode: 200, Body: body}, nil
	}
	Done <- true
	return nil, fmt.Errorf("out of responses")
}

// Read passes the call straight through to the wrapped reader.
func (rc *ReadCloser) Read(p []byte) (int, error) {
	n, err := rc.reader.Read(p)
	return n, err
}

// Close does nothing and always succeeds.
func (rc *ReadCloser) Close() error {
	return nil
}

// TestMonitor starts Monitor in the background against the mocked
// client and returns once the mock signals on Done that its canned
// responses are used up.  It makes no assertions of its own.
func TestMonitor(t *testing.T) {
	disks := []string{"/dev/disk0s2"}
	go Monitor(disks)
	<-Done
}

Footnotes