CSS 390: Notes from Lecture 8 (DRAFT)

Administrivia

Quote of the day: "Productive in a day, efficient in a week, expert in a year" — Andrew Gerrand, "Go and the Zen of Python" (slide #28)

Our Story So Far

Code Reviews

Code Review as a Presubmit Requirement

Goals

Live Demo

Eavesdrop on code reviews for the Wikimedia project: https://gerrit.wikimedia.org/

Go

Using a Channel as a Mutex

Use a buffered channel:


// Initialize: create and fill buffered channel.
// The channel has capacity 1, so the single value placed in it acts as
// the lock token: whoever has received it owns the lock.
mutex := make(chan bool, 1)
mutex <-true

//...

// Lock: take the token out of the channel.  This receive blocks while
// the channel is empty, i.e. while another goroutine holds the token.
<-mutex
// Critical section
// Unlock: put the token back so the next receiver can proceed.
mutex <-true

Worker Pools

Baseline program: compute vector distance. Include a random sleep to simulate an I/O-bound task, drawn from a normal distribution with a 50 ms mean and a 20 ms standard deviation. The baseline runs in approximately 1 minute (1,275 distance computations at about 50 ms each).

package main

import (
	"fmt"
	"math"
	"math/rand"
	"time"
)

// Parameters of the simulated load, stored as float64 nanosecond counts so
// they can be combined directly with rand.NormFloat64.
const (
	avg = float64(50 * time.Millisecond) // mean sleep duration
	dev = float64(20 * time.Millisecond) // standard deviation of the sleep
)

// max is the largest value taken by the outer loop index in serial.
const max = 50

// load simulates an I/O-bound task by sleeping for a random duration drawn
// from a normal distribution with mean avg and standard deviation dev.
// A draw that comes out zero or negative makes time.Sleep return immediately.
func load() {
	jitter := rand.NormFloat64() * dev
	time.Sleep(time.Duration(jitter + avg))
}

// distance returns the Euclidean length of the vector (x, y).  It calls
// load first, so every call also pays the simulated I/O delay.
func distance(x float64, y float64) float64 {
	load()
	sumOfSquares := x*x + y*y
	return math.Sqrt(sumOfSquares)
}

func serial() {
	start := time.Now()
	for i := 1; i <= max; i++ {
		for j := 1; j <= i; j++ {
			x, y := float64(i), float64(j)
			d := distance(x, y)
			fmt.Printf("%9.3f\t%9.3f\t%9.3f\t\n", x, y, d)
		}
	}
	end := time.Now()
	fmt.Printf("\n\nelapsed time: %s\n", end.Sub(start))
}

// main runs the serial baseline, which prints every computed distance
// followed by the total elapsed time.
func main() {
	serial()
}

Two basic approaches:

  1. keep poolsize threads running and communicate through a channel
  2. spawn off threads (goroutines) as needed, taking advantage of the low cost of creating a goroutine

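Running 10 concurrent threads reduces the run time to about 6 seconds. The experiment may be rerun with different pool sizes. The code below takes the second approach: it spawns a goroutine per task and uses a buffered channel to cap the number in flight. Since the threads are running in parallel, they cannot simply return their results to the caller; we need to set up a mechanism (here, a second channel) to communicate the results back to the main thread.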

package main

import (
	"fmt"
	"math"
	"math/rand"
	"time"
)

// Parameters of the simulated load, stored as float64 nanosecond counts so
// they can be combined directly with rand.NormFloat64.
const (
	average   = float64(50 * time.Millisecond) // mean sleep duration
	deviation = float64(20 * time.Millisecond) // standard deviation of the sleep
)

// Problem size and concurrency limit.
const (
	max      = 50 // largest value of the outer loop index
	poolsize = 10 // maximum number of workers in flight at once
)

// load simulates an I/O-bound task: it sleeps for a random duration drawn
// from a normal distribution with mean average and standard deviation
// deviation.  A draw that comes out zero or negative makes time.Sleep
// return immediately.
func load() {
	pause := rand.NormFloat64()*deviation + average
	time.Sleep(time.Duration(pause))
}

// distance returns the Euclidean length of the vector (x, y).  It calls
// load first, so every call also pays the simulated I/O delay.
func distance(x float64, y float64) float64 {
	load()
	sumOfSquares := x*x + y*y
	return math.Sqrt(sumOfSquares)
}

// Result carries one computed distance together with the inputs it was
// computed from.
type Result struct {
	X, Y     float64 // input coordinates
	Distance float64 // distance computed for (X, Y)
}

func main() {
	entries := max * (max + 1) / 2
	resultsQueue := make(chan *Result, 2*poolsize)

	limit := make(chan interface{}, poolsize)
	for i := 0; i < poolsize; i++ {
		limit <- true
	}
	startPool := time.Now()
	go func() {
		for i := 1; i <= max; i++ {
			for j := 1; j <= i; j++ {
				x, y := float64(i), float64(j)
				<-limit
				go func() {
					defer func() { limit <- true }()
					resultsQueue <- &Result{
						X:        x,
						Y:        y,
						Distance: distance(x, y),
					}
				}()
			}
		}
	}()
	for i := 0; i < entries; i++ {
		r := <-resultsQueue
		fmt.Printf("%8.4f\t%8.4f\t%8.4f\n", r.X, r.Y, r.Distance)

	}
	endPool := time.Now()

	fmt.Println()

	fmt.Printf("elapsed time workpool: %s\n", endPool.Sub(startPool))
}

Refactoring to make the code easier to read:

package main

import (
	"fmt"
	"math"
	"math/rand"
	"time"
)

// Simulated-load parameters.  Both are float64 nanosecond counts so that
// they can be used in arithmetic with rand.NormFloat64.
const (
	average   = float64(50 * time.Millisecond) // mean sleep duration
	deviation = float64(20 * time.Millisecond) // standard deviation of the sleep
)

// Problem size and concurrency limit.
const (
	max      = 50 // largest value of the outer loop index
	poolsize = 10 // number of tokens placed in the limit channel
)

// load simulator
func load() {
	load := time.Duration(rand.NormFloat64()*deviation + average)
	time.Sleep(load)
}

// distance returns the Euclidean length of the vector (x, y), after first
// calling load to pay the simulated I/O delay.
func distance(x float64, y float64) float64 {
	load()
	squared := x*x + y*y
	return math.Sqrt(squared)
}

// Result pairs a computed distance with the inputs it was computed from.
type Result struct {
	X, Y     float64 // input coordinates
	Distance float64 // distance computed for (X, Y)
}

// worker computes the distance for (x, y) and sends the inputs together
// with the result on results.  On the way out it puts a token back on
// limit, releasing the slot it was occupying.
func worker(x, y float64, limit chan interface{}, results chan *Result) {
	defer func() { limit <- true }()

	res := &Result{X: x, Y: y}
	res.Distance = distance(x, y)
	results <- res
}

// poolManager starts one worker goroutine for every pair (i, j) with
// 1 <= j <= i <= max.  It takes a token from limit before each start, so
// the number of workers in flight never exceeds the number of tokens that
// were placed in limit.
func poolManager(limit chan interface{}, results chan *Result) {
	for i := 1; i <= max; i++ {
		x := float64(i)
		for j := 1; j <= i; j++ {
			// Take a token out of the limit channel.  If the
			// receive blocks, every token is currently held by a
			// worker that is still in flight.  Each worker is
			// responsible for putting its token back into the
			// limit channel when it finishes.
			<-limit
			go worker(x, float64(j), limit, results)
		}
	}
}

func main() {
	entries := max * (max + 1) / 2

	// Get results back.  Should be faster than the worker thread,
	// but buffer it anyway to avoid waiting unnecessarily.
	results := make(chan *Result, 2*poolsize)

	// Create & fill a channel with buffer of poolsize.  A new
	// thread will be spawned only when the pool isn't empty.
	limit := make(chan interface{}, poolsize)
	for i := 0; i < poolsize; i++ {
		limit <- true
	}

	startPool := time.Now()

	// Need to run the pool manager in a goroutine so the results
	// channel doesn't block.
	go poolManager(limit, results)

	// Get the results and print.  Included within the timed range
	// for consistency with the serial timing.
	for i := 0; i < entries; i++ {
		r := <-results
		fmt.Printf("%8.4f\t%8.4f\t%8.4f\n", r.X, r.Y, r.Distance)

	}
	endPool := time.Now()

	fmt.Println()

	fmt.Printf("elapsed time workpool: %s\n", endPool.Sub(startPool))
}