Quote of the day: Productive in a day, efficient in a week, expert in a year—Andrew Gerrand, "Go and the Zen of Python" (slide # 28)
`report` (the output) to `reporter` (the thing producing the output)
Eavesdrop on code reviews for the Wikimedia project: https://gerrit.wikimedia.org/
Use a buffered channel of capacity 1 as a mutex:
// Initialize: create a buffered channel of capacity 1 and fill it with a
// single token. Holding the token stands for holding the lock.
mutex := make(chan bool, 1)
mutex <-true
//...
// Lock: take the token out of the channel. This blocks while the channel
// is empty.
<-mutex
// Critical section
// Unlock: put the token back so the next receive can proceed.
mutex <-true
Baseline program: compute the vector distance. Include a random sleep to simulate an I/O-bound task, drawn from a normal distribution with a 50 ms mean and a 20 ms standard deviation. The baseline runs in approximately 1 minute.
package main
import (
"fmt"
"math"
"math/rand"
"time"
)
// Parameters of the simulation.
const (
// avg is the mean of the simulated delay: 50 ms, held as a float64 count
// of nanoseconds so it can be combined with rand.NormFloat64.
avg = float64(50 * time.Millisecond)
// dev is the standard deviation of the simulated delay: 20 ms, in the
// same representation as avg.
dev = float64(20 * time.Millisecond)
// max is the upper bound of the outer loop index in serial.
// NOTE(review): with Go 1.21+ this name shadows the built-in max function
// throughout the package.
max = 50
)
// load simulates an I/O-bound task by sleeping for a normally distributed
// amount of time with mean avg and standard deviation dev. A non-positive
// sample makes time.Sleep return immediately.
func load() {
	jitter := rand.NormFloat64() * dev
	time.Sleep(time.Duration(jitter + avg))
}
// distance returns the Euclidean length of the vector (x, y) after paying
// the simulated cost of one call to load.
//
// math.Hypot is used instead of Sqrt(x*x + y*y) so that very large or very
// small inputs do not overflow or underflow in the intermediate squares.
func distance(x float64, y float64) float64 {
	load()
	return math.Hypot(x, y)
}
func serial() {
start := time.Now()
for i := 1; i <= max; i++ {
for j := 1; j <= i; j++ {
x, y := float64(i), float64(j)
d := distance(x, y)
fmt.Printf("%9.3f\t%9.3f\t%9.3f\t\n", x, y, d)
}
}
end := time.Now()
fmt.Printf("\n\nelapsed time: %s\n", end.Sub(start))
}
// main runs the serial baseline.
func main() {
serial()
}
Two basic approaches:
`poolsize` threads running and communicating through a channel
Running 10 concurrent threads reduces the run time to about 6 seconds. The experiment may be rerun with different pool sizes. Since the threads are running in parallel, we need to set up a mechanism to communicate the results back to the main thread.
package main
import (
"fmt"
"math"
"math/rand"
"time"
)
// Parameters of the simulation.
const (
// average is the mean of the simulated delay: 50 ms, held as a float64
// count of nanoseconds so it can be combined with rand.NormFloat64.
average = float64(50 * time.Millisecond)
// deviation is the standard deviation of the simulated delay: 20 ms, in
// the same representation as average.
deviation = float64(20 * time.Millisecond)
// max is the upper bound of the outer loop index in main.
// NOTE(review): with Go 1.21+ this name shadows the built-in max function
// throughout the package.
max = 50
// poolsize is the capacity of the limit channel in main, i.e. the number
// of tokens available to goroutines that want to start.
poolsize = 10
)
// load is the load simulator: it sleeps for a normally distributed amount
// of time with mean average and standard deviation deviation. A
// non-positive sample makes time.Sleep return immediately.
func load() {
	time.Sleep(time.Duration(rand.NormFloat64()*deviation + average))
}
// distance returns the Euclidean length of the vector (x, y) after paying
// the simulated cost of one call to load.
//
// math.Hypot is used instead of Sqrt(x*x + y*y) so that very large or very
// small inputs do not overflow or underflow in the intermediate squares.
func distance(x float64, y float64) float64 {
	load()
	return math.Hypot(x, y)
}
// Result pairs the inputs of one distance computation with its output.
type Result struct {
	X, Y     float64 // the two inputs passed to distance
	Distance float64 // the value distance returned for (X, Y)
}
func main() {
entries := max * (max + 1) / 2
resultsQueue := make(chan *Result, 2*poolsize)
limit := make(chan interface{}, poolsize)
for i := 0; i < poolsize; i++ {
limit <- true
}
startPool := time.Now()
go func() {
for i := 1; i <= max; i++ {
for j := 1; j <= i; j++ {
x, y := float64(i), float64(j)
<-limit
go func() {
defer func() { limit <- true }()
resultsQueue <- &Result{
X: x,
Y: y,
Distance: distance(x, y),
}
}()
}
}
}()
for i := 0; i < entries; i++ {
r := <-resultsQueue
fmt.Printf("%8.4f\t%8.4f\t%8.4f\n", r.X, r.Y, r.Distance)
}
endPool := time.Now()
fmt.Println()
fmt.Printf("elapsed time workpool: %s\n", endPool.Sub(startPool))
}
Refactoring to make the code easier to read:
package main
import (
"fmt"
"math"
"math/rand"
"time"
)
// Parameters of the simulation.
const (
// average is the mean of the simulated delay: 50 ms, held as a float64
// count of nanoseconds so it can be combined with rand.NormFloat64.
average = float64(50 * time.Millisecond)
// deviation is the standard deviation of the simulated delay: 20 ms, in
// the same representation as average.
deviation = float64(20 * time.Millisecond)
// max is the upper bound of the outer loop index in poolManager.
// NOTE(review): with Go 1.21+ this name shadows the built-in max function
// throughout the package.
max = 50
// poolsize is the capacity of the limit channel created in main, i.e. the
// number of tokens available to workers that want to start.
poolsize = 10
)
// load is the load simulator: it draws a normally distributed delay with
// mean average and standard deviation deviation and sleeps for that long.
// A non-positive sample makes time.Sleep return immediately.
func load() {
	sample := rand.NormFloat64()*deviation + average
	pause := time.Duration(sample)
	time.Sleep(pause)
}
// distance returns the Euclidean length of the vector (x, y) after paying
// the simulated cost of one call to load.
//
// math.Hypot is used instead of Sqrt(x*x + y*y) so that very large or very
// small inputs do not overflow or underflow in the intermediate squares.
func distance(x float64, y float64) float64 {
	load()
	return math.Hypot(x, y)
}
// Result carries one distance computation: the two inputs and the value
// computed from them.
type Result struct {
	X, Y, Distance float64
}
// worker computes distance(x, y) and sends the inputs together with the
// result on results. When it returns, it puts one entry back into limit.
func worker(x, y float64, limit chan interface{}, results chan *Result) {
	release := func() { limit <- true }
	defer release()

	d := distance(x, y)
	results <- &Result{X: x, Y: y, Distance: d}
}
// poolManager starts one worker goroutine for every pair (i, j) with
// 1 <= j <= i <= max, in row-major order. It returns once the last worker
// has been started; it does not wait for the workers to finish.
func poolManager(limit chan interface{}, results chan *Result) {
	for row := 1; row <= max; row++ {
		x := float64(row)
		for col := 1; col <= row; col++ {
			// Take one entry out of limit before starting a worker.
			// If this blocks, every entry is currently held by a
			// worker in flight. The worker is responsible for putting
			// its entry back into limit.
			<-limit
			go worker(x, float64(col), limit, results)
		}
	}
}
func main() {
entries := max * (max + 1) / 2
// Get results back. Should be faster than the worker thread,
// but buffer it anyway to avoid waiting unnecessarily.
results := make(chan *Result, 2*poolsize)
// Create & fill a channel with buffer of poolsize. A new
// thread will be spawned only when the pool isn't empty.
limit := make(chan interface{}, poolsize)
for i := 0; i < poolsize; i++ {
limit <- true
}
startPool := time.Now()
// Need to run the pool manager in a goroutine so the results
// channel doesn't block.
go poolManager(limit, results)
// Get the results and print. Included within the timed range
// for consistency with the serial timing.
for i := 0; i < entries; i++ {
r := <-results
fmt.Printf("%8.4f\t%8.4f\t%8.4f\n", r.X, r.Y, r.Distance)
}
endPool := time.Now()
fmt.Println()
fmt.Printf("elapsed time workpool: %s\n", endPool.Sub(startPool))
}