Basic data structure and algorithm merging and sorting of manual golang

origin

Recently, I read < < my first algorithm book > > ([Japan] Ishida Baohui; Miyazaki Xiuyi) this series of notes is intended to be practiced by golang

Merge sort

The merge sort algorithm will divide the sequence into two subsequences with the same length,
When it is impossible to further divide (that is, there is only one data in each subsequence),
Merge the subsequences.

Merging refers to merging two ordered subsequences into an ordered sequence.
This operation will be repeated until all subsequences are merged into a whole.

The total running time is O(nlogn),This is the same as the heap sort mentioned earlier.

From <<My first algorithm book>> [[Japan] Ishida Baohui; Miyazaki Xiuyi

technological process

  1. Given array to be sorted data[N]
  2. Create buffer[N]
  3. Set the initial merge step (length of sorted subsequence), span=1, that is, treat each element as a sorted subsequence
  4. Merge all sorted subsequences into the buffer according to the current step size
    1. Set q1 as the header subscript of subsequence 1, q2 as the header subscript of subsequence 2, and p points to the corresponding area of the buffer
    2. Compare the value size of q1 and q2 positions
    3. If the value of q1 is small, take the value of q1, put it into p, and then add 1 to both q1 and p
    4. If the value of q2 is small, take the value of q2, put it into p, and then add 1 to both q2 and p
    5. If the subsequence has been fetched, copy the subsequence that has not been fetched to p
  5. Exchange pointers between data and buffer, treat data as buffer and buffer as data to be merged
  6. span = span*2
  7. Repeat steps 3-6 until span > n

Design

  • ISorter: defines the sorter interface Define the value comparison function to be compatible with any value type, and adjust the comparison function to sort in reverse order
  • tMergeSort: merge sorter, which implements the ISorter interface

unit testing

merge_sort_test.go. Merging and sorting is relatively fast, so the test scale is set to 100000 elements

package sorting

import (
	"fmt"
	"learning/gooop/sorting"
	"learning/gooop/sorting/merge_sort"
	"math/rand"
	"testing"
	"time"
)

func Test_MergeSort(t *testing.T) {
	fnAssertTrue := func(b bool, msg string) {
		if !b {
			t.Fatal(msg)
		}
	}

	reversed := false
	fnCompare := func(a interface{}, b interface{}) sorting.CompareResult {
		i1 := a.(int)
		i2 := b.(int)

		if i1 < i2 {
			if reversed {
				return sorting.GREATER
			} else {
				return sorting.LESS
			}
		} else if i1 == i2 {
			return sorting.EQUAL
		} else {
			if reversed {
				return sorting.LESS
			} else {
				return sorting.GREATER
			}
		}
	}

	fnTestSorter := func(sorter sorting.ISorter) {
		reversed = false

		// test simple array
		samples := []interface{} { 2,3,1,5,4,7,6 }
		samples = sorter.Sort(samples, fnCompare)
		fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 2 3 4 5 6 7]",  "expecting 1,2,3,4,5,6,7")
		t.Log("pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]")

		// test 10000 items sorting
		rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
		for plus := 0;plus < 3;plus++ {
			sampleCount := 100 * 1000 + plus
			t.Logf("prepare large array with %v items", sampleCount)
			samples = make([]interface{}, sampleCount)
			for i := 0; i < sampleCount; i++ {
				samples[i] = rnd.Intn(sampleCount * 10)
			}

			t.Logf("sorting large array with %v items", sampleCount)
			t0 := time.Now().UnixNano()
			samples = sorter.Sort(samples, fnCompare)
			cost := time.Now().UnixNano() - t0
			for i := 1; i < sampleCount; i++ {
				fnAssertTrue(fnCompare(samples[i-1], samples[i]) != sorting.GREATER, "expecting <=")
			}
			t.Logf("end sorting large array, cost = %v ms", cost/1000000)
		}

		// test 0-20
		sampleCount := 20
		t.Log("sorting 0-20")
		samples = make([]interface{}, sampleCount)
		for i := 0;i < sampleCount;i++ {
			for {
				p := rnd.Intn(sampleCount)
				if samples[p] == nil {
					samples[p] = i
					break
				}
			}
		}
		t.Logf("unsort = %v", samples)

		samples = sorter.Sort(samples, fnCompare)
		t.Logf("sorted = %v", samples)
		fnAssertTrue(fmt.Sprintf("%v", samples) == "[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]", "expecting 0-20")
		t.Log("pass sorting 0-20")

		// test special
		samples = []interface{} {}
		samples = sorter.Sort(samples, fnCompare)
		t.Log("pass sorting []")

		samples = []interface{} { 1 }
		samples = sorter.Sort(samples, fnCompare)
		t.Log("pass sorting [1]")

		samples = []interface{} { 3,1 }
		samples = sorter.Sort(samples, fnCompare)
		fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 3]",  "expecting 1,3")
		t.Log("pass sorting [1 3]")

		reversed = true
		samples = []interface{} { 2, 3,1 }
		samples = sorter.Sort(samples, fnCompare)
		fnAssertTrue(fmt.Sprintf("%v", samples) == "[3 2 1]",  "expecting 3,2,1")
		t.Log("pass sorting [3 2 1]")
	}

	t.Log("\ntesting MergeSort")
	fnTestSorter(merge_sort.MergeSort)
}

Test output

  • Merge sort is quite fast, about twice as fast as heap sort
  • Compared with bubbling, selection and insertion, it has exponential improvement, which is in line with the theoretical analysis
  • The cost is consistent with the heap sorting. Additional space of size N needs to be allocated as the merge buffer
  • The main reason why it is faster than heap sorting is that the data structure of merge sorting is simpler and does not need to create a large number of specific structures
$ go test -v merge_sort_test.go 
=== RUN   Test_MergeSort
    merge_sort_test.go:111: 
        testing MergeSort
    merge_sort_test.go:48: pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]
    merge_sort_test.go:54: prepare large array with 100000 items
    merge_sort_test.go:60: sorting large array with 100000 items
    merge_sort_test.go:67: end sorting large array, cost = 35 ms
    merge_sort_test.go:54: prepare large array with 100001 items
    merge_sort_test.go:60: sorting large array with 100001 items
    merge_sort_test.go:67: end sorting large array, cost = 36 ms
    merge_sort_test.go:54: prepare large array with 100002 items
    merge_sort_test.go:60: sorting large array with 100002 items
    merge_sort_test.go:67: end sorting large array, cost = 33 ms
    merge_sort_test.go:72: sorting 0-20
    merge_sort_test.go:83: unsort = [6 10 8 9 14 1 12 4 19 7 11 16 15 17 0 2 18 3 5 13]
    merge_sort_test.go:86: sorted = [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
    merge_sort_test.go:88: pass sorting 0-20
    merge_sort_test.go:93: pass sorting []
    merge_sort_test.go:97: pass sorting [1]
    merge_sort_test.go:102: pass sorting [1 3]
    merge_sort_test.go:108: pass sorting [3 2 1]
--- PASS: Test_MergeSort (0.12s)
PASS
ok      command-line-arguments  0.127s

ISorter.go

Defines the sorter interface Define the value comparison function to be compatible with any value type, and adjust the comparison function to sort in reverse order

package sorting

type ISorter interface {
	Sort(data []interface{}, comparator CompareFunction) []interface{}
}

type CompareFunction func(a interface{}, b interface{}) CompareResult

type CompareResult int
const LESS CompareResult = -1
const EQUAL CompareResult = 0
const GREATER CompareResult = 1

tMergeSort.go

Merge sorter to realize the ISorter interface

package merge_sort

import (
	"learning/gooop/sorting"
)

type tMergeSort struct {}

func newMergeSort() sorting.ISorter {
	return &tMergeSort{}
}

func (me *tMergeSort) Sort(data []interface{}, comparator sorting.CompareFunction) []interface{} {
	if data == nil {
		return nil
	}

	size := len(data)
	if size <= 1 {
		return data
	}

	var result []interface{} = nil
	buffer := make([]interface{}, size)
	for span := 1; span <= size;span *= 2 {
		for i := 0;i < size;i += span * 2 {
			merge(size, data, i, i + span, span, buffer, i, comparator)
		}

		result = buffer
		data, buffer = buffer, data
	}

	if result == nil {
		result = data
	}

	return result
}

// Merge the two subsequences in the data array: [Q1: Q1 + span], [Q2: Q2 + span) to the offset position of the target array result
func merge(size int, data []interface{}, q1 int, q2 int, span int, result []interface{}, offset int, comparator sorting.CompareFunction) {
	e1 := min(q1 + span, size)
	e2 := min(q2 + span, size)
	j := -1
	k := -1

	for i := 0;i < span*2;i++ {
		if q1 >= e1 {
			j = q2
			k = e2

		} else if q2 >= e2 {
			j = q1
			k = e1
		}

		if j >= 0 {
			for p := j;p < k;p++ {
				result[offset] = data[p]
				offset++
			}
			break
		}

		v1 := data[q1]
		v2 := data[q2]

		if lessEqual(v1, v2, comparator) {
			result[offset] = v1
			q1++
		} else {
			result[offset] = v2
			q2++
		}
		offset++
	}
}

func lessEqual(v1 interface{}, v2 interface{}, comparator sorting.CompareFunction) bool {
	return comparator(v1, v2) != sorting.GREATER
}

func min(a,b int) int {
	if a <= b {
		return a
	} else {
		return b
	}
}

var MergeSort = newMergeSort()

(end)

Tags: Go

Posted by beamerrox on Sat, 16 Apr 2022 06:19:48 +0930