diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d90fa73
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+*.pyc
+dist/
+*.egg-info/
+*.o
+*.so
+test
diff --git a/README.rst b/README.rst
new file mode 100644
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,29 @@
+mergesort
+=========
+
+Academic example of a merge sort implementation in C.
+It includes a single-process version, a parallel version that uses a
+limited number of fork()s, and example Python bindings using ctypes.
+
+Contents
+========
+
+- ``ms.h``, ``ms.c`` - merge sort implementation
+- ``main.c`` - sample C program that runs both sorts with time measurement
+- ``pyms.py`` - sample Python bindings using ctypes
+- ``test.py`` - sample test that uses pyms
+
+Compile the shared library (used by ``pyms.py``; set ``LD_LIBRARY_PATH`` to
+the directory containing ``libms.so`` before running ``test.py``) with
+
+.. code:: bash
+
+    cc -c -Wall -Werror -fpic ms.c
+    cc -shared -o libms.so ms.o
+
+and the sample C program with
+
+.. code:: bash
+
+    cc -Wall -o test ms.c main.c
+
diff --git a/main.c b/main.c
new file mode 100644
--- /dev/null
+++ b/main.c
@@ -0,0 +1,127 @@
+#include "ms.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+/* Return end - start with tv_nsec normalised to [0, 1e9). */
+struct timespec timespecdiff(struct timespec start, struct timespec end)
+{
+    struct timespec temp;
+
+    if ((end.tv_nsec - start.tv_nsec) < 0) {
+        /* borrow one second so that tv_nsec stays non-negative */
+        temp.tv_sec = end.tv_sec - start.tv_sec - 1;
+        temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
+    } else {
+        temp.tv_sec = end.tv_sec - start.tv_sec;
+        temp.tv_nsec = end.tv_nsec - start.tv_nsec;
+    }
+
+    return temp;
+}
+
+/* Shuffle foo[0..len) in place (Fisher-Yates); no-op when len < 2. */
+void shuffle(int foo[], size_t len)
+{
+    size_t i;
+    size_t x;
+    int t;
+
+    for (i = len; i > 1; --i) {
+        x = (size_t) random() % i;
+        t = foo[x];
+        foo[x] = foo[i - 1];
+        foo[i - 1] = t;
+    }
+}
+
+/* Return 1 if foo[i] == i for every i in [0, len), 0 otherwise. */
+int check_arr(int foo[], size_t len)
+{
+    size_t i;
+
+    for (i = 0; i < len; ++i) {
+        if (foo[i] != (int) i) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Print header, then the len elements of foo on one line. */
+static void print_arr(const char *header, const int foo[], size_t len)
+{
+    size_t i;
+
+    printf("%s", header);
+    for (i = 0; i < len; ++i) {
+        printf(" %d", foo[i]);
+    }
+    printf("\n");
+}
+
+/* Sort a shuffled array with both implementations and time each run. */
+int main(int argc, char **argv)
+{
+    int i;
+    int *foo;
+    int len;
+    struct timespec start, end;
+    struct timespec diff;
+    long diffms;
+    int x;
+    int forkdiv;
+
+    len = 50000001;
+    forkdiv = 3;
+    /* element size, not pointer size: sizeof(int *) over-allocated */
+    foo = malloc(sizeof(*foo) * len);
+    if (foo == NULL) {
+        fprintf(stderr, "Not enough memory\n");
+        exit(1);
+    }
+    for (i = 0; i < len; ++i) {
+        foo[i] = i;
+    }
+
+    srandom(time(NULL));
+    printf("arr len: %d\n", len);
+    for (x = 0; x < 2; ++x) {
+        printf("%s\n", x == 0 ? "linear" : "parallel");
+        shuffle(foo, len);
+
+        if (len <= 100) {
+            print_arr("sorting arr: \n", foo, len);
+        }
+        /*
+         * Wall-clock time: CLOCK_PROCESS_CPUTIME_ID only counts CPU time
+         * of this process and would miss the work done in forked children.
+         */
+        clock_gettime(CLOCK_MONOTONIC, &start);
+        if (x == 0) {
+            if (mymergesort(foo, len) != 0) {
+                perror("mergesort");
+            }
+        } else if (mymergesortp(foo, len, forkdiv) != 0) {
+            perror("mergesortp");
+        }
+        clock_gettime(CLOCK_MONOTONIC, &end);
+        if (len <= 100) {
+            print_arr("sorted arr: \n", foo, len);
+        }
+        if (check_arr(foo, len) == 0) {
+            printf("ERROR in sort\n");
+        } else {
+            printf("sort OK\n");
+            diff = timespecdiff(start, end);
+            diffms = (long) diff.tv_sec * 1000 + diff.tv_nsec / 1000000;
+            printf("duration: %lds %ldns\n\t%ldms\n",
+                   (long) diff.tv_sec, diff.tv_nsec, diffms);
+        }
+    }
+
+    free(foo);
+
+    return 0;
+}
+
diff --git a/ms.c b/ms.c
new file mode 100644
--- /dev/null
+++ b/ms.c
@@ -0,0 +1,169 @@
+#include "ms.h"
+
+/*
+ * Merge the sorted runs left[0..leftlen) and right[0..rightlen) into res.
+ * res needs room for leftlen + rightlen elements and must not overlap the
+ * inputs.  On ties the element from left is taken first.
+ */
+void mymerge(int left[], size_t leftlen, int right[], size_t rightlen, int res[])
+{
+    size_t i = 0;
+    size_t j = 0;
+    size_t k = 0;
+
+    while (i < leftlen && j < rightlen) {
+        if (left[i] <= right[j]) {
+            res[k++] = left[i++];
+        } else {
+            res[k++] = right[j++];
+        }
+    }
+    while (i < leftlen) {
+        res[k++] = left[i++];
+    }
+    while (j < rightlen) {
+        res[k++] = right[j++];
+    }
+}
+
+/*
+ * Recursively sort arr[0..len) in place, using res[0..len) as scratch
+ * space for the merge step.
+ */
+static void _mergesort(int arr[], size_t len, int res[])
+{
+    size_t leftlen;
+    size_t rightlen;
+    int foo;
+
+    if (len < 2) {
+        return;
+    }
+    if (len == 2) {
+        if (arr[1] < arr[0]) {
+            foo = arr[1];
+            arr[1] = arr[0];
+            arr[0] = foo;
+        }
+        return;
+    }
+
+    leftlen = (len - 1) / 2 + 1;
+    rightlen = len - leftlen;
+    _mergesort(arr, leftlen, res);
+    _mergesort(&arr[leftlen], rightlen, &res[leftlen]);
+    mymerge(arr, leftlen, &arr[leftlen], rightlen, res);
+    memcpy(arr, res, len * sizeof(int));
+}
+
+/*
+ * Sort arr[0..len) in place in the calling process.
+ * Returns 0 on success, -1 if the scratch buffer cannot be allocated.
+ */
+int mymergesort(int arr[], size_t len)
+{
+    int *res;
+
+    if (len < 2) {
+        return 0; /* nothing to sort; malloc(0) may legitimately be NULL */
+    }
+    /* element size, not pointer size: sizeof(int *) over-allocated */
+    res = malloc(sizeof(*res) * len);
+    if (res == NULL) {
+        return -1;
+    }
+    _mergesort(arr, len, res);
+    free(res);
+    return 0;
+}
+
+/*
+ * Sort arr[0..len) in place, forking a child for the right half of every
+ * range longer than forklimitsz elements.  arr and res must be in memory
+ * shared between parent and child.  Returns 0 on success, -1 on failure.
+ */
+static int _mergesortp(int arr[], size_t len, int res[], size_t forklimitsz)
+{
+    size_t leftlen;
+    size_t rightlen;
+    int foo;
+    pid_t pid;
+    int status;
+
+    if (len <= 2 || len <= forklimitsz) {
+        /* small enough: finish this range in the current process */
+        _mergesort(arr, len, res);
+        return 0;
+    }
+
+    leftlen = (len - 1) / 2 + 1;
+    rightlen = len - leftlen;
+
+    pid = fork();
+    if (pid < 0) {
+        return -1;
+    }
+    if (pid == 0) { /* child: sort the right half */
+        foo = _mergesortp(&arr[leftlen], rightlen, &res[leftlen], forklimitsz);
+        /*
+         * _exit() instead of exit(): the child must not flush stdio
+         * buffers or run atexit handlers inherited from the parent.
+         */
+        _exit(foo < 0 ? 1 : 0);
+    }
+
+    /* parent: sort the left half, then always reap the child */
+    foo = _mergesortp(arr, leftlen, res, forklimitsz);
+    if (waitpid(pid, &status, 0) != pid) {
+        return -1;
+    }
+    if (foo < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+        return -1;
+    }
+    mymerge(arr, leftlen, &arr[leftlen], rightlen, res);
+    memcpy(arr, res, len * sizeof(int));
+    return 0;
+}
+
+/*
+ * Sort arr[0..len) in place using fork()ed worker processes.  Ranges
+ * longer than len / cnt elements are split between a parent and a child;
+ * cnt < 1 is treated as 1 (no forking).  arr is left untouched on failure.
+ * Returns 0 on success, -1 on failure.
+ */
+int mymergesortp(int arr[], size_t len, int cnt)
+{
+    int foo;
+    size_t size;
+    int *arrp;
+    int *resp;
+
+    if (len < 2) {
+        return 0; /* nothing to sort; mmap() rejects a zero length */
+    }
+    if (cnt < 1) {
+        cnt = 1; /* guards the division below against zero/negative cnt */
+    }
+    size = sizeof(int) * len;
+    /* shared anonymous mappings make the children's writes visible here */
+    arrp = mmap(0, size, PROT_READ | PROT_WRITE,
+            MAP_SHARED | MAP_ANON, -1, 0);
+    if (arrp == MAP_FAILED) {
+        return -1;
+    }
+    resp = mmap(0, size, PROT_READ | PROT_WRITE,
+            MAP_SHARED | MAP_ANON, -1, 0);
+    if (resp == MAP_FAILED) {
+        munmap(arrp, size);
+        return -1;
+    }
+    memcpy(arrp, arr, size);
+    foo = _mergesortp(arrp, len, resp, len / (size_t) cnt);
+    if (foo == 0) {
+        memcpy(arr, arrp, size);
+    }
+    munmap(arrp, size);
+    munmap(resp, size);
+    return foo;
+}
+
diff --git a/ms.h b/ms.h
new file mode 100644
--- /dev/null
+++ b/ms.h
@@ -0,0 +1,23 @@
+#ifndef MS_H
+#define MS_H 1
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+/* Merge two sorted runs into res (room for leftlen + rightlen ints). */
+void mymerge(int left[], size_t leftlen, int right[], size_t rightlen, int res[]);
+
+/* Sort arr[0..len) in place; returns 0 on success, -1 on failure. */
+int mymergesort(int arr[], size_t len);
+
+/*
+ * Sort arr[0..len) in place with fork()ed workers; ranges longer than
+ * len / cnt elements are split.  Returns 0 on success, -1 on failure.
+ */
+int mymergesortp(int arr[], size_t len, int cnt);
+
+#endif
diff --git a/pyms.py b/pyms.py
new file mode 100644
--- /dev/null
+++ b/pyms.py
@@ -0,0 +1,42 @@
+"""Sample ctypes bindings for the merge sort routines in libms."""
+import ctypes
+import sys
+
+
+if sys.platform.startswith('win'):
+    _mslib = ctypes.CDLL('libms.dll')
+else:
+    _mslib = ctypes.CDLL('libms.so')
+
+# Declare the C prototypes: without argtypes ctypes passes Python ints as
+# C int, which does not match the size_t length parameter.
+_mslib.mymergesort.argtypes = [ctypes.POINTER(ctypes.c_int), ctypes.c_size_t]
+_mslib.mymergesort.restype = ctypes.c_int
+_mslib.mymergesortp.argtypes = [
+    ctypes.POINTER(ctypes.c_int),
+    ctypes.c_size_t,
+    ctypes.c_int,
+]
+_mslib.mymergesortp.restype = ctypes.c_int
+
+
+def _sort_in_place(cfunc, arr, *extra):
+    """Run cfunc on a C int copy of arr and store the result back in arr.
+
+    Returns the C function's return code (0 on success).
+    """
+    size = len(arr)
+    buf = (ctypes.c_int * size)(*arr)
+    ret = cfunc(buf, size, *extra)
+    arr[:] = buf
+    return ret
+
+
+def mergesort(arr):
+    """Sort the list of ints arr in place within the current process."""
+    return _sort_in_place(_mslib.mymergesort, arr)
+
+
+def mergesortp(arr, cnt):
+    """Sort arr in place with forked workers; cnt is the fork divisor."""
+    return _sort_in_place(_mslib.mymergesortp, arr, cnt)
diff --git a/test.py b/test.py
new file mode 100644
--- /dev/null
+++ b/test.py
@@ -0,0 +1,38 @@
+"""Sample test of pyms: time the linear and the parallel merge sort."""
+from __future__ import print_function
+
+import random
+import time
+
+import pyms
+
+LENGTH = 50000001
+FORKDIV = 3
+
+
+def check(label, foo, sort):
+    """Shuffle foo, sort it with sort(foo), print the result and duration."""
+    random.shuffle(foo)
+    print(label)
+    start = time.time()
+    ret = sort(foo)
+    end = time.time()
+    if ret != 0:
+        print("error: {}".format(ret))
+    elif all(i == e for i, e in enumerate(foo)):
+        print("sort OK")
+        print("duration: {}".format(end - start))
+    else:
+        print("ERROR in sort")
+
+
+def main():
+    """Run both sorts on the same list of LENGTH integers."""
+    print("len: {}".format(LENGTH))
+    foo = list(range(LENGTH))
+    check("linear", foo, pyms.mergesort)
+    check("parallel", foo, lambda arr: pyms.mergesortp(arr, FORKDIV))
+
+
+if __name__ == "__main__":
+    main()