import os
import time
import argparse
import multiprocessing
import numpy as np
import matplotlib.pyplot as plt
from cffi import FFI
from multiprocessing import Pool
from joblib import Parallel, delayed
from PIL import Image

def _str2bool(value):
    """Convert a command-line token to a real boolean.

    argparse's ``type=bool`` simply calls ``bool(token)``, which is True for
    every non-empty string (including "False"). This converter understands the
    usual spellings instead and rejects anything else.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    # the empty string keeps its old meaning (bool('') is False)
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value))


# Command-line options for the benchmark. The boolean switches accept either a
# bare flag (`--use-vips`) or an explicit value (`--use-vips False`).
parser = argparse.ArgumentParser(description='Rust vs. Python Image Cropping Bench')
parser.add_argument('--batch-size', type=int, default=10,
                    help="batch-size (default: 10)")
parser.add_argument('--use-vips', type=_str2bool, nargs='?', const=True, default=False,
                    help="use VIPS instead of PIL-SIMD (default: False)")
parser.add_argument('--use-grayscale', type=_str2bool, nargs='?', const=True, default=False,
                    help="use grayscale images (default: False)")
parser.add_argument('--use-threading', type=_str2bool, nargs='?', const=True, default=False,
                    help="use threading instead of multiprocessing (default: False)")
parser.add_argument('--num-trials', type=int, default=10,
                    help="number of trials to average over (default: 10)")
args = parser.parse_args()

# pyvips is only imported when it was explicitly requested
if args.use_vips:
    import pyvips

def find(name, path):
    """Return the path of the first file called `name` found below `path`.

    The directory tree rooted at `path` is traversed with os.walk and the
    first directory whose file list contains `name` wins. Returns None when
    no such file exists.
    """
    matches = (os.path.join(folder, name)
               for folder, _subdirs, filenames in os.walk(path)
               if name in filenames)
    return next(matches, None)


def generate_scale_x_y(batch_size):
    """Draw random crop parameters for a whole batch.

    Returns a (scale, x, y) tuple of float32 arrays, each of length
    `batch_size` and sampled with np.random.rand.
    """
    # drawn in the order scale, x, y so a seeded RNG yields the same triple
    scale, x, y = (np.random.rand(batch_size).astype(np.float32)
                   for _ in range(3))
    return scale, x, y

def rust_crop_bench(ffi, lib, path_list, chans, scale, x, y, window_size, max_img_percentage):
    """Crop and resize a batch of images through `lib.parallel_crop_and_resize`.

    Args:
        ffi: cffi FFI instance used to allocate and cast the C arguments.
        lib: library handle exposing `parallel_crop_and_resize`.
        path_list: one image path per batch entry (bytes; str is encoded
            with os.fsencode).
        chans: number of channels per returned crop.
        scale, x, y: per-image crop parameters; handed over as float32 buffers.
        window_size: side length of each returned crop.
        max_img_percentage: forwarded unchanged to the native call.

    Returns:
        uint8 array of shape [batch_size, window_size, window_size, chans].
    """
    batch_size = len(path_list)

    # The char[] objects own the C strings; they must outlive the native call,
    # so both the list and the char*[] built from it are bound to locals.
    path_keepalive = [ffi.new("char[]", p if isinstance(p, bytes) else os.fsencode(p))
                      for p in path_list]
    path_array = ffi.new("char* []", path_keepalive)

    # Bind the contiguous float32 views to names: taking `.ctypes.data` from a
    # temporary array would leave a dangling pointer whenever a copy was made,
    # and the native signature expects `float*` (32-bit) buffers.
    scale_buf = np.ascontiguousarray(scale, dtype=np.float32)
    x_buf = np.ascontiguousarray(x, dtype=np.float32)
    y_buf = np.ascontiguousarray(y, dtype=np.float32)

    # flat output buffer that the native call fills in place
    crops = np.zeros(chans * window_size * window_size * batch_size, dtype=np.uint8)

    lib.parallel_crop_and_resize(path_array,
                                 ffi.cast("uint8_t*", crops.ctypes.data),  # resultant crops
                                 ffi.cast("float*", scale_buf.ctypes.data),  # scale
                                 ffi.cast("float*", x_buf.ctypes.data),      # x
                                 ffi.cast("float*", y_buf.ctypes.data),      # y
                                 window_size,
                                 chans,
                                 max_img_percentage,
                                 batch_size)
    # debugging aid: plt.imshow(crops[np.random.randint(batch_size)].squeeze()); plt.show()
    return crops.reshape([batch_size, window_size, window_size, chans])


class CropLambda(object):
    """Callable that crops a region out of one image file and resizes it.

    Args:
        path: path of the image file; it is re-opened on every call.
        window_size: the resized return image [not related to img_percentage].
        max_img_percentage: the maximum percentage of the image to use for the crop.
    """

    def __init__(self, path, window_size, max_img_percentage=0.15):
        self.path = path
        self.window_size = window_size
        self.max_img_percent = max_img_percentage

    def scale(self, val, newmin, newmax):
        """Linearly map `val` from [0, 1] onto [newmin, newmax]."""
        return (((val) * (newmax - newmin)) / (1.0)) + newmin

    def __call__(self, crop):
        """Crop according to `crop` = [scale, x, y].

        Dispatches to the pyvips implementation when the module-level
        `args.use_vips` is set, otherwise to the PIL implementation.
        """
        if args.use_vips:
            return self.__call_pyvips__(crop)

        return self.__call_PIL__(crop)

    def _crop_box(self, img_size, crop):
        """Translate `crop` = [scale, x, y] into pixel units.

        Args:
            img_size: (width, height) of the source image in pixels.
            crop: [scale, x, y] with x and y in [0, 1].

        Returns:
            (left, top, width, height) of the crop as plain ints.
        """
        img_size = np.asarray(img_size)

        # scale the (x, y) co-ordinates to the size of the image
        assert 0 <= crop[1] <= 1, "x needs to be \\in [0, 1]"
        assert 0 <= crop[2] <= 1, "y needs to be \\in [0, 1]"
        x = int(self.scale(crop[1], 0, img_size[0]))
        y = int(self.scale(crop[2], 0, img_size[1]))

        # calculate the scale of the true crop using the provided scale
        # Note: this is different from the return size, i.e. window_size
        crop_scale = min(crop[0], self.max_img_percent)
        crop_size = np.floor(img_size * crop_scale).astype(int) - 1
        # a tiny scale would otherwise give a 0 or negative extent,
        # so always keep at least one pixel in each direction
        crop_size = np.maximum(crop_size, 1)

        # bound the (x, y) co-ordinates to be plausible
        # i.e <= img_size - crop_size, so the box stays inside the image
        max_coords = img_size - crop_size
        x, y = min(x, max_coords[0]), min(y, max_coords[1])
        return int(x), int(y), int(crop_size[0]), int(crop_size[1])

    def __call_PIL__(self, crop):
        """Crop with PIL and return the resized PIL image.

        `crop` is [scale, x, y]; the result is `window_size` x `window_size`.
        """
        with open(self.path, 'rb') as f:
            with Image.open(f) as img:
                # img.size is (width, height)
                x, y, width, height = self._crop_box(img.size, crop)

                # crop the actual image and then resample it to window_size;
                # PIL wants the box as (left, upper, right, lower)
                # resample = 2 is a BILINEAR transform, avoid importing PIL for enum
                # TODO: maybe also try 1 = ANTIALIAS = LANCZOS
                crop_img = img.crop((x, y, x + width, y + height))
                return crop_img.resize((self.window_size, self.window_size), resample=2)

    def __call_pyvips__(self, crop):
        """Crop with pyvips and return the resized pixels wrapped by np.array.

        `crop` is [scale, x, y]. The buffer from write_to_memory() is passed
        to np.array as-is; no reshape is applied here.
        """
        img = pyvips.Image.new_from_file(self.path, access='sequential')

        # (width, height) order, matching both _crop_box and the
        # crop(left, top, width, height) call below
        x, y, width, height = self._crop_box((img.width, img.height), crop)

        # crop the actual image and then resample it to window_size
        crop_img = img.crop(x, y, width, height)
        return np.array(crop_img.resize(self.window_size / crop_img.width,
                                        vscale=self.window_size / crop_img.height).write_to_memory())

class CropLambdaPool(object):
    """Runs a list of crop callables over matching parameter rows with joblib.

    Args:
        num_workers: upper bound on the number of joblib jobs.
    """

    def __init__(self, num_workers=8):
        self.num_workers = num_workers
        # backend name handed to joblib.Parallel
        self.backend = 'threading' if args.use_threading else 'loky'

    def _apply(self, lbda, z_i):
        """Call one crop callable with its parameter row."""
        return lbda(z_i)

    def __call__(self, list_of_lambdas, z_vec):
        """Apply `list_of_lambdas[i]` to `z_vec[i]` for every i.

        Returns the list of results in input order; an empty input gives [].
        """
        # joblib rejects n_jobs == 0, so short-circuit the empty batch
        if len(list_of_lambdas) == 0:
            return []

        # never start more jobs than requested, nor more than there are tasks
        n_jobs = min(self.num_workers, len(list_of_lambdas))
        return Parallel(n_jobs=n_jobs, backend=self.backend)(
            delayed(self._apply)(lbda, z_vec[i]) for i, lbda in enumerate(list_of_lambdas))

def python_crop_bench(paths, scale, x, y, window_size, max_img_percentage, num_workers=32):
    """Crop and resize one image per entry of `paths` using the Python path.

    Args:
        paths: image paths, one per batch entry.
        scale, x, y: 1-D per-image crop parameters of equal length.
        window_size: side length of each returned crop.
        max_img_percentage: cap on the crop scale, passed to CropLambda.
        num_workers: worker count handed to CropLambdaPool (default 32).

    Returns:
        The list produced by CropLambdaPool, one result per path.
    """
    crop_lbdas = [CropLambda(p, window_size, max_img_percentage) for p in paths]
    # one [scale, x, y] row per image
    z = np.column_stack((scale, x, y))
    return CropLambdaPool(num_workers=num_workers)(crop_lbdas, z)

def create_and_set_ffi():
    """Declare the native interface and load libparallel_image_crop.so.

    The shared library is searched for below the parent directory ("..").

    Returns:
        (lib, ffi): the opened library handle and the FFI instance.

    Raises:
        FileNotFoundError: if the shared library cannot be located.
    """
    ffi = FFI()
    ffi.cdef("""
    typedef struct  {
        void* data;
        size_t len;
    } array_t;

    void parallel_crop_and_resize(char**, uint8_t*, float*, float*, float*, uint32_t, uint32_t, float, size_t);

    """)

    lib_path = find("libparallel_image_crop.so", "..")
    if lib_path is None:
        # dlopen(None) would not fail here: it opens the C standard library,
        # and the missing symbol would only surface later at call time
        raise FileNotFoundError(
            "libparallel_image_crop.so not found below '..'; build the library first")

    lib = ffi.dlopen(lib_path)
    return lib, ffi


# Benchmark driver: times the Python crop path and the Rust crop path on the
# same batch (one image repeated `--batch-size` times) and prints mean/std.
if __name__ == "__main__":
    image_name = "lena_gray.png" if args.use_grayscale else "lena.png"
    lena = find(image_name, "..")
    if lena is None:
        raise FileNotFoundError("{} not found below '..'".format(image_name))

    # cffi needs bytes paths; the same image is used for every batch entry
    path_list = [os.fsencode(lena)] * args.batch_size

    scale, x, y = generate_scale_x_y(len(path_list))

    # bench python
    # perf_counter is a monotonic, high-resolution clock meant for intervals
    python_time = []
    for _ in range(args.num_trials):
        start_time = time.perf_counter()
        python_crop_bench(path_list, scale, x, y, 32, 0.25)
        python_time.append(time.perf_counter() - start_time)

    print("python crop average over {} trials : {} +/- {} sec".format(
        args.num_trials, np.mean(python_time), np.std(python_time)))

    # bench rust lib
    rust_time = []
    lib, ffi = create_and_set_ffi()
    # NOTE(review): stays 3 even with --use-grayscale; confirm that is what
    # the native library expects for single-channel input
    chans = 3
    for _ in range(args.num_trials):
        start_time = time.perf_counter()
        rust_crop_bench(ffi, lib, path_list, chans, scale, x, y, 32, 0.25)
        rust_time.append(time.perf_counter() - start_time)

    print("rust crop average over {} trials : {} +/- {} sec".format(
        args.num_trials, np.mean(rust_time), np.std(rust_time)))