/* * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include "mlx5.h" /* Only ia64 requires this */ #ifdef __ia64__ #define MLX5_SHM_ADDR ((void *)0x8000000000000000UL) #define MLX5_SHMAT_FLAGS (SHM_RND) #else #define MLX5_SHM_ADDR NULL #define MLX5_SHMAT_FLAGS 0 #endif #ifndef HPAGE_SIZE #define HPAGE_SIZE (2UL * 1024 * 1024) #endif #define MLX5_SHM_LENGTH HPAGE_SIZE #define MLX5_Q_CHUNK_SIZE 32768 static void free_huge_mem(struct mlx5_hugetlb_mem *hmem) { if (hmem->bitmap) free(hmem->bitmap); if (shmdt(hmem->shmaddr) == -1) mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); shmctl(hmem->shmid, IPC_RMID, NULL); free(hmem); } static struct mlx5_hugetlb_mem *alloc_huge_mem(size_t size) { struct mlx5_hugetlb_mem *hmem; size_t shm_len; hmem = malloc(sizeof(*hmem)); if (!hmem) return NULL; shm_len = align(size, MLX5_SHM_LENGTH); hmem->shmid = shmget(IPC_PRIVATE, shm_len, SHM_HUGETLB | SHM_R | SHM_W); if (hmem->shmid == -1) { mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); goto out_free; } hmem->shmaddr = shmat(hmem->shmid, MLX5_SHM_ADDR, MLX5_SHMAT_FLAGS); if (hmem->shmaddr == (void *)-1) { mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); goto out_rmid; } hmem->bitmap = bitmap_alloc0(shm_len / MLX5_Q_CHUNK_SIZE); if (!hmem->bitmap) { mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); goto out_shmdt; } hmem->bmp_size = shm_len / MLX5_Q_CHUNK_SIZE; /* * Marked to be destroyed when process detaches from shmget segment */ shmctl(hmem->shmid, IPC_RMID, NULL); return hmem; out_shmdt: if (shmdt(hmem->shmaddr) == -1) mlx5_dbg(stderr, MLX5_DBG_CONTIG, "%s\n", strerror(errno)); out_rmid: shmctl(hmem->shmid, IPC_RMID, NULL); out_free: free(hmem); return NULL; } static int alloc_huge_buf(struct mlx5_context *mctx, struct mlx5_buf *buf, size_t size, int page_size) { int found = 0; int nchunk; struct mlx5_hugetlb_mem *hmem; int ret; buf->length = align(size, MLX5_Q_CHUNK_SIZE); nchunk = buf->length / MLX5_Q_CHUNK_SIZE; if (!nchunk) return 0; mlx5_spin_lock(&mctx->hugetlb_lock); list_for_each(&mctx->hugetlb_list, hmem, entry) { if (!bitmap_full(hmem->bitmap, hmem->bmp_size)) { buf->base = bitmap_find_free_region(hmem->bitmap, hmem->bmp_size, nchunk); if (buf->base != hmem->bmp_size) { bitmap_fill_region(hmem->bitmap, buf->base, buf->base + nchunk); buf->hmem = hmem; found = 1; break; } } } mlx5_spin_unlock(&mctx->hugetlb_lock); if (!found) { hmem = alloc_huge_mem(buf->length); if (!hmem) return -1; buf->base = 0; assert(nchunk <= hmem->bmp_size); bitmap_fill_region(hmem->bitmap, 0, nchunk); buf->hmem = hmem; mlx5_spin_lock(&mctx->hugetlb_lock); if (nchunk != hmem->bmp_size) list_add(&mctx->hugetlb_list, &hmem->entry); else list_add_tail(&mctx->hugetlb_list, &hmem->entry); mlx5_spin_unlock(&mctx->hugetlb_lock); } buf->buf = hmem->shmaddr + buf->base * MLX5_Q_CHUNK_SIZE; ret = ibv_dontfork_range(buf->buf, buf->length); if (ret) { mlx5_dbg(stderr, MLX5_DBG_CONTIG, "\n"); goto out_fork; } buf->type = MLX5_ALLOC_TYPE_HUGE; return 0; out_fork: mlx5_spin_lock(&mctx->hugetlb_lock); bitmap_zero_region(hmem->bitmap, buf->base, buf->base + nchunk); if (bitmap_empty(hmem->bitmap, hmem->bmp_size)) { list_del(&hmem->entry); mlx5_spin_unlock(&mctx->hugetlb_lock); free_huge_mem(hmem); } else mlx5_spin_unlock(&mctx->hugetlb_lock); return -1; } static void free_huge_buf(struct mlx5_context *ctx, struct mlx5_buf *buf) { int nchunk; nchunk = buf->length / MLX5_Q_CHUNK_SIZE; if (!nchunk) return; mlx5_spin_lock(&ctx->hugetlb_lock); bitmap_zero_region(buf->hmem->bitmap, buf->base, buf->base + nchunk); if (bitmap_empty(buf->hmem->bitmap, buf->hmem->bmp_size)) { list_del(&buf->hmem->entry); mlx5_spin_unlock(&ctx->hugetlb_lock); free_huge_mem(buf->hmem); } else mlx5_spin_unlock(&ctx->hugetlb_lock); } void mlx5_free_buf_extern(struct mlx5_context *ctx, struct mlx5_buf *buf) { ibv_dofork_range(buf->buf, buf->length); ctx->extern_alloc.free(buf->buf, ctx->extern_alloc.data); } int mlx5_alloc_buf_extern(struct mlx5_context *ctx, struct mlx5_buf *buf, size_t size) { void *addr; addr = ctx->extern_alloc.alloc(size, ctx->extern_alloc.data); if (addr || size == 0) { if (ibv_dontfork_range(addr, size)) { mlx5_dbg(stderr, MLX5_DBG_CONTIG, "External mode dontfork_range failed\n"); ctx->extern_alloc.free(addr, ctx->extern_alloc.data); return -1; } buf->buf = addr; buf->length = size; buf->type = MLX5_ALLOC_TYPE_EXTERNAL; return 0; } mlx5_dbg(stderr, MLX5_DBG_CONTIG, "External alloc failed\n"); return -1; } static void mlx5_free_buf_custom(struct mlx5_context *ctx, struct mlx5_buf *buf) { struct mlx5_parent_domain *mparent_domain = buf->mparent_domain; mparent_domain->free(&mparent_domain->mpd.ibv_pd, mparent_domain->pd_context, buf->buf, buf->resource_type); } static int mlx5_alloc_buf_custom(struct mlx5_context *ctx, struct mlx5_buf *buf, size_t size) { struct mlx5_parent_domain *mparent_domain = buf->mparent_domain; void *addr; addr = mparent_domain->alloc(&mparent_domain->mpd.ibv_pd, mparent_domain->pd_context, size, buf->req_alignment, buf->resource_type); if (addr == IBV_ALLOCATOR_USE_DEFAULT) return 1; if (addr || size == 0) { buf->buf = addr; buf->length = size; buf->type = MLX5_ALLOC_TYPE_CUSTOM; return 0; } return -1; } int mlx5_alloc_prefered_buf(struct mlx5_context *mctx, struct mlx5_buf *buf, size_t size, int page_size, enum mlx5_alloc_type type, const char *component) { int ret; if (type == MLX5_ALLOC_TYPE_CUSTOM) { ret = mlx5_alloc_buf_custom(mctx, buf, size); if (ret <= 0) return ret; /* Fallback - default allocation is required */ } /* * Fallback mechanism priority: * huge pages * contig pages * default */ if (type == MLX5_ALLOC_TYPE_HUGE || type == MLX5_ALLOC_TYPE_PREFER_HUGE || type == MLX5_ALLOC_TYPE_ALL) { ret = alloc_huge_buf(mctx, buf, size, page_size); if (!ret) return 0; if (type == MLX5_ALLOC_TYPE_HUGE) return -1; mlx5_dbg(stderr, MLX5_DBG_CONTIG, "Huge mode allocation failed, fallback to %s mode\n", MLX5_ALLOC_TYPE_ALL ? "contig" : "default"); } if (type == MLX5_ALLOC_TYPE_CONTIG || type == MLX5_ALLOC_TYPE_PREFER_CONTIG || type == MLX5_ALLOC_TYPE_ALL) { ret = mlx5_alloc_buf_contig(mctx, buf, size, page_size, component); if (!ret) return 0; if (type == MLX5_ALLOC_TYPE_CONTIG) return -1; mlx5_dbg(stderr, MLX5_DBG_CONTIG, "Contig allocation failed, fallback to default mode\n"); } if (type == MLX5_ALLOC_TYPE_EXTERNAL) return mlx5_alloc_buf_extern(mctx, buf, size); return mlx5_alloc_buf(buf, size, page_size); } int mlx5_free_actual_buf(struct mlx5_context *ctx, struct mlx5_buf *buf) { int err = 0; switch (buf->type) { case MLX5_ALLOC_TYPE_ANON: mlx5_free_buf(buf); break; case MLX5_ALLOC_TYPE_HUGE: free_huge_buf(ctx, buf); break; case MLX5_ALLOC_TYPE_CONTIG: mlx5_free_buf_contig(ctx, buf); break; case MLX5_ALLOC_TYPE_EXTERNAL: mlx5_free_buf_extern(ctx, buf); break; case MLX5_ALLOC_TYPE_CUSTOM: mlx5_free_buf_custom(ctx, buf); break; default: mlx5_err(ctx->dbg_fp, "Bad allocation type\n"); } return err; } /* This function computes log2(v) rounded up. We don't want to have a dependency to libm which exposes ceil & log2 APIs. Code was written based on public domain code: URL: http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog. */ static uint32_t mlx5_get_block_order(uint32_t v) { static const uint32_t bits_arr[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; static const uint32_t shift_arr[] = {1, 2, 4, 8, 16}; int i; uint32_t input_val = v; register uint32_t r = 0;/* result of log2(v) will go here */ for (i = 4; i >= 0; i--) { if (v & bits_arr[i]) { v >>= shift_arr[i]; r |= shift_arr[i]; } } /* Rounding up if required */ r += !!(input_val & ((1 << r) - 1)); return r; } bool mlx5_is_custom_alloc(struct ibv_pd *pd) { struct mlx5_parent_domain *mparent_domain = to_mparent_domain(pd); return (mparent_domain && mparent_domain->alloc && mparent_domain->free); } bool mlx5_is_extern_alloc(struct mlx5_context *context) { return context->extern_alloc.alloc && context->extern_alloc.free; } void mlx5_get_alloc_type(struct mlx5_context *context, struct ibv_pd *pd, const char *component, enum mlx5_alloc_type *alloc_type, enum mlx5_alloc_type default_type) { char *env_value; char name[128]; if (mlx5_is_custom_alloc(pd)) { *alloc_type = MLX5_ALLOC_TYPE_CUSTOM; return; } if (mlx5_is_extern_alloc(context)) { *alloc_type = MLX5_ALLOC_TYPE_EXTERNAL; return; } snprintf(name, sizeof(name), "%s_ALLOC_TYPE", component); *alloc_type = default_type; env_value = getenv(name); if (env_value) { if (!strcasecmp(env_value, "ANON")) *alloc_type = MLX5_ALLOC_TYPE_ANON; else if (!strcasecmp(env_value, "HUGE")) *alloc_type = MLX5_ALLOC_TYPE_HUGE; else if (!strcasecmp(env_value, "CONTIG")) *alloc_type = MLX5_ALLOC_TYPE_CONTIG; else if (!strcasecmp(env_value, "PREFER_CONTIG")) *alloc_type = MLX5_ALLOC_TYPE_PREFER_CONTIG; else if (!strcasecmp(env_value, "PREFER_HUGE")) *alloc_type = MLX5_ALLOC_TYPE_PREFER_HUGE; else if (!strcasecmp(env_value, "ALL")) *alloc_type = MLX5_ALLOC_TYPE_ALL; } } static void mlx5_alloc_get_env_info(struct mlx5_context *mctx, int *max_block_log, int *min_block_log, const char *component) { char *env; int value; char name[128]; /* First set defaults */ *max_block_log = MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE; *min_block_log = MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE; snprintf(name, sizeof(name), "%s_MAX_LOG2_CONTIG_BSIZE", component); env = getenv(name); if (env) { value = atoi(env); if (value <= MLX5_MAX_LOG2_CONTIG_BLOCK_SIZE && value >= MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE) *max_block_log = value; else mlx5_err(mctx->dbg_fp, "Invalid value %d for %s\n", value, name); } sprintf(name, "%s_MIN_LOG2_CONTIG_BSIZE", component); env = getenv(name); if (env) { value = atoi(env); if (value >= MLX5_MIN_LOG2_CONTIG_BLOCK_SIZE && value <= *max_block_log) *min_block_log = value; else mlx5_err(mctx->dbg_fp, "Invalid value %d for %s\n", value, name); } } int mlx5_alloc_buf_contig(struct mlx5_context *mctx, struct mlx5_buf *buf, size_t size, int page_size, const char *component) { void *addr = MAP_FAILED; int block_size_exp; int max_block_log; int min_block_log; struct ibv_context *context = &mctx->ibv_ctx.context; off_t offset; mlx5_alloc_get_env_info(mctx, &max_block_log, &min_block_log, component); block_size_exp = mlx5_get_block_order(size); if (block_size_exp > max_block_log) block_size_exp = max_block_log; do { offset = 0; set_command(MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES, &offset); set_order(block_size_exp, &offset); addr = mmap(NULL , size, PROT_WRITE | PROT_READ, MAP_SHARED, context->cmd_fd, page_size * offset); if (addr != MAP_FAILED) break; /* * The kernel returns EINVAL if not supported */ if (errno == EINVAL) return -1; block_size_exp -= 1; } while (block_size_exp >= min_block_log); mlx5_dbg(mctx->dbg_fp, MLX5_DBG_CONTIG, "block order %d, addr %p\n", block_size_exp, addr); if (addr == MAP_FAILED) return -1; if (ibv_dontfork_range(addr, size)) { munmap(addr, size); return -1; } buf->buf = addr; buf->length = size; buf->type = MLX5_ALLOC_TYPE_CONTIG; return 0; } void mlx5_free_buf_contig(struct mlx5_context *mctx, struct mlx5_buf *buf) { ibv_dofork_range(buf->buf, buf->length); munmap(buf->buf, buf->length); } int mlx5_alloc_buf(struct mlx5_buf *buf, size_t size, int page_size) { int ret; int al_size; al_size = align(size, page_size); ret = posix_memalign(&buf->buf, page_size, al_size); if (ret) return ret; ret = ibv_dontfork_range(buf->buf, al_size); if (ret) free(buf->buf); if (!ret) { buf->length = al_size; buf->type = MLX5_ALLOC_TYPE_ANON; } return ret; } void mlx5_free_buf(struct mlx5_buf *buf) { ibv_dofork_range(buf->buf, buf->length); free(buf->buf); }