/* GPLv2 or OpenIB.org BSD (MIT) See COPYING file

   These accessors always map to PCI-E TLPs in predictable ways. Translation
   to other buses should follow similar definitions.

   write32(mem, 1)
      Produce a 4 byte MemWr TLP with bit 0 of DW byte offset 0 set
   write32_be(mem, htobe32(1))
      Produce a 4 byte MemWr TLP with bit 0 of DW byte offset 3 set
   write32_le(mem, htole32(1))
      Produce a 4 byte MemWr TLP with bit 0 of DW byte offset 0 set

   For ordering these accessors are similar to the Kernel's concept of
   writel_relaxed(). When working with UC memory the following hold:

   1) Strong ordering is required when talking to the same device (eg BAR),
      and combining is not permitted:

       write32(mem, 1);
       write32(mem + 4, 1);
       write32(mem, 1);

      Must produce three TLPs, in order.

   2) Ordering ignores all pthread locking:

       pthread_spin_lock(&lock);
       write32(mem, global++);
       pthread_spin_unlock(&lock);

      When run concurrently on all CPUs the device must observe all stores,
      but the data value will not be strictly increasing.

   3) Interaction with DMA is not ordered; explicit use of a barrier from
      udma_barriers is required:

       *dma_mem = 1;
       udma_to_device_barrier();
       write32(mem, GO_DMA);

   4) Access out of program order (eg speculation), whether by the CPU or the
      compiler, is not permitted:

       if (cond)
           read32();

      Must not issue a read TLP if cond is false.

   If these are used with WC memory then #1 and #4 do not apply, and all WC
   accesses must be bracketed with mmio_wc_start() // mmio_flush_writes()
*/
#ifndef __UTIL_MMIO_H
#define __UTIL_MMIO_H

#include <linux/types.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>
#include <endian.h>

#include <config.h>

#include <util/compiler.h>

/* The first step is to define the 'raw' accessors. To make this very safe
   with sparse we define two versions of each, a le and a be - however the
   code is always identical.
*/
#ifdef __s390x__
#include <util/s390_mmio_insn.h>

#define MAKE_WRITE(_NAME_, _SZ_)                                              \
	static inline void _NAME_##_be(void *addr, __be##_SZ_ value)          \
	{                                                                     \
		if (s390_is_mio_supported)                                    \
			s390_pcistgi(addr, value, sizeof(value));             \
		else                                                          \
			s390_mmio_write_syscall(addr, &value, sizeof(value)); \
	}                                                                     \
	static inline void _NAME_##_le(void *addr, __le##_SZ_ value)          \
	{                                                                     \
		if (s390_is_mio_supported)                                    \
			s390_pcistgi(addr, value, sizeof(value));             \
		else                                                          \
			s390_mmio_write_syscall(addr, &value, sizeof(value)); \
	}

#define MAKE_READ(_NAME_, _SZ_)                                               \
	static inline __be##_SZ_ _NAME_##_be(const void *addr)               \
	{                                                                     \
		__be##_SZ_ res;                                               \
		if (s390_is_mio_supported)                                    \
			res = s390_pcilgi(addr, sizeof(res));                 \
		else                                                          \
			s390_mmio_read_syscall(addr, &res, sizeof(res));      \
		return res;                                                   \
	}                                                                     \
	static inline __le##_SZ_ _NAME_##_le(const void *addr)               \
	{                                                                     \
		__le##_SZ_ res;                                               \
		if (s390_is_mio_supported)                                    \
			res = s390_pcilgi(addr, sizeof(res));                 \
		else                                                          \
			s390_mmio_read_syscall(addr, &res, sizeof(res));      \
		return res;                                                   \
	}

static inline void mmio_write8(void *addr, uint8_t value)
{
	if (s390_is_mio_supported)
		s390_pcistgi(addr, value, sizeof(value));
	else
		s390_mmio_write_syscall(addr, &value, sizeof(value));
}

static inline uint8_t mmio_read8(const void *addr)
{
	uint8_t res;

	if (s390_is_mio_supported)
		res = s390_pcilgi(addr, sizeof(res));
	else
		s390_mmio_read_syscall(addr, &res, sizeof(res));
	return res;
}

#else /* __s390x__ */

#define MAKE_WRITE(_NAME_, _SZ_)                                              \
	static inline void _NAME_##_be(void *addr, __be##_SZ_ value)          \
	{                                                                     \
		atomic_store_explicit((_Atomic(uint##_SZ_##_t) *)addr,        \
				      (__force uint##_SZ_##_t)value,          \
				      memory_order_relaxed);                  \
	}                                                                     \
	static inline void _NAME_##_le(void *addr, __le##_SZ_ value)          \
	{                                                                     \
		atomic_store_explicit((_Atomic(uint##_SZ_##_t) *)addr,        \
				      (__force uint##_SZ_##_t)value,          \
				      memory_order_relaxed);                  \
	}
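
/* A note on the generic implementation: the _Atomic casts force the compiler
   to emit exactly one untorn access of the full width - it may not elide,
   split, combine or duplicate the load or store. memory_order_relaxed adds
   no CPU fencing; the ordering guarantees documented at the top of this file
   come from mapping the BAR as UC memory, not from the atomics.
*/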
#define MAKE_READ(_NAME_, _SZ_)                                               \
	static inline __be##_SZ_ _NAME_##_be(const void *addr)               \
	{                                                                     \
		return (__force __be##_SZ_)atomic_load_explicit(              \
		    (_Atomic(uint##_SZ_##_t) *)addr, memory_order_relaxed);   \
	}                                                                     \
	static inline __le##_SZ_ _NAME_##_le(const void *addr)               \
	{                                                                     \
		return (__force __le##_SZ_)atomic_load_explicit(              \
		    (_Atomic(uint##_SZ_##_t) *)addr, memory_order_relaxed);   \
	}

static inline void mmio_write8(void *addr, uint8_t value)
{
	atomic_store_explicit((_Atomic(uint8_t) *)addr, value,
			      memory_order_relaxed);
}

static inline uint8_t mmio_read8(const void *addr)
{
	return atomic_load_explicit((_Atomic(uint8_t) *)addr,
				    memory_order_relaxed);
}
#endif /* __s390x__ */

MAKE_WRITE(mmio_write16, 16)
MAKE_WRITE(mmio_write32, 32)

MAKE_READ(mmio_read16, 16)
MAKE_READ(mmio_read32, 32)

#if SIZEOF_LONG == 8
MAKE_WRITE(mmio_write64, 64)
MAKE_READ(mmio_read64, 64)
#else
void mmio_write64_be(void *addr, __be64 val);
static inline void mmio_write64_le(void *addr, __le64 val)
{
	mmio_write64_be(addr, (__be64 __force)val);
}
/* There is no way to do read64 atomically; rather than provide some sketchy
   implementation we leave these functions undefined. Users must not call
   them if SIZEOF_LONG != 8, but should instead implement an appropriate
   version.
*/
__be64 mmio_read64_be(const void *addr);
__le64 mmio_read64_le(const void *addr);
#endif /* SIZEOF_LONG == 8 */

#undef MAKE_WRITE
#undef MAKE_READ

/* Now we can define the host endian versions of the operators; these just
   add a conversion to/from little endian.
*/
#define MAKE_WRITE(_NAME_, _SZ_)                                              \
	static inline void _NAME_(void *addr, uint##_SZ_##_t value)           \
	{                                                                     \
		_NAME_##_le(addr, htole##_SZ_(value));                        \
	}
#define MAKE_READ(_NAME_, _SZ_)                                               \
	static inline uint##_SZ_##_t _NAME_(const void *addr)                \
	{                                                                     \
		return le##_SZ_##toh(_NAME_##_le(addr));                      \
	}
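
/* Example of the intended usage, assuming hypothetical 'wqe', 'db' and 'GO'
   names (a sketch, not part of this header's API):

	wqe->opcode = htole32(OPCODE);	// fill DMA visible memory
	udma_to_device_barrier();	// order DMA stores before the MMIO
	mmio_write32(db, GO);		// one 4 byte MemWr TLP to the BAR

   mmio_write32() byte swaps on big endian hosts, so the device always
   observes the value in little endian layout.
*/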

/* This strictly guarantees the order of TLP generation for the memory copy
   to be in ascending address order.
*/
#if defined(__aarch64__) || defined(__arm__)
#include <arm_neon.h>

static inline void _mmio_memcpy_x64_64b(void *dest, const void *src)
{
	vst4q_u64(dest, vld4q_u64(src));
}

static inline void _mmio_memcpy_x64(void *dest, const void *src,
				    size_t bytecnt)
{
	do {
		_mmio_memcpy_x64_64b(dest, src);
		bytecnt -= sizeof(uint64x2x4_t);
		src += sizeof(uint64x2x4_t);
		dest += sizeof(uint64x2x4_t);
	} while (bytecnt > 0);
}

/* Use the single block copy only when the size is known at compile time to
   be exactly 64 bytes. */
#define mmio_memcpy_x64(dest, src, bytecount)                                 \
	({                                                                    \
		if (__builtin_constant_p(bytecount) && (bytecount) == 64)     \
			_mmio_memcpy_x64_64b((dest), (src));                  \
		else                                                          \
			_mmio_memcpy_x64((dest), (src), (bytecount));         \
	})
#elif defined(__s390x__)

void mmio_memcpy_x64(void *dst, const void *src, size_t bytecnt);

#else

/* Transfer is some multiple of 64 bytes */
static inline void mmio_memcpy_x64(void *dest, const void *src,
				   size_t bytecnt)
{
	uintptr_t *dst_p = dest;

	/* Caller must guarantee:
	    assert(bytecnt != 0);
	    assert((bytecnt % 64) == 0);
	    assert(((uintptr_t)dest) % __alignof__(*dst_p) == 0);
	    assert(((uintptr_t)src) % __alignof__(*dst_p) == 0);
	*/

	/* Use the native word size for the copy */
	if (sizeof(*dst_p) == 8) {
		const __be64 *src_p = src;

		do {
			/* Do 64 bytes at a time */
			mmio_write64_be(dst_p++, *src_p++);
			mmio_write64_be(dst_p++, *src_p++);
			mmio_write64_be(dst_p++, *src_p++);
			mmio_write64_be(dst_p++, *src_p++);
			mmio_write64_be(dst_p++, *src_p++);
			mmio_write64_be(dst_p++, *src_p++);
			mmio_write64_be(dst_p++, *src_p++);
			mmio_write64_be(dst_p++, *src_p++);

			bytecnt -= 8 * sizeof(*dst_p);
		} while (bytecnt > 0);
	} else if (sizeof(*dst_p) == 4) {
		const __be32 *src_p = src;

		do {
			mmio_write32_be(dst_p++, *src_p++);
			mmio_write32_be(dst_p++, *src_p++);
			bytecnt -= 2 * sizeof(*dst_p);
		} while (bytecnt > 0);
	}
}
#endif

MAKE_WRITE(mmio_write16, 16)
MAKE_WRITE(mmio_write32, 32)
MAKE_WRITE(mmio_write64, 64)

MAKE_READ(mmio_read16, 16)
MAKE_READ(mmio_read32, 32)
MAKE_READ(mmio_read64, 64)

#undef MAKE_WRITE
#undef MAKE_READ

#endif /* __UTIL_MMIO_H */
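
/* Example of a WC doorbell post using mmio_memcpy_x64(), assuming
   hypothetical 'wc_db_page' and 'wqe' names (a sketch, not part of this
   header's API). Per the rules at the top of this file, WC accesses must be
   bracketed:

	mmio_wc_start();
	mmio_memcpy_x64(wc_db_page, wqe, 64);
	mmio_flush_writes();

   The bracketing barriers flush the write combining buffers so the 64 bytes
   are pushed toward the device promptly, ideally as a single combined TLP.
*/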