/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief   Load/Store optimizations.
 * @author  Michael Beck
 */
#include "array.h"
#include "dbginfo_t.h"
#include "debug.h"
#include "entity_t.h"
#include "ircons_t.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irgmod.h"
#include "irgopt.h"
#include "irgraph_t.h"
#include "irgwalk.h"
#include "irhooks.h"
#include "irmemory.h"
#include "irmode_t.h"
#include "irnode_t.h"
#include "irnodehashmap.h"
#include "iropt_dbg.h"
#include "iropt_t.h"
#include "iroptimize.h"
#include "irtools.h"
#include "panic.h"
#include "set.h"
#include "target_t.h"
#include "tv_t.h"
#include "type_t.h"
#include "util.h"
#include <string.h>

/** The debug handle. */
DEBUG_ONLY(static firm_dbg_module_t *dbg;)

#define MAX_PROJ MAX(MAX((unsigned)pn_Load_max, (unsigned)pn_Store_max), (unsigned)pn_Call_max)

typedef enum changes_t {
	NO_CHANGES    = 0,
	DF_CHANGED    = (1 << 0), /**< data flow changed */
	CF_CHANGED    = (1 << 1), /**< control flow changed */
	/** nodes have been created but are not reachable. This is a bad hack;
	 * try hard to avoid it! */
	NODES_CREATED = (1 << 2),
} changes_t;

/**
 * walker environment
 */
typedef struct walk_env_t {
	struct obstack obst;    /**< list of all stores */
	changes_t      changes; /**< a bitmask of graph changes */
} walk_env_t;

/** A Load/Store info. */
typedef struct ldst_info_t {
	ir_node *projs[MAX_PROJ+1]; /**< list of Proj's of this node */
	ir_node *exc_block;         /**< the exception block if available */
	int      exc_idx;           /**< predecessor index in exception block */
	unsigned visited;           /**< visited counter for breaking loops */
} ldst_info_t;

typedef struct base_offset_t {
	ir_node *base;
	long     offset;
} base_offset_t;

typedef struct track_load_env_t {
	ir_node       *load;
	base_offset_t  base_offset;
	ir_node       *ptr; /* deprecated: alternative representation of
	                       base_offset */
} track_load_env_t;

/**
 * flags for control flow.
 */
typedef enum block_flags_t {
	BLOCK_HAS_COND = (1 << 0), /**< Block has conditional control flow */
	BLOCK_HAS_EXC  = (1 << 1), /**< Block has exceptional control flow */
} block_flags_t;

/**
 * a Block info.
 */
typedef struct block_info_t {
	block_flags_t flags; /**< flags for the block */
} block_info_t;

/** the master visited flag for loop detection. */
static unsigned master_visited;

#define INC_MASTER()       ++master_visited
#define MARK_NODE(info)    (info)->visited = master_visited
#define NODE_VISITED(info) (info)->visited >= master_visited

/**
 * get the Load/Store info of a node
 */
static ldst_info_t *get_ldst_info(ir_node *node, struct obstack *obst)
{
	ldst_info_t *info = (ldst_info_t *)get_irn_link(node);
	if (info == NULL) {
		info = OALLOCZ(obst, ldst_info_t);
		set_irn_link(node, info);
	}
	return info;
}

/**
 * get the Block info of a node
 */
static block_info_t *get_block_info(ir_node *node, struct obstack *obst)
{
	block_info_t *info = (block_info_t *)get_irn_link(node);
	if (info == NULL) {
		info = OALLOCZ(obst, block_info_t);
		set_irn_link(node, info);
	}
	return info;
}

/**
 * update the projection info for a Load/Store
 */
static changes_t update_projs(ldst_info_t *info, ir_node *proj)
{
	unsigned nr = get_Proj_num(proj);
	assert(nr <= MAX_PROJ);
	if (info->projs[nr] != NULL) {
		/* there is already one, do CSE */
		exchange(proj, info->projs[nr]);
		return DF_CHANGED;
	} else {
		info->projs[nr] = proj;
		return NO_CHANGES;
	}
}
/**
 * update the exception block info for a Load/Store node.
 *
 * @param info  the load/store info struct
 * @param block the exception handler block for this load/store
 * @param pos   the control flow input of the block
 */
static void update_exc(ldst_info_t *info, ir_node *block, int pos)
{
	assert(info->exc_block == NULL);
	info->exc_block = block;
	info->exc_idx   = pos;
}

/**
 * walker, collects all Proj/Load/Store/Call/CopyB nodes
 *
 * walks from Start -> End
 */
static void collect_nodes(ir_node *node, void *env)
{
	walk_env_t *wenv   = (walk_env_t *)env;
	unsigned    opcode = get_irn_opcode(node);
	if (opcode == iro_Proj) {
		ir_node *pred = get_Proj_pred(node);
		opcode = get_irn_opcode(pred);
		if (opcode == iro_Load || opcode == iro_Store || opcode == iro_Call) {
			ldst_info_t *ldst_info = get_ldst_info(pred, &wenv->obst);
			wenv->changes |= update_projs(ldst_info, node);

			/*
			 * Place the Projs in the same block as their
			 * predecessor Load/Store/Call. This is always ok and
			 * prevents "non-SSA" form after optimizations if the
			 * Proj is in a wrong block.
			 */
			ir_node *blk      = get_nodes_block(node);
			ir_node *pred_blk = get_nodes_block(pred);
			if (blk != pred_blk) {
				wenv->changes |= DF_CHANGED;
				set_nodes_block(node, pred_blk);
			}
		}
	} else if (opcode == iro_Block) {
		for (int i = get_Block_n_cfgpreds(node); i-- > 0; ) {
			bool     is_exc = false;
			ir_node *proj   = get_Block_cfgpred(node, i);
			ir_node *pred   = proj;

			/* ignore Bad predecessors, they will be removed later */
			if (is_Bad(pred))
				continue;

			if (is_Proj(proj)) {
				pred   = get_Proj_pred(proj);
				is_exc = is_x_except_Proj(proj);
			}
			ir_node      *pred_block = get_nodes_block(pred);
			block_info_t *bl_info    = get_block_info(pred_block, &wenv->obst);
			if (is_fragile_op(pred) && is_exc)
				bl_info->flags |= BLOCK_HAS_EXC;
			else if (is_irn_forking(pred))
				bl_info->flags |= BLOCK_HAS_COND;

			opcode = get_irn_opcode(pred);
			if (is_exc && (opcode == iro_Load || opcode == iro_Store
			               || opcode == iro_Call)) {
				ldst_info_t *ldst_info = get_ldst_info(pred, &wenv->obst);
				update_exc(ldst_info, node, i);
			}
		}
	} else if (is_memop(node)) {
		/* Just initialize a ldst_info */
		(void)get_ldst_info(node, &wenv->obst);
	}
}

/* forward */
static void reduce_node_usage(ir_node *ptr);

/**
 * Update a Load that may have lost its users.
 */
static void handle_load_update(ir_node *load)
{
	/* do NOT touch volatile loads for now */
	if (get_Load_volatility(load) == volatility_is_volatile)
		return;

	ldst_info_t *info = (ldst_info_t *)get_irn_link(load);
	if (!info->projs[pn_Load_res] && !info->projs[pn_Load_X_except]) {
		ir_node *ptr = get_Load_ptr(load);
		ir_node *mem = get_Load_mem(load);

		/* a Load whose value is neither used nor exception checked,
		 * remove it */
		exchange(info->projs[pn_Load_M], mem);
		if (info->projs[pn_Load_X_regular]) {
			ir_node *jmp = new_r_Jmp(get_nodes_block(load));
			exchange(info->projs[pn_Load_X_regular], jmp);
		}
		kill_node(load);
		reduce_node_usage(ptr);
	}
}

/**
 * A use of a node has vanished. Check if this was a Proj
 * node and update the counters.
 */
static void reduce_node_usage(ir_node *ptr)
{
	if (!is_Proj(ptr))
		return;
	if (get_irn_n_edges(ptr) > 0)
		return;

	/* this Proj is dead now */
	ir_node *pred = get_Proj_pred(ptr);
	if (is_Load(pred)) {
		ldst_info_t *info = (ldst_info_t *)get_irn_link(pred);
		info->projs[get_Proj_num(ptr)] = NULL;

		/* this node lost its result proj, handle that */
		handle_load_update(pred);
	}
}
/**
 * Kill a Load or Store and all other nodes which are not needed after
 * it has been killed.
 */
static void kill_and_reduce_usage(ir_node *node)
{
	ir_node *ptr;
	ir_node *value;
	switch (get_irn_opcode(node)) {
	case iro_Load:
		ptr   = get_Load_ptr(node);
		value = NULL;
		break;
	case iro_Store:
		ptr   = get_Store_ptr(node);
		value = get_Store_value(node);
		break;
	default:
		panic("cannot handle node %+F", node);
	}

	kill_node(node);
	reduce_node_usage(ptr);
	if (value != NULL) {
		reduce_node_usage(value);
	}
}

static void get_base_and_offset(ir_node *ptr, base_offset_t *base_offset)
{
	/* TODO: long might not be enough, we should probably use some tarval
	 * thingy, or at least detect long overflows and abort */
	long     offset = 0;
	ir_mode *mode   = get_irn_mode(ptr);
	for (;;) {
		if (is_Add(ptr)) {
			ir_node *l = get_Add_left(ptr);
			ir_node *r = get_Add_right(ptr);
			if (get_irn_mode(l) != mode || !is_Const(r))
				break;
			offset += get_Const_long(r);
			ptr     = l;
		} else if (is_Sub(ptr)) {
			ir_node *r = get_Sub_right(ptr);
			if (!is_Const(r))
				break;
			offset -= get_Const_long(r);
			ptr     = get_Sub_left(ptr);
		} else if (is_Sel(ptr)) {
			ir_node *index = get_Sel_index(ptr);
			if (!is_Const(index))
				break;
			ir_type *type         = get_Sel_type(ptr);
			ir_type *element_type = get_array_element_type(type);
			if (get_type_state(element_type) != layout_fixed)
				break;

			/* TODO: may overflow here */
			int size = get_type_size(element_type);
			offset += size * get_Const_long(index);
			ptr     = get_Sel_ptr(ptr);
		} else if (is_Member(ptr)) {
			ir_entity *entity = get_Member_entity(ptr);
			ir_type   *owner  = get_entity_owner(entity);
			if (get_type_state(owner) != layout_fixed)
				break;
			offset += get_entity_offset(entity);
			ptr     = get_Member_ptr(ptr);
		} else
			break;
	}

	base_offset->offset = offset;
	base_offset->base   = ptr;
}

/**
 * Checks whether the memory of the first access
 * is contained within the second one.
 */
static bool is_contained_in(ir_mode *const load_mode,
                            const base_offset_t *const load_bo,
                            ir_mode *const prev_mode,
                            const base_offset_t *const prev_bo)
{
	if (load_bo->base != prev_bo->base)
		return false;

	/* ensure the load value is completely contained in the previous one */
	long delta = load_bo->offset - prev_bo->offset;
	if (delta < 0)
		return false;

	long load_mode_len = get_mode_size_bytes(load_mode);
	long prev_mode_len = get_mode_size_bytes(prev_mode);
	if (delta+load_mode_len > prev_mode_len)
		return false;

	/* simple case: previous value has the same mode */
	if (load_mode == prev_mode)
		return true;

	ir_mode_arithmetic prev_arithmetic = get_mode_arithmetic(prev_mode);
	ir_mode_arithmetic load_arithmetic = get_mode_arithmetic(load_mode);
	return (prev_arithmetic == irma_twos_complement
	        && load_arithmetic == irma_twos_complement)
	    || (prev_arithmetic != load_arithmetic
	        && load_mode_len == prev_mode_len);
}
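/* Illustrative example (not from the code above): for a frame entity x at
 * byte offset 16, get_base_and_offset() decomposes
 *
 *     ptr = Add(Member(frame, x), Const 4)
 *
 * into base = frame, offset = 20. With that representation a 2-byte load
 * at offset 22 is contained in a previous 4-byte store at offset 20:
 * delta = 22 - 20 = 2 and delta + 2 <= 4, so is_contained_in() holds. */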
/**
 * This is called for load-after-load and load-after-store.
 * If possible the value of the previous load/store is transformed in a way
 * so the 2nd load can be left out/replaced by arithmetic on the previous
 * value.
 */
static ir_node *transform_previous_value(ir_mode *const load_mode,
                                         const long load_offset,
                                         ir_mode *const prev_mode,
                                         const long prev_offset,
                                         ir_node *const prev_value,
                                         ir_node *const block,
                                         ir_node *const load)
{
	/* simple case: previous value has the same mode */
	if (load_mode == prev_mode)
		return prev_value;

	/* two's complement values can be transformed with bitops */
	dbg_info *const dbgi          = get_irn_dbg_info(load);
	long      const load_mode_len = get_mode_size_bytes(load_mode);
	long      const prev_mode_len = get_mode_size_bytes(prev_mode);
	ir_mode_arithmetic const prev_arithmetic = get_mode_arithmetic(prev_mode);
	ir_mode_arithmetic const load_arithmetic = get_mode_arithmetic(load_mode);
	if (prev_arithmetic == irma_twos_complement
	    && load_arithmetic == irma_twos_complement) {
		/* produce a shift to adjust offset delta */
		long delta = load_offset - prev_offset;
		unsigned const shift = ir_target_big_endian()
			? prev_mode_len - load_mode_len - delta
			: delta;

		ir_node *new_value = prev_value;
		if (shift != 0) {
			if (mode_is_reference(prev_mode)) {
				ir_mode *const new_mode = get_reference_offset_mode(prev_mode);
				new_value = new_r_Conv(block, new_value, new_mode);
			}
			ir_graph *const irg  = get_irn_irg(block);
			ir_node  *const cnst = new_r_Const_long(irg, mode_Iu, shift * 8);
			new_value = new_rd_Shr(dbgi, block, new_value, cnst);
		}
		return new_rd_Conv(dbgi, block, new_value, load_mode);
	} else {
		assert(prev_arithmetic != load_arithmetic
		       && load_mode_len == prev_mode_len);
		return new_rd_Bitcast(dbgi, block, prev_value, load_mode);
	}
}
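/* Worked example for the shift computation above (illustrative): assume a
 * previous 4-byte store of 0xAABBCCDD and a 1-byte load at delta = 2.
 * Little-endian: shift = delta = 2, so the result is
 * Conv(0xAABBCCDD >> 16) = 0xBB, the byte stored at base+2.
 * Big-endian: shift = 4 - 1 - 2 = 1, giving Conv(0xAABBCCDD >> 8) = 0xCC,
 * which is the byte stored at base+2 there. */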
static changes_t replace_load(ir_node *load, ir_node *new_value)
{
	const ldst_info_t *info = (ldst_info_t *)get_irn_link(load);
	if (info->projs[pn_Load_M])
		exchange(info->projs[pn_Load_M], get_Load_mem(load));

	changes_t res = NO_CHANGES;
	/* no exception */
	if (info->projs[pn_Load_X_except] != NULL) {
		ir_graph *irg = get_irn_irg(load);
		ir_node  *bad = new_r_Bad(irg, mode_X);
		exchange(info->projs[pn_Load_X_except], bad);
		res |= CF_CHANGED;

		assert(info->projs[pn_Load_X_regular] != NULL);
		ir_node *jmp = new_r_Jmp(get_nodes_block(load));
		exchange(info->projs[pn_Load_X_regular], jmp);
	}

	/* loads without user should already be optimized away */
	assert(info->projs[pn_Load_res] != NULL);
	exchange(info->projs[pn_Load_res], new_value);

	kill_and_reduce_usage(load);
	return res | DF_CHANGED;
}

/**
 * Returns true if @p op can only be reached through the X_regular proj of
 * @p prev_op, i.e. if no exceptional control flow can bypass it. For now
 * this is approximated by checking that @p prev_op cannot throw an
 * exception at all.
 */
static bool on_regular_path(ir_node *op, ir_node *prev_op)
{
	/* TODO: create a real test, for now we just make sure the previous node
	 * does not throw an exception. */
	(void)op;
	return !is_fragile_op(prev_op) || !ir_throws_exception(prev_op);
}

static changes_t try_load_after_store(track_load_env_t *env, ir_node *store)
{
	ir_node *const load = env->load;
	if (!on_regular_path(load, store))
		return NO_CHANGES;

	ir_node      *store_ptr = get_Store_ptr(store);
	base_offset_t prev_base_offset;
	get_base_and_offset(store_ptr, &prev_base_offset);

	ir_mode *const load_mode  = get_Load_mode(load);
	ir_mode *const store_mode = get_irn_mode(get_Store_value(store));
	base_offset_t *const base_offset = &env->base_offset;

	/* load value completely contained in previous store? */
	if (is_contained_in(load_mode, base_offset, store_mode,
	                    &prev_base_offset)) {
		ir_node *const store_value = get_Store_value(store);
		ir_node *const block       = get_nodes_block(load);
		ir_node *const new_value
			= transform_previous_value(load_mode, base_offset->offset,
			                           store_mode, prev_base_offset.offset,
			                           store_value, block, load);
		DBG_OPT_RAW(load, new_value);
		return replace_load(load, new_value);
	}
	return NO_CHANGES;
}

static changes_t try_load_after_load(track_load_env_t *env, ir_node *prev_load)
{
	ir_node *const load = env->load;
	if (!on_regular_path(load, prev_load))
		return NO_CHANGES;

	ldst_info_t *const info       = (ldst_info_t *)get_irn_link(prev_load);
	ir_node     *const prev_value = info->projs[pn_Load_res];
	/* the other load is unused and will get removed later anyway */
	if (prev_value == NULL)
		return NO_CHANGES;

	ir_node      *const prev_ptr = get_Load_ptr(prev_load);
	base_offset_t prev_base_offset;
	get_base_and_offset(prev_ptr, &prev_base_offset);

	ir_mode *const load_mode = get_Load_mode(load);
	ir_mode *const prev_mode = get_Load_mode(prev_load);
	base_offset_t *const base_offset = &env->base_offset;
	/* load value completely contained in previous load? */
	if (is_contained_in(load_mode, base_offset, prev_mode,
	                    &prev_base_offset)) {
		ir_node *const block = get_nodes_block(load);
		ir_node *const new_value
			= transform_previous_value(load_mode, base_offset->offset,
			                           prev_mode, prev_base_offset.offset,
			                           prev_value, block, load);
		DBG_OPT_RAR(prev_load, load);
		return replace_load(load, new_value);
	}

	/* previous load value completely contained in current load? */
	if (get_Load_type(load) == get_Load_type(prev_load)
	    && is_contained_in(prev_mode, &prev_base_offset, load_mode,
	                       base_offset)) {
		ir_graph *const irg   = get_irn_irg(prev_load);
		ir_node  *const block = get_nodes_block(prev_load);

		/* Adapt pointer of previous load. */
		const long delta = base_offset->offset - prev_base_offset.offset;
		assert(delta <= 0);
		ir_mode *const ptr_mode      = get_irn_mode(prev_ptr);
		ir_mode *const ptr_mode_offs = get_reference_offset_mode(ptr_mode);
		ir_node *const c   = new_r_Const_long(irg, ptr_mode_offs, delta);
		ir_node *const add = new_r_Add(block, prev_ptr, c);
		set_Load_ptr(prev_load, add);
		/* Change mode of previous load. */
		set_Load_mode(prev_load, load_mode);
		ir_node *const original_proj = info->projs[pn_Load_res];
		ir_node *const result_proj   = new_r_Proj(prev_load, load_mode,
		                                          pn_Load_res);
		changes_t changed = replace_load(load, result_proj);

		ir_node *const new_prev_value
			= transform_previous_value(prev_mode, prev_base_offset.offset,
			                           load_mode, base_offset->offset,
			                           result_proj, block, load);
		exchange(original_proj, new_prev_value);
		info->projs[pn_Load_res] = result_proj;

		changed |= DF_CHANGED;
		DBG_OPT_RAR(prev_load, load);
		return changed;
	}

	return NO_CHANGES;
}

static bool try_update_ptr_CopyB(track_load_env_t *env, ir_node *copyb)
{
	ir_node      *copyb_dst = get_CopyB_dst(copyb);
	base_offset_t dst_base_offset;
	get_base_and_offset(copyb_dst, &dst_base_offset);
	if (dst_base_offset.base != env->base_offset.base)
		return false;

	/* see if bytes loaded are fully contained in the CopyB */
	if (env->base_offset.offset < dst_base_offset.offset)
		return false;

	ir_type *copyb_type = get_CopyB_type(copyb);
	long     n_copy     = get_type_size(copyb_type);

	ir_node      *copyb_src = get_CopyB_src(copyb);
	base_offset_t src_base_offset;
	get_base_and_offset(copyb_src, &src_base_offset);

	long     delta     = env->base_offset.offset - dst_base_offset.offset;
	ir_node *load      = env->load;
	ir_mode *load_mode = get_Load_mode(load);
	long     load_size = get_mode_size_bytes(load_mode);
	if (delta + load_size > n_copy)
		return false;

	/* track src input */
	env->base_offset.base   = src_base_offset.base;
	env->base_offset.offset = src_base_offset.offset + delta;

	/*
	 * Everything is OK, we can replace
	 *   ptr = (load_base_ptr + load_offset)
	 * with
	 *   new_load_ptr = (src_base_ptr + (src_offset + delta))
	 */
	ir_graph *irg          = get_irn_irg(load);
	ir_node  *block        = get_nodes_block(load);
	ir_mode  *mode_ref     = get_irn_mode(src_base_offset.base);
	ir_mode  *mode_offset  = get_reference_offset_mode(mode_ref);
	ir_node  *cnst         = new_r_Const_long(irg, mode_offset,
	                                          src_base_offset.offset + delta);
	ir_node  *new_load_ptr = new_r_Add(block, src_base_offset.base, cnst);
	env->ptr = new_load_ptr;
	return true;
}
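/* Illustrative sketch of the pointer rewrite above: given
 *
 *     CopyB(dst, src, T)  ...  Load(dst_base + delta)
 *
 * with the loaded bytes fully inside the copied region, the tracked
 * address becomes src_base + src_offset + delta, so the chain walk can
 * continue at the data's original location (or predict_load() can read it
 * from a constant initializer). */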
/* Note on terminology:
 * In the following functions, two types are associated with each Load
 * and Store: load_type/store_type are the types of the actual
 * operations (derived from the modes of the IR nodes), whereas
 * load_objt/store_objt are the types of the objects in memory which are
 * accessed by the Load/Store nodes.
 */

/**
 * Follow the memory chain as long as there are only Loads,
 * alias free Stores, and constant Calls and try to replace the
 * current Load by a previous one.
 * Note that in unreachable loops it might happen that we reach the
 * Load again or fall into a cycle.
 * We break such cycles using a special visited flag.
 *
 * INC_MASTER() must be called before diving into the chain.
 */
static changes_t follow_load_mem_chain(track_load_env_t *env, ir_node *start)
{
	ir_node  *load      = env->load;
	ir_type  *load_type = get_Load_type(load);
	unsigned  load_size = get_mode_size_bytes(get_Load_mode(load));

	ir_node  *node = start;
	changes_t res  = NO_CHANGES;
	for (;;) {
		ldst_info_t *node_info = (ldst_info_t *)get_irn_link(node);
		if (is_Store(node)) {
			/* first try load-after-store */
			changes_t changes = try_load_after_store(env, node);
			if (changes != NO_CHANGES)
				return changes | res;

			/* check if we can pass through this store */
			const ir_node *ptr        = get_Store_ptr(node);
			const ir_node *value      = get_Store_value(node);
			const ir_type *store_type = get_Store_type(node);
			unsigned       store_size = get_mode_size_bytes(get_irn_mode(value));
			ir_alias_relation rel = get_alias_relation(
				ptr, store_type, store_size,
				env->ptr, load_type, load_size);
			/* if there might be an alias, we cannot pass this Store */
			if (rel != ir_no_alias)
				break;
			node = skip_Proj(get_Store_mem(node));
		} else if (is_Load(node)) {
			/* try load-after-load */
			changes_t changes = try_load_after_load(env, node);
			if (changes != NO_CHANGES)
				return changes | res;
			/* we can skip any load */
			node = skip_Proj(get_Load_mem(node));
		} else if (is_CopyB(node)) {
			/*
			 * We cannot replace the Load with another
			 * Load from the CopyB's source directly,
			 * because there may be Stores in between,
			 * destroying the source data. However, we can
			 * use the source address from this point
			 * onwards for further optimizations.
			 */
			bool updated = try_update_ptr_CopyB(env, node);
			if (updated) {
				/* Special case: If the new pointer points to
				 * a constant, we *can* replace the
				 * Load immediately. */
				res |= NODES_CREATED;
				ir_mode *load_mode = get_Load_mode(load);
				ir_node *new_value = predict_load(env->ptr, load_mode);
				if (new_value != NULL)
					return replace_load(load, new_value) | res;
			}

			/* check aliasing with the CopyB */
			ir_node *dst  = get_CopyB_dst(node);
			ir_type *type = get_CopyB_type(node);
			unsigned size = get_type_size(type);
			ir_alias_relation rel = get_alias_relation(
				dst, type, size,
				env->ptr, load_type, load_size);
			/* possible alias => we cannot continue */
			if (rel != ir_no_alias)
				break;
			node = skip_Proj(get_CopyB_mem(node));
		} else if (is_irn_const_memory(node)) {
			node = skip_Proj(get_memop_mem(node));
		} else {
			/* be conservative about any other node and assume aliasing
			 * that changes the loaded value */
			break;
		}

		/* check for cycles */
		if (NODE_VISITED(node_info))
			break;
		MARK_NODE(node_info);
	}

	if (is_Sync(node)) {
		/* handle all Sync predecessors */
		foreach_irn_in(node, i, in) {
			ir_node *skipped = skip_Proj(in);
			res |= follow_load_mem_chain(env, skipped);
			if ((res & ~NODES_CREATED) != NO_CHANGES)
				break;
		}
	}
	return res;
}
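/* The chain walk above follows memory edges upwards, e.g. (illustrative):
 *
 *     current Load <- Store A <- Load B <- ... <- Sync(m1, m2)
 *
 * Load B is always skipped, Store A only if it provably does not alias
 * the tracked address, and at a Sync each predecessor chain is followed
 * recursively. */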
ir_node *can_replace_load_by_const(const ir_node *load, ir_node *c)
{
	ir_mode  *c_mode = get_irn_mode(c);
	ir_mode  *l_mode = get_Load_mode(load);
	ir_node  *block  = get_nodes_block(load);
	dbg_info *dbgi   = get_irn_dbg_info(load);
	ir_node  *res    = duplicate_subgraph(dbgi, c, block);
	if (c_mode != l_mode) {
		/* check if the mode matches or can easily be converted into it */
		if (is_reinterpret_cast(c_mode, l_mode)) {
			/* copy the value from the const code irg and cast it */
			res = new_rd_Conv(dbgi, block, res, l_mode);
		} else {
			return NULL;
		}
	}
	return res;
}

static ir_entity *find_entity(ir_node *ptr)
{
	switch (get_irn_opcode(ptr)) {
	case iro_Address:
		return get_Address_entity(ptr);
	case iro_Offset:
		return get_Offset_entity(ptr);
	case iro_Member: {
		ir_node *pred = get_Member_ptr(ptr);
		if (get_irg_frame(get_irn_irg(ptr)) == pred)
			return get_Member_entity(ptr);
		return find_entity(pred);
	}
	case iro_Add: {
		ir_node *const right = get_Add_right(ptr);
		if (mode_is_reference(get_irn_mode(right)))
			return find_entity(right);
	}
		/* FALLTHROUGH */
	case iro_Sub:
		return find_entity(get_binop_left(ptr));
	default:
		return NULL;
	}
}

/**
 * optimize a Load
 *
 * @param load  the Load node
 */
static changes_t optimize_load(ir_node *load)
{
	const ldst_info_t *info = (ldst_info_t *)get_irn_link(load);
	changes_t          res  = NO_CHANGES;

	/* do NOT touch volatile loads for now */
	if (get_Load_volatility(load) == volatility_is_volatile)
		return NO_CHANGES;

	/* the address of the load to be optimized */
	ir_node *ptr = get_Load_ptr(load);
	/* The mem of the Load. Must still be returned after optimization. */
	ir_node *mem = get_Load_mem(load);

	if (info->projs[pn_Load_res] == NULL
	    && info->projs[pn_Load_X_except] == NULL) {
		assert(info->projs[pn_Load_X_regular] == NULL);
		/* the value is never used and we don't care about exceptions,
		 * remove */
		exchange(info->projs[pn_Load_M], mem);
		kill_and_reduce_usage(load);
		return res | DF_CHANGED;
	}

	/* a load from an entity which is never written to can be replaced by
	 * the value of the entity's initializer. */
	ir_entity *entity = find_entity(ptr);
	if (entity != NULL && get_entity_kind(entity) == IR_ENTITY_NORMAL
	    && !(get_entity_usage(entity) & ir_usage_write)) {
		set_entity_linkage(entity, IR_LINKAGE_CONSTANT);
		ir_node *ptr  = get_Load_ptr(load);
		ir_mode *mode = get_Load_mode(load);
		ir_node *val  = predict_load(ptr, mode);
		if (val != NULL) {
			ldst_info_t *info = (ldst_info_t*)get_irn_link(load);
			exchange(info->projs[pn_Load_res], val);
			exchange(info->projs[pn_Load_M], get_Load_mem(load));
			kill_and_reduce_usage(load);
			return DF_CHANGED;
		}
	}

	track_load_env_t env = { .ptr = ptr };
	get_base_and_offset(ptr, &env.base_offset);

	/* Check if the base address of this load is used more than once.
	 * If not, we won't find another store/load/CopyB anyway.
	 * TODO: can we miss values with multiple users in between? */
	if (get_irn_n_edges(ptr) <= 1
	    && get_irn_n_edges(env.base_offset.base) <= 1)
		return res;

	/*
	 * follow the memory chain as long as there are only Loads
	 * and try to replace the current Load or Store by a previous one.
	 * Note that in unreachable loops it might happen that we reach the
	 * Load again or fall into a cycle.
	 * We break such cycles using a special visited flag.
	 */
	INC_MASTER();
	env.load = load;
	res = follow_load_mem_chain(&env, skip_Proj(mem));
	return res;
}

/**
 * Check whether small is a part of large (starting at same address).
 */
static bool is_partially_same(ir_node *small, ir_node *large)
{
	const ir_mode *sm = get_irn_mode(small);
	const ir_mode *lm = get_irn_mode(large);

	/* FIXME: Check endianness */
	return is_Conv(small) && get_Conv_op(small) == large
	    && get_mode_size_bytes(sm) < get_mode_size_bytes(lm)
	    && get_mode_arithmetic(sm) == irma_twos_complement
	    && get_mode_arithmetic(lm) == irma_twos_complement;
}
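/* Example for is_partially_same() (illustrative): after
 *
 *     Store(p, x)           with x in a 4-byte int mode
 *     Store(p, Conv_Hs(x))  storing a 2-byte truncation of x
 *
 * the second Store only rewrites bytes the first one already wrote, so
 * follow_store_mem_chain() below may remove it (modulo the endianness
 * FIXME above). */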
/**
 * follow the memory chain as long as there are only Loads and alias free
 * Stores.
 *
 * INC_MASTER() must be called before diving into the chain.
 */
static changes_t follow_store_mem_chain(ir_node *store, ir_node *start,
                                        bool had_split)
{
	changes_t    res   = NO_CHANGES;
	ldst_info_t *info  = (ldst_info_t *)get_irn_link(store);
	ir_node     *ptr   = get_Store_ptr(store);
	ir_node     *mem   = get_Store_mem(store);
	ir_node     *value = get_Store_value(store);
	ir_type     *type  = get_Store_type(store);
	unsigned     size  = get_mode_size_bytes(get_irn_mode(value));
	ir_node     *block = get_nodes_block(store);
	ir_node     *node  = start;
	while (node != store) {
		ldst_info_t *node_info = (ldst_info_t *)get_irn_link(node);

		/*
		 * BEWARE: one might think that checking the modes is useless,
		 * because if the pointers are identical, they refer to the same
		 * object. This is only true in strongly typed languages, not in C,
		 * where the following is possible:
		 *   *(ir_type1 *)p = a; *(ir_type2 *)p = b ...
		 * However, if the size of the mode that is written is greater than
		 * or equal to the size of the old one, the old value is completely
		 * overwritten and can be killed ...
		 */
		if (is_Store(node) && !had_split && get_Store_ptr(node) == ptr
		    && get_nodes_block(node) == block) {
			/*
			 * a Store after a Store in the same Block -- a write after
			 * write.
			 */

			/*
			 * We may remove the first Store, if the old value is completely
			 * overwritten or the old value is a part of the new value,
			 * and if it does not have an exception handler.
			 *
			 * TODO: What if both have the same exception handler?
			 */
			if (get_Store_volatility(node) != volatility_is_volatile
			    && !node_info->projs[pn_Store_X_except]) {
				ir_node  *predvalue = get_Store_value(node);
				ir_mode  *predmode  = get_irn_mode(predvalue);
				unsigned  predsize  = get_mode_size_bytes(predmode);

				if (predsize <= size || is_partially_same(predvalue, value)) {
					DBG_OPT_WAW(node, store);
					DB((dbg, LEVEL_1,
					    "  killing store %+F (override by %+F)\n",
					    node, store));
					exchange(node_info->projs[pn_Store_M],
					         get_Store_mem(node));
					kill_and_reduce_usage(node);
					return DF_CHANGED;
				}
			}

			/*
			 * We may remove the Store, if the old value already contains
			 * the new value, and if it does not have an exception handler.
			 *
			 * TODO: What if both have the same exception handler?
			 */
			if (get_Store_volatility(store) != volatility_is_volatile
			    && !info->projs[pn_Store_X_except]) {
				ir_node *predvalue = get_Store_value(node);

				if (is_partially_same(value, predvalue)) {
					DBG_OPT_WAW(node, store);
					DB((dbg, LEVEL_1,
					    "  killing store %+F (override by %+F)\n",
					    node, store));
					exchange(info->projs[pn_Store_M], mem);
					kill_and_reduce_usage(store);
					return DF_CHANGED;
				}
			}
		} else if (is_Load(node) && get_Load_ptr(node) == ptr
		           && value == node_info->projs[pn_Load_res]) {
			/*
			 * a Store of a value just loaded from the same address
			 * -- a write after read.
			 * We may remove the Store, if it does not have an exception
			 * handler.
			 */
			if (!info->projs[pn_Store_X_except]) {
				DBG_OPT_WAR(store, node);
				DB((dbg, LEVEL_1,
				    "  killing store %+F (read %+F from same address)\n",
				    store, node));
				exchange(info->projs[pn_Store_M], mem);
				kill_and_reduce_usage(store);
				return DF_CHANGED;
			}
		}

		if (is_Store(node)) {
			/* check if we can pass through this store */
			ir_node  *store_ptr   = get_Store_ptr(node);
			ir_node  *store_value = get_Store_value(node);
			unsigned  store_size  = get_mode_size_bytes(get_irn_mode(store_value));
			ir_type  *store_type  = get_Store_type(node);
			ir_alias_relation rel = get_alias_relation(
				store_ptr, store_type, store_size,
				ptr, type, size);
			/* if there might be an alias, we cannot pass this Store */
			if (rel != ir_no_alias)
				break;
			node = skip_Proj(get_Store_mem(node));
		} else if (is_Load(node)) {
			ir_node  *load_ptr  = get_Load_ptr(node);
			ir_type  *load_type = get_Load_type(node);
			unsigned  load_size = get_mode_size_bytes(get_Load_mode(node));
			ir_alias_relation rel = get_alias_relation(
				load_ptr, load_type, load_size,
				ptr, type, size);
			if (rel != ir_no_alias)
				break;

			node = skip_Proj(get_Load_mem(node));
		} else if (is_CopyB(node)) {
			ir_node  *copyb_src  = get_CopyB_src(node);
			ir_type  *copyb_type = get_CopyB_type(node);
			unsigned  copyb_size = get_type_size(copyb_type);
			ir_alias_relation src_rel = get_alias_relation(
				copyb_src, copyb_type, copyb_size,
				ptr, type, size);
			if (src_rel != ir_no_alias)
				break;
			ir_node *copyb_dst = get_CopyB_dst(node);
			ir_alias_relation dst_rel = get_alias_relation(
				copyb_dst, copyb_type, copyb_size,
				ptr, type, size);
			if (dst_rel != ir_no_alias)
				break;
			node = skip_Proj(get_CopyB_mem(node));
		} else {
			/* follow only Load chains */
			break;
		}

		/* check for cycles */
		if (NODE_VISITED(node_info))
			break;
		MARK_NODE(node_info);
	}

	if (is_Sync(node)) {
		/* handle all Sync predecessors */
		foreach_irn_in(node, i, in) {
			ir_node *skipped = skip_Proj(in);
			res |= follow_store_mem_chain(store, skipped, true);
			if (res != NO_CHANGES)
				break;
		}
	}
	return res;
}

/**
 * optimize a Store
 *
 * @param store  the Store node
 */
static changes_t optimize_store(ir_node *store)
{
	if (get_Store_volatility(store) == volatility_is_volatile)
		return NO_CHANGES;

	/* Check if the address of this Store is used more than once.
	 * If not, this Store cannot be removed in any case. */
	ir_node *ptr = get_Store_ptr(store);
	if (get_irn_n_edges(ptr) <= 1)
		return NO_CHANGES;

	ir_node *mem = get_Store_mem(store);

	/* follow the memory chain as long as there are only Loads and
	 * alias-free Stores */
	INC_MASTER();
	return follow_store_mem_chain(store, skip_Proj(mem), false);
}

/**
 * Checks whether @c ptr of type @c ptr_type lies completely within an
 * object of type @c struct_type starting at @c struct_ptr.
 */
static bool ptr_is_in_struct(ir_node *ptr, ir_type *ptr_type,
                             ir_node *struct_ptr, ir_type *struct_type)
{
	base_offset_t base_offset;
	get_base_and_offset(ptr, &base_offset);
	long ptr_offset = base_offset.offset;

	base_offset_t struct_offset;
	get_base_and_offset(struct_ptr, &struct_offset);

	unsigned ptr_size    = get_type_size(ptr_type);
	unsigned struct_size = get_type_size(struct_type);

	return base_offset.base == struct_offset.base
	    && ptr_offset >= struct_offset.offset
	    && ptr_offset + ptr_size <= struct_offset.offset + struct_size;
}
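/* Example for ptr_is_in_struct() (illustrative): with
 *
 *     struct S { int a; int b; } s;
 *
 * a pointer to s.b with ptr_type int lies within an object of type
 * struct S at &s, because offset(b) >= 0 and
 * offset(b) + sizeof(int) <= sizeof(struct S). */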
/**
 * Tries to optimize @c copyb. This function handles the following
 * cases:
 * - A previous Store that lies completely within @c copyb's destination
 *   will be deleted, unless it modifies @c copyb's source.
 * - If a previous CopyB writes to @c copyb's source, @c copyb will
 *   read from the previous CopyB's source if possible. Cases where
 *   the CopyB nodes are offset against each other are not handled.
 */
static changes_t follow_copyb_mem_chain(ir_node *copyb, ir_node *start,
                                        bool had_split)
{
	changes_t res   = NO_CHANGES;
	ir_node  *src   = get_CopyB_src(copyb);
	ir_node  *dst   = get_CopyB_dst(copyb);
	ir_type  *type  = get_CopyB_type(copyb);
	unsigned  size  = get_type_size(type);
	ir_node  *block = get_nodes_block(copyb);

	ir_node *node = start;
	while (node != copyb) {
		ldst_info_t *node_info = (ldst_info_t *)get_irn_link(node);

		if (is_Store(node)
		    && get_Store_volatility(node) != volatility_is_volatile
		    && !node_info->projs[pn_Store_X_except] && !had_split
		    && get_nodes_block(node) == block) {
			/* If the CopyB completely overwrites the Store,
			 * and the Store does not modify the CopyB's source memory,
			 * and the Store has no exception handler,
			 * the Store can be removed.
			 * This is basically a write-after-write. */
			ir_node  *store_ptr   = get_Store_ptr(node);
			ir_type  *store_type  = get_Store_type(node);
			ir_node  *store_value = get_Store_value(node);
			unsigned  store_size  = get_mode_size_bytes(get_irn_mode(store_value));
			ir_alias_relation src_rel = get_alias_relation(
				src, type, size, store_ptr, store_type, store_size);

			if (src_rel == ir_no_alias
			    && ptr_is_in_struct(store_ptr, store_type, dst, type)) {
				DBG_OPT_WAW(node, copyb);
				DB((dbg, LEVEL_1,
				    "  killing store %+F (override by %+F)\n",
				    node, copyb));
				exchange(node_info->projs[pn_Store_M], get_Store_mem(node));
				kill_and_reduce_usage(node);
				return DF_CHANGED;
			}
		}

		if (is_Store(node)) {
			ir_node  *store_ptr   = get_Store_ptr(node);
			ir_type  *store_type  = get_Store_type(node);
			ir_node  *store_value = get_Store_value(node);
			unsigned  store_size  = get_mode_size_bytes(get_irn_mode(store_value));

			/* check if we can pass through this store */
			ir_alias_relation src_rel = get_alias_relation(
				store_ptr, store_type, store_size,
				src, type, size);
			if (src_rel != ir_no_alias)
				break;
			ir_alias_relation dst_rel = get_alias_relation(
				store_ptr, store_type, store_size,
				dst, type, size);
			if (dst_rel != ir_no_alias)
				break;
			node = skip_Proj(get_Store_mem(node));
		} else if (is_Load(node)) {
			ir_node  *load_ptr  = get_Load_ptr(node);
			ir_type  *load_type = get_Load_type(node);
			unsigned  load_size = get_mode_size_bytes(get_Load_mode(node));
			ir_alias_relation rel = get_alias_relation(
				load_ptr, load_type, load_size,
				dst, type, size);
			if (rel != ir_no_alias)
				break;
			node = skip_Proj(get_Load_mem(node));
		} else if (is_CopyB(node)) {
			ir_node  *pred_dst  = get_CopyB_dst(node);
			ir_node  *pred_src  = get_CopyB_src(node);
			ir_type  *pred_type = get_CopyB_type(node);
			unsigned  pred_size = get_type_size(pred_type);

			if (src == pred_dst && size == pred_size
			    && get_CopyB_volatility(node) == volatility_non_volatile) {
				src = pred_src;

				/* Special case: If src now points to a constant, we *can*
				 * replace it immediately. */
				ir_entity *entity = find_entity(src);
				if (entity != NULL
				    && (get_entity_linkage(entity) & IR_LINKAGE_CONSTANT)) {
					DBG((dbg, LEVEL_1,
					     "Optimizing %+F to read from %+F's source\n",
					     copyb, node));
					set_CopyB_src(copyb, src);
					return res | DF_CHANGED;
				}
			} else {
				ir_alias_relation dst_dst_rel = get_alias_relation(
					pred_dst, pred_type, pred_size,
					dst, type, size);
				if (dst_dst_rel != ir_no_alias)
					break;
				ir_alias_relation src_dst_rel = get_alias_relation(
					pred_src, pred_type, pred_size,
					dst, type, size);
				if (src_dst_rel != ir_no_alias)
					break;
				ir_alias_relation dst_src_rel = get_alias_relation(
					pred_dst, pred_type, pred_size,
					src, type, size);
				if (dst_src_rel != ir_no_alias)
					break;
			}

			node = skip_Proj(get_CopyB_mem(node));
		} else {
			break;
		}

		/* check for cycles */
		if (NODE_VISITED(node_info))
			break;
		MARK_NODE(node_info);
	}

	if (is_Sync(node)) {
		/* handle all Sync predecessors */
		foreach_irn_in(node, i, in) {
			ir_node *skipped = skip_Proj(in);
			res |= follow_copyb_mem_chain(copyb, skipped, true);
			if (res != NO_CHANGES)
				break;
		}
	}
	return res;
}

/**
 * Optimizes a CopyB node.
 *
 * @param copyb  the CopyB node
 */
static changes_t optimize_copyb(ir_node *copyb)
{
	if (get_CopyB_volatility(copyb) == volatility_is_volatile)
		return NO_CHANGES;

	ir_node *mem = get_CopyB_mem(copyb);
	INC_MASTER();
	return follow_copyb_mem_chain(copyb, skip_Proj(mem), false);
}

/* check if a node has more than one real user. Keepalive edges do not count
 * as real users */
static bool has_multiple_users(const ir_node *node)
{
	unsigned real_users = 0;
	foreach_out_edge(node, edge) {
		ir_node *user = get_edge_src_irn(edge);
		if (is_End(user))
			continue;
		++real_users;
		if (real_users > 1)
			return true;
	}
	return false;
}

static ir_cons_flags get_store_cons_flags(ir_node *const store)
{
	ir_cons_flags cons = cons_none;
	if (get_Store_volatility(store) != volatility_non_volatile)
		cons |= cons_volatile;
	if (get_Store_unaligned(store) != align_is_aligned)
		cons |= cons_unaligned;
	if (get_irn_pinned(store) == op_pin_state_floats)
		cons |= cons_floats;
	if (ir_throws_exception(store))
		cons |= cons_throws_exception;
	return cons;
}
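/* At the C level, the Phi-of-Stores optimization implemented below roughly
 * turns (illustrative)
 *
 *     if (c) *p = a; else *p = b;
 * into
 *     *p = c ? a : b;
 *
 * i.e. one Store fed by a data Phi instead of one Store per branch. */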
/**
 * walker, optimizes Phi after Stores to identical places:
 * Does the following optimization:
 * @verbatim
 *
 *   val1   val2   val3          val1  val2  val3
 *    |      |      |               \    |    /
 *  Store  Store  Store              \   |   /
 *      \    |    /                   PhiData
 *       \   |   /                       |
 *        \  |  /                      Store
 *          PhiM
 *
 * @endverbatim
 * This reduces the number of stores and allows for predicated execution.
 * Moves Stores towards the end of a function, which may be bad.
 *
 * This is only possible if the predecessor blocks have only one successor.
 */
static changes_t optimize_phi(ir_node *phi, walk_env_t *wenv)
{
	/* Must be a memory Phi */
	if (get_irn_mode(phi) != mode_M)
		return NO_CHANGES;

	/* Phi must have at least one operand and it is pointless to distribute
	 * over a Phi with exactly one operand. It would needlessly create a new
	 * Store and lose the debug info of the Store. */
	int n = get_Phi_n_preds(phi);
	if (n <= 1)
		return NO_CHANGES;

	/* must be only one user */
	ir_node *projM = get_Phi_pred(phi, 0);
	if (has_multiple_users(projM))
		return NO_CHANGES;

	ir_node *store = skip_Proj(projM);
	if (!is_Store(store))
		return NO_CHANGES;

	/* check if the block is postdominated by the Phi block and has no
	 * exception exit */
	ir_node      *block   = get_nodes_block(store);
	block_info_t *bl_info = (block_info_t *)get_irn_link(block);
	if (bl_info->flags & BLOCK_HAS_EXC)
		return NO_CHANGES;

	ir_node *phi_block = get_nodes_block(phi);
	if (!block_strictly_postdominates(phi_block, block))
		return NO_CHANGES;

	/* this is the address of the store */
	ir_node       *const ptr  = get_Store_ptr(store);
	ir_mode       *const mode = get_irn_mode(get_Store_value(store));
	ir_cons_flags  const cons = get_store_cons_flags(store);
	ldst_info_t   *info       = (ldst_info_t *)get_irn_link(store);
	ir_node       *const exc  = info->exc_block;
	for (int i = 1; i < n; ++i) {
		ir_node *pred = get_Phi_pred(phi, i);
		if (has_multiple_users(pred))
			return NO_CHANGES;

		pred = skip_Proj(pred);
		if (!is_Store(pred))
			return NO_CHANGES;

		if (ptr != get_Store_ptr(pred)
		    || mode != get_irn_mode(get_Store_value(pred)))
			return NO_CHANGES;

		if (cons != get_store_cons_flags(pred))
			return NO_CHANGES;

		/* check, if all stores have the same exception flow */
		info = (ldst_info_t *)get_irn_link(pred);
		if (exc != info->exc_block)
			return NO_CHANGES;

		/* check if the block is postdominated by the Phi block and has no
		 * exception exit. Note that block must be different from the Phi
		 * block, else we would move a Store from the End of a block to its
		 * Start... */
		ir_node      *block   = get_nodes_block(pred);
		block_info_t *bl_info = (block_info_t *)get_irn_link(block);
		if (bl_info->flags & BLOCK_HAS_EXC)
			return NO_CHANGES;
		if (!block_strictly_postdominates(phi_block, block))
			return NO_CHANGES;
	}

	/*
	 * ok, when we are here, we found all predecessors of a Phi that
	 * are Stores to the same address and size. That means whatever
	 * we do before we enter the block of the Phi, we do a Store.
	 * So, we can move the Store to the current block:
	 *
	 *   val1    val2    val3          val1  val2  val3
	 *    |       |       |               \    |    /
	 * | Str |  | Str |  | Str |           \   |   /
	 *      \     |     /                   PhiData
	 *       \    |    /                       |
	 *        \   |   /                       Str
	 *           PhiM
	 *
	 * Is only allowed if the predecessor blocks have only one successor.
	 */
	ir_node **projMs = ALLOCAN(ir_node*, n);
	ir_node **inM    = ALLOCAN(ir_node*, n);
	ir_node **inD    = ALLOCAN(ir_node*, n);
	ir_type  *type   = NULL;
	int      *idx    = ALLOCAN(int, n);

	/* Prepare: Collect all Store nodes. We must do this first because we
	 * otherwise may lose a Store when exchanging its memory Proj. */
	for (int i = n; i-- > 0; ) {
		projMs[i] = get_Phi_pred(phi, i);

		ir_node *const store = get_Proj_pred(projMs[i]);
		info = (ldst_info_t *)get_irn_link(store);

		inM[i] = get_Store_mem(store);
		inD[i] = get_Store_value(store);
		idx[i] = info->exc_idx;

		if (i == n - 1) {
			type = get_Store_type(store);
		} else if (get_Store_type(store) != type) {
			/* We cannot merge Stores with different types. */
			return NO_CHANGES;
		}
	}

	/* second step: remove keep edge from old Phi and create a new memory
	 * Phi */
	ir_node *phiM = get_Phi_loop(phi)
		? new_r_Phi_loop(phi_block, n, inM)
		: new_r_Phi(phi_block, n, inM, mode_M);

	/* third step: create a new data Phi */
	ir_node *phiD = new_r_Phi(phi_block, n, inD, mode);

	/* fourth step: create the Store */
	store = new_r_Store(phi_block, phiM, ptr, phiD, type, cons);
	projM = new_r_Proj(store, mode_M, pn_Store_M);

	/* rewire memory and kill the old nodes */
	for (int i = n; i-- > 0; ) {
		ir_node *proj = projMs[i];
		assert(is_Proj(proj));
		ir_node *store = get_Proj_pred(proj);
		exchange(proj, inM[i]);
		kill_and_reduce_usage(store);
	}

	info = get_ldst_info(store, &wenv->obst);
	info->projs[pn_Store_M] = projM;

	/* fifth step: repair exception flow */
	changes_t res = NO_CHANGES;
	if (exc) {
		ir_node *projX = new_r_Proj(store, mode_X, pn_Store_X_except);

		info->projs[pn_Store_X_except] = projX;
		info->exc_block                = exc;
		info->exc_idx                  = idx[0];

		for (int i = 0; i < n; ++i) {
			set_Block_cfgpred(exc, idx[i], projX);
		}

		if (n > 1) {
			/* the exception block should be optimized as some inputs are
			 * identical now */
		}

		res |= CF_CHANGED;
	}

	/* sixth step: replace old Phi */
	if (get_Phi_loop(phi))
		remove_keep_alive(phi);
	exchange(phi, projM);

	return res | DF_CHANGED;
}

static changes_t optimize_conv_load(ir_node *conv)
{
	ir_node *op = get_Conv_op(conv);
	if (!is_Proj(op))
		return NO_CHANGES;
	if (has_multiple_users(op))
		return NO_CHANGES;
	/* shrink mode of load if possible. */
	ir_node *load = get_Proj_pred(op);
	if (!is_Load(load))
		return NO_CHANGES;

	/* only do it if we are the only user (otherwise the risk is too
	 * great that we end up with 2 loads instead of one). */
	ir_mode *mode      = get_irn_mode(conv);
	ir_mode *load_mode = get_Load_mode(load);
	int      bits_diff = get_mode_size_bits(load_mode)
	                   - get_mode_size_bits(mode);
	if (mode_is_float(load_mode) || mode_is_float(mode) || bits_diff < 0)
		return NO_CHANGES;

	if (ir_target_big_endian()) {
		if (bits_diff % 8 != 0)
			return NO_CHANGES;
		ir_graph *irg       = get_irn_irg(conv);
		ir_node  *ptr       = get_Load_ptr(load);
		ir_mode  *mode      = get_irn_mode(ptr);
		ir_mode  *mode_offs = get_reference_offset_mode(mode);
		ir_node  *delta     = new_r_Const_long(irg, mode_offs, bits_diff/8);
		ir_node  *block     = get_nodes_block(load);
		ir_node  *add       = new_r_Add(block, ptr, delta);
		set_Load_ptr(load, add);
	}
	set_Load_mode(load, mode);
	set_irn_mode(op, mode);
	exchange(conv, op);
	return DF_CHANGED;
}
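/* Example for the Conv(Load) shrinking above (illustrative): for
 *
 *     Conv_Bs(Proj_res(Load_Is(p)))
 *
 * the Load mode is narrowed to Bs. On a big-endian target the address is
 * adjusted to p + 3 first (bits_diff = 32 - 8 = 24, delta = 24/8 = 3),
 * because the low byte of a 4-byte value lives at the highest address
 * there. */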
/**
 * walker, do the optimizations
 */
static void do_load_store_optimize(ir_node *n, void *env)
{
	walk_env_t *wenv = (walk_env_t *)env;
	switch (get_irn_opcode(n)) {
	case iro_Load:
		wenv->changes |= optimize_load(n);
		break;
	case iro_Store:
		wenv->changes |= optimize_store(n);
		break;
	case iro_CopyB:
		wenv->changes |= optimize_copyb(n);
		break;
	case iro_Phi:
		wenv->changes |= optimize_phi(n, wenv);
		break;
	case iro_Conv:
		wenv->changes |= optimize_conv_load(n);
		break;
	default:
		break;
	}
}

/**
 * Check if a store is dead and eliminate it.
 *
 * @param store  the Store node
 */
static changes_t eliminate_dead_store(ir_node *store)
{
	if (get_Store_volatility(store) == volatility_is_volatile)
		return NO_CHANGES;

	ir_node   *ptr    = get_Store_ptr(store);
	ir_entity *entity = find_entity(ptr);

	/* a store to an entity which is never read is unnecessary */
	if (entity != NULL && !(get_entity_usage(entity) & ir_usage_read)) {
		ldst_info_t *info = (ldst_info_t *)get_irn_link(store);
		if (info->projs[pn_Store_X_except] == NULL) {
			DB((dbg, LEVEL_1,
			    "  Killing useless %+F to never read entity %+F\n",
			    store, entity));
			exchange(info->projs[pn_Store_M], get_Store_mem(store));
			kill_and_reduce_usage(store);
			return DF_CHANGED;
		}
	}
	return NO_CHANGES;
}

static changes_t eliminate_dead_copyb(ir_node *copyb)
{
	if (get_CopyB_volatility(copyb) == volatility_is_volatile)
		return NO_CHANGES;

	/* a CopyB whose destination is never read is unnecessary */
	ir_node   *ptr    = get_CopyB_dst(copyb);
	ir_entity *entity = find_entity(ptr);
	if (entity != NULL && !(get_entity_usage(entity) & ir_usage_read)) {
		DB((dbg, LEVEL_1,
		    "  Killing useless %+F to never read entity %+F\n",
		    copyb, entity));
		reduce_node_usage(get_CopyB_dst(copyb));
		reduce_node_usage(get_CopyB_src(copyb));
		exchange(copyb, get_CopyB_mem(copyb));
		return DF_CHANGED;
	}
	return NO_CHANGES;
}

/**
 * walker to eliminate dead stores.
 */
static void do_eliminate_dead_stores(ir_node *n, void *env)
{
	walk_env_t *wenv = (walk_env_t *)env;
	if (is_Store(n)) {
		wenv->changes |= eliminate_dead_store(n);
	} else if (is_CopyB(n)) {
		wenv->changes |= eliminate_dead_copyb(n);
	}
}

/** A scc. */
typedef struct scc {
	ir_node *head; /**< the head of the list */
} scc;

/** A node entry. */
typedef struct node_entry {
	unsigned DFSnum;   /**< the DFS number of this node */
	unsigned low;      /**< the low number of this node */
	bool     in_stack; /**< flag, set if the node is on the stack */
	ir_node *next;     /**< link to the next node in the same scc */
	scc     *pscc;     /**< the scc of this node */
	unsigned POnum;    /**< the post order number for blocks */
} node_entry;

/** A loop entry. */
typedef struct loop_env {
	ir_nodehashmap_t map;
	struct obstack   obst;
	ir_node        **stack;      /**< the node stack */
	size_t           tos;        /**< top-of-stack index */
	unsigned         nextDFSnum; /**< the current DFS number */
	unsigned         POnum;      /**< current post order number */
	changes_t        changes;    /**< a bitmask of graph changes */
} loop_env;

/**
 * Gets the node_entry of a node
 */
static node_entry *get_irn_ne(ir_node *irn, loop_env *env)
{
	node_entry *e = ir_nodehashmap_get(node_entry, &env->map, irn);
	if (e == NULL) {
		e = OALLOC(&env->obst, node_entry);
		memset(e, 0, sizeof(*e));
		ir_nodehashmap_insert(&env->map, irn, e);
	}
	return e;
}

/**
 * Push a node onto the stack.
 *
 * @param env  the loop environment
 * @param n    the node to push
 */
static void push(loop_env *env, ir_node *n)
{
	if (env->tos == ARR_LEN(env->stack)) {
		size_t nlen = ARR_LEN(env->stack) * 2;
		ARR_RESIZE(ir_node*, env->stack, nlen);
	}
	env->stack[env->tos++] = n;
	node_entry *e = get_irn_ne(n, env);
	e->in_stack = true;
}

/**
 * pop a node from the stack
 *
 * @param env  the loop environment
 *
 * @return  The topmost node
 */
static ir_node *pop(loop_env *env)
{
	ir_node    *n = env->stack[--env->tos];
	node_entry *e = get_irn_ne(n, env);
	e->in_stack = false;
	return n;
}
/**
 * Check if irn is a region constant.
 * The block of irn must strictly dominate the header block.
 *
 * @param irn           the node to check
 * @param header_block  the header block of the induction variable
 */
static bool is_rc(ir_node *irn, ir_node *header_block)
{
	ir_node *block = get_nodes_block(irn);
	return (block != header_block) && block_dominates(block, header_block);
}

typedef struct phi_entry phi_entry;
struct phi_entry {
	ir_node   *phi;  /**< A phi with a region const memory. */
	int        pos;  /**< The position of the region const memory */
	ir_node   *load; /**< the newly created load for this phi */
	phi_entry *next;
};

/**
 * An entry in the avail set.
 */
typedef struct avail_entry_t {
	ir_node *ptr;  /**< the address pointer */
	ir_mode *mode; /**< the load mode */
	ir_node *load; /**< the associated Load */
} avail_entry_t;

/**
 * Compare two avail entries.
 */
static int cmp_avail_entry(const void *elt, const void *key, size_t size)
{
	(void)size;
	const avail_entry_t *a = (const avail_entry_t *)elt;
	const avail_entry_t *b = (const avail_entry_t *)key;
	return a->ptr != b->ptr || a->mode != b->mode;
}

/**
 * Calculate the hash value of an avail entry.
 */
static unsigned hash_cache_entry(const avail_entry_t *entry)
{
	return get_irn_idx(entry->ptr)*9 + hash_ptr(entry->mode);
}
/**
 * Move Loads out of loops if possible.
 *
 * @param pscc  the loop described by an SCC
 * @param env   the loop environment
 */
static void move_loads_out_of_loops(scc *pscc, loop_env *env)
{
	/* collect all outer memories */
	phi_entry *phi_list = NULL;
	for (ir_node *phi = pscc->head, *next; phi != NULL; phi = next) {
		node_entry *ne = get_irn_ne(phi, env);
		next = ne->next;

		/* check all memory Phi's */
		if (!is_Phi(phi))
			continue;

		assert(get_irn_mode(phi) == mode_M && "DFS found non-memory Phi");

		foreach_irn_in_r(phi, j, pred) {
			node_entry *const pe = get_irn_ne(pred, env);
			if (pe->pscc != ne->pscc) {
				/* not in the same SCC, is region const */
				phi_entry *pe = OALLOC(&env->obst, phi_entry);
				pe->phi  = phi;
				pe->pos  = j;
				pe->next = phi_list;
				phi_list = pe;
			}
		}
	}
	/* no Phis no fun */
	assert(phi_list != NULL && "DFS found a loop without Phi");

	/* for now, we cannot handle more than one input (only reducible cf) */
	if (phi_list->next != NULL)
		return;

	set *avail = new_set(cmp_avail_entry, 8);
	for (ir_node *load = pscc->head, *next; load != NULL; load = next) {
		node_entry *ne = get_irn_ne(load, env);
		next = ne->next;

		if (is_Load(load)) {
			ldst_info_t *info = (ldst_info_t *)get_irn_link(load);
			ir_node     *ptr  = get_Load_ptr(load);

			/* for now, we cannot handle Loads with exceptions */
			if (info->projs[pn_Load_res] == NULL
			    || info->projs[pn_Load_X_regular] != NULL
			    || info->projs[pn_Load_X_except] != NULL)
				continue;

			/* for now, we can only move Load(Global) */
			if (!is_Address(ptr))
				continue;

			ir_type *load_type = get_Load_type(load);
			ir_mode *load_mode = get_Load_mode(load);
			unsigned load_size = get_mode_size_bytes(load_mode);
			ir_node *other;
			ir_node *next_other;
			for (other = pscc->head; other != NULL; other = next_other) {
				node_entry *ne = get_irn_ne(other, env);
				next_other = ne->next;

				if (is_Store(other)) {
					ir_node  *store_ptr   = get_Store_ptr(other);
					ir_node  *store_value = get_Store_value(other);
					ir_type  *store_type  = get_Store_type(other);
					unsigned  store_size  = get_mode_size_bytes(get_irn_mode(store_value));
					ir_alias_relation rel = get_alias_relation(
						store_ptr, store_type, store_size,
						ptr, load_type, load_size);
					/* if there might be an alias, we cannot pass this
					 * Store */
					if (rel != ir_no_alias)
						break;
				}
				/* only Phis and pure Calls are allowed here, so ignore
				 * them */
			}
			if (other == NULL) {
				ldst_info_t *ninfo = NULL;
				/* yep, no aliasing Store found, Load can be moved */
				DB((dbg, LEVEL_1,
				    "  Found a Load that could be moved: %+F\n", load));

				dbg_info *db = get_irn_dbg_info(load);
				for (phi_entry *pe = phi_list; pe != NULL; pe = pe->next) {
					int      pos  = pe->pos;
					ir_node *phi  = pe->phi;
					ir_node *blk  = get_nodes_block(phi);
					ir_node *pred = get_Block_cfgpred_block(blk, pos);
					avail_entry_t entry = { .ptr = ptr, .mode = load_mode };
					avail_entry_t *res
						= set_find(avail_entry_t, avail, &entry,
						           sizeof(entry), hash_cache_entry(&entry));
					ir_node *irn;
					if (res != NULL) {
						irn = res->load;
					} else {
						irn = new_rd_Load(db, pred, get_Phi_pred(phi, pos),
						                  ptr, load_mode, load_type,
						                  cons_none);
						entry.load = irn;
						(void)set_insert(avail_entry_t, avail, &entry,
						                 sizeof(entry),
						                 hash_cache_entry(&entry));
						DB((dbg, LEVEL_1, "  Created %+F in %+F\n", irn,
						    pred));
					}
					pe->load = irn;
					ninfo = get_ldst_info(irn, &env->obst);

					ir_node *mem = new_r_Proj(irn, mode_M, pn_Load_M);
					ninfo->projs[pn_Load_M] = mem;
					if (res == NULL) {
						/* Only rewire the Phi input for a newly created
						 * Load; if irn came from the cache, the Phi input
						 * was already rewired and there might be other
						 * Loads between phi and irn already. */
						set_Phi_pred(phi, pos, mem);
					}

					ninfo->projs[pn_Load_res]
						= new_r_Proj(irn, load_mode, pn_Load_res);
				}

				/* now kill the old Load */
				exchange(info->projs[pn_Load_M], get_Load_mem(load));
				exchange(info->projs[pn_Load_res],
				         ninfo->projs[pn_Load_res]);

				env->changes |= DF_CHANGED;
			}
		}
	}
	del_set(avail);
}
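/* At the C level, move_loads_out_of_loops() roughly performs (illustrative)
 *
 *     while (...) { ... x = g; ... }
 * ->
 *     t = g; while (...) { ... x = t; ... }
 *
 * for a global g with no aliasing Store inside the loop: the Load is
 * recreated in the loop-entry predecessor and fed into the loop through
 * the memory Phi. */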
/**
 * Process a loop SCC.
 *
 * @param pscc  the SCC
 * @param env   the loop environment
 */
static void process_loop(scc *pscc, loop_env *env)
{
	/* find the header block for this scc */
	ir_node    *header = NULL;
	node_entry *h      = NULL;
	for (ir_node *irn = pscc->head, *next; irn != NULL; irn = next) {
		node_entry *e = get_irn_ne(irn, env);
		next = e->next;

		ir_node    *block = get_nodes_block(irn);
		node_entry *b     = get_irn_ne(block, env);
		if (header != NULL) {
			if (h->POnum < b->POnum) {
				header = block;
				h      = b;
			}
		} else {
			header = block;
			h      = b;
		}
	}

	/* check if this scc contains only Phi, Loads or Stores nodes */
	bool      only_phi    = true;
	unsigned  num_outside = 0;
	bool      process     = false;
	ir_node  *out_rc      = NULL;
	for (ir_node *irn = pscc->head, *next; irn != NULL; irn = next) {
		node_entry *e = get_irn_ne(irn, env);
		next = e->next;
		switch (get_irn_opcode(irn)) {
		case iro_Call:
			if (is_irn_const_memory(irn)) {
				/* pure calls can be treated like loads */
				only_phi = false;
				break;
			}
			/* non-pure calls must be handled like may-alias Stores */
			goto fail;
		case iro_CopyB:
			/* cannot handle CopyB yet */
			goto fail;
		case iro_Load:
			process = true;
			if (get_Load_volatility(irn) == volatility_is_volatile) {
				/* cannot handle loops with volatile Loads */
				goto fail;
			}
			only_phi = false;
			break;
		case iro_Store:
			if (get_Store_volatility(irn) == volatility_is_volatile) {
				/* cannot handle loops with volatile Stores */
				goto fail;
			}
			only_phi = false;
			break;
		default:
			only_phi = false;
			break;
		case iro_Phi:
			foreach_irn_in_r(irn, i, pred) {
				node_entry *pe = get_irn_ne(pred, env);
				if (pe->pscc != e->pscc) {
					/* not in the same SCC, must be a region const */
					if (!is_rc(pred, header)) {
						/* not a memory loop */
						goto fail;
					}
					if (out_rc == NULL) {
						/* first region constant */
						out_rc = pred;
						++num_outside;
					} else if (out_rc != pred) {
						/* another region constant */
						++num_outside;
					}
				}
			}
			break;
		}
	}
	if (!process)
		goto fail;

	/* found a memory loop */
	DB((dbg, LEVEL_2, "  Found a memory loop:\n  "));
	if (only_phi && num_outside == 1) {
		/* a phi cycle with only one real predecessor can be collapsed */
		DB((dbg, LEVEL_2, "  Found a USELESS Phi cycle:\n  "));

		for (ir_node *irn = pscc->head, *next; irn != NULL; irn = next) {
			node_entry *e = get_irn_ne(irn, env);
			next = e->next;
			exchange(irn, out_rc);
		}
		env->changes |= DF_CHANGED;
		return;
	}

#ifdef DEBUG_libfirm
	for (ir_node *irn = pscc->head, *next; irn != NULL; irn = next) {
		node_entry *e = get_irn_ne(irn, env);
		next = e->next;
		DB((dbg, LEVEL_2, " %+F,", irn));
	}
	DB((dbg, LEVEL_2, "\n"));
#endif
	move_loads_out_of_loops(pscc, env);

fail:;
}

/**
 * Process a SCC.
 *
 * @param pscc  the SCC
 * @param env   the loop environment
 */
static void process_scc(scc *pscc, loop_env *env)
{
	ir_node    *head = pscc->head;
	node_entry *e    = get_irn_ne(head, env);

#ifdef DEBUG_libfirm
	DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
	for (ir_node *irn = pscc->head, *next; irn != NULL; irn = next) {
		node_entry *e = get_irn_ne(irn, env);
		next = e->next;

		DB((dbg, LEVEL_4, " %+F,", irn));
	}
	DB((dbg, LEVEL_4, "\n"));
#endif

	if (e->next != NULL) {
		/* this SCC has more than one member */
		process_loop(pscc, env);
	}
}

/**
 * Do Tarjan's SCC algorithm and drive load/store optimization.
 *
 * @param irn  start at this node
 * @param env  the loop environment
 */
static void dfs(ir_node *irn, loop_env *env)
{
	mark_irn_visited(irn);

	node_entry *node = get_irn_ne(irn, env);
	node->DFSnum = env->nextDFSnum++;
	node->low    = node->DFSnum;
	push(env, irn);

	/* handle preds */
	if (is_Phi(irn) || is_Sync(irn)) {
		foreach_irn_in(irn, i, pred) {
			node_entry *o = get_irn_ne(pred, env);
			if (!irn_visited(pred)) {
				dfs(pred, env);
				node->low = MIN(node->low, o->low);
			}
			if (o->DFSnum < node->DFSnum && o->in_stack)
				node->low = MIN(o->DFSnum, node->low);
		}
	} else if (is_fragile_op(irn)) {
		ir_node    *pred = get_memop_mem(irn);
		node_entry *o    = get_irn_ne(pred, env);
		if (!irn_visited(pred)) {
			dfs(pred, env);
			node->low = MIN(node->low, o->low);
		}
		if (o->DFSnum < node->DFSnum && o->in_stack)
			node->low = MIN(o->DFSnum, node->low);
	} else if (is_Proj(irn)) {
		ir_node    *pred = get_Proj_pred(irn);
		node_entry *o    = get_irn_ne(pred, env);
		if (!irn_visited(pred)) {
			dfs(pred, env);
			node->low = MIN(node->low, o->low);
		}
		if (o->DFSnum < node->DFSnum && o->in_stack)
			node->low = MIN(o->DFSnum, node->low);
	} else {
		/* IGNORE predecessors */
	}

	if (node->low == node->DFSnum) {
		scc *pscc = OALLOC(&env->obst, scc);
		pscc->head = NULL;
		ir_node *x;
		do {
			x = pop(env);
			node_entry *e = get_irn_ne(x, env);
			e->pscc    = pscc;
			e->next    = pscc->head;
			pscc->head = x;
		} while (x != irn);

		process_scc(pscc, env);
	}
}

/**
 * Do the DFS on the memory edges of a graph.
 *
 * @param irg  the graph to process
 * @param env  the loop environment
 */
static void do_dfs(ir_graph *irg, loop_env *env)
{
	inc_irg_visited(irg);

	/* visit all memory nodes */
	ir_node *endblk = get_irg_end_block(irg);
	for (int i = get_Block_n_cfgpreds(endblk); i-- > 0; ) {
		ir_node *pred = get_Block_cfgpred(endblk, i);
		pred = skip_Proj(pred);
		if (is_Return(pred)) {
			dfs(get_Return_mem(pred), env);
		} else if (is_Raise(pred)) {
			dfs(get_Raise_mem(pred), env);
		} else if (is_fragile_op(pred)) {
			dfs(get_memop_mem(pred), env);
		} else if (is_Bad(pred)) {
			/* ignore non-optimized block predecessor */
		} else {
			panic("unknown EndBlock predecessor");
		}
	}

	/* visit the keep-alives */
	ir_node *end = get_irg_end(irg);
	for (int i = get_End_n_keepalives(end); i-- > 0; ) {
		ir_node *ka = get_End_keepalive(end, i);
		if (is_Phi(ka) && !irn_visited(ka))
			dfs(ka, env);
	}
}
/**
 * Optimize Loads/Stores in loops.
 *
 * @param irg  the graph
 */
static changes_t optimize_loops(ir_graph *irg)
{
	loop_env env = {
		.stack   = NEW_ARR_F(ir_node*, 128),
		.changes = NO_CHANGES,
	};
	ir_nodehashmap_init(&env.map);
	obstack_init(&env.obst);

	/* calculate the SCC's and drive loop optimization. */
	do_dfs(irg, &env);

	DEL_ARR_F(env.stack);
	obstack_free(&env.obst, NULL);
	ir_nodehashmap_destroy(&env.map);

	return env.changes;
}

static ir_node *get_ptr(const ir_node *node)
{
	if (is_Store(node))
		return get_Store_ptr(node);
	if (is_Load(node))
		return get_Load_ptr(node);
	return NULL;
}

static int cmp_ptrs(const void *p0, const void *p1)
{
	ir_node *const *n0   = (ir_node*const *)p0;
	ir_node *const *n1   = (ir_node*const *)p1;
	ir_node        *ptr0 = get_ptr(skip_Proj(*n0));
	ir_node        *ptr1 = get_ptr(skip_Proj(*n1));
	if (ptr0 == NULL || ptr1 == NULL) {
		long num0 = get_irn_node_nr(*n0);
		long num1 = get_irn_node_nr(*n1);
		return (num0 > num1) - (num0 < num1);
	}

	base_offset_t bo0;
	base_offset_t bo1;
	get_base_and_offset(ptr0, &bo0);
	get_base_and_offset(ptr1, &bo1);
	if (bo0.base != bo1.base)
		return (bo0.base > bo1.base) - (bo0.base < bo1.base);
	return (bo0.offset > bo1.offset) - (bo0.offset < bo1.offset);
}

static void combine_memop(ir_node *sync, void *data)
{
	(void)data;
	if (!is_Sync(sync))
		return;

	unsigned   pointer_size = ir_target_pointer_size();
	int        n_preds      = get_Sync_n_preds(sync);
	ir_node  **new_in       = ALLOCAN(ir_node*, n_preds);
	MEMCPY(new_in, get_irn_in(sync), n_preds);
	QSORT(new_in, n_preds, cmp_ptrs);

	bool changed = false;
	for (int i = 0; i < n_preds-1; ++i) {
again:;
		ir_node *pred0 = new_in[i];
		if (pred0 == NULL)
			continue;
		ir_node *skipped = skip_Proj(pred0);
		ir_node *store0  = skipped;
		if (is_Store(store0)) {
			if (get_Store_volatility(store0) == volatility_is_volatile
			    || ir_throws_exception(store0))
				continue;
			ir_node *store_val  = get_Store_value(store0);
			ir_mode *store_mode = get_irn_mode(store_val);
			if (get_mode_arithmetic(store_mode) != irma_twos_complement)
				continue;
			unsigned store_size = get_mode_size_bytes(store_mode);
			if (store_size >= pointer_size || !is_po2_or_zero(store_size))
				continue;
			ir_mode *mode_unsigned = find_unsigned_mode(store_mode);
			if (mode_unsigned == NULL)
				continue;
			ir_mode *double_mode = find_double_bits_int_mode(mode_unsigned);
			if (double_mode == NULL)
				continue;

			ir_node      *store_ptr0 = get_Store_ptr(store0);
			base_offset_t base0;
			get_base_and_offset(store_ptr0, &base0);
			ir_node *mem = get_Store_mem(store0);

			/* found a candidate, the only store we can merge with
			 * must be the next in the list */
			ir_node *pred1 = NULL;
			int      i2    = i + 1;
			for ( ; i2 < n_preds; ++i2) {
				pred1 = new_in[i2];
				if (pred1 != NULL)
					break;
			}
			if (pred1 == NULL)
				continue;

			ir_node *store1 = skip_Proj(pred1);
			if (!is_Store(store1)
			    || get_Store_volatility(store1) == volatility_is_volatile
			    || ir_throws_exception(store1)
			    || get_Store_mem(store1) != mem)
				continue;
			ir_node *store_val1  = get_Store_value(store1);
			ir_mode *store_mode1 = get_irn_mode(store_val1);
			if (get_mode_arithmetic(store_mode1) != irma_twos_complement
			    || get_mode_size_bytes(store_mode1) != store_size)
				continue;
			ir_node      *store_ptr1 = get_Store_ptr(store1);
			base_offset_t base1;
			get_base_and_offset(store_ptr1, &base1);
			if (base0.base != base1.base
			    || base1.offset != base0.offset + (long)(store_size))
				continue;

			/* sort values according to endianness */
			if (ir_target_big_endian()) {
				ir_node *tmp = store_val;
				store_val  = store_val1;
				store_val1 = tmp;
			}

			/* Abort the optimization if we can't guarantee that the extra
			 * arithmetic code below will disappear. */
			if (!is_Const(store_val1)) {
				if (!is_Shr(store_val1))
					continue;
				ir_node *shiftval = get_Shr_right(store_val1);
				if (!is_Const(shiftval))
					continue;
				ir_tarval *tv = get_Const_tarval(shiftval);
				if (!tarval_is_long(tv)
				    || get_tarval_long(tv) != (long)store_size*8)
					continue;
			}

			/* Combine types if necessary */
			ir_type *type0 = get_Store_type(store0);
			ir_type *type1 = get_Store_type(store1);
			ir_type *type;
			if (type0 != type1) {
				/* Construct an anonymous struct type
				 * for the combined Store. */
				ident *struct_id = id_unique("__combined_Store");
				type = new_type_struct(struct_id);
				ident *member0_id = id_unique("__Store_part");
				new_entity(type, member0_id, type0);
				ident *member1_id = id_unique("__Store_part");
				new_entity(type, member1_id, type1);
			} else {
				type = type0;
			}

			/* combine values */
			dbg_info *dbgi   = get_irn_dbg_info(store0);
			ir_node  *block  = get_nodes_block(store0);
			ir_graph *irg    = get_irn_irg(store0);
			ir_node  *convu0 = new_r_Conv(block, store_val, mode_unsigned);
			ir_node  *conv0  = new_r_Conv(block, convu0, double_mode);
			ir_node  *convu1 = new_r_Conv(block, store_val1, mode_unsigned);
			ir_node  *conv1  = new_r_Conv(block, convu1, double_mode);
			ir_node  *cnst   = new_r_Const_long(irg, mode_Iu, store_size*8);
			ir_node  *shl    = new_r_Shl(block, conv1, cnst);
			ir_node  *or     = new_r_Or(block, conv0, shl);

			/* create a new store and replace the two small stores */
			ir_cons_flags flags = cons_unaligned;
			if (!get_irn_pinned(store0) || !get_irn_pinned(store1))
				flags |= cons_floats;
			ir_node *new_store = new_rd_Store(dbgi, block, mem, store_ptr0,
			                                  or, type, flags);
			exchange(store0, new_store);
			exchange(store1, new_store);
			new_in[i]  = pred0;
			new_in[i2] = NULL;
			/* restart detection loop */
			i = 0;
			changed = true;
			goto again;
		}
	}

	if (changed) {
		/* compact new_in array (remove NULL entries) */
		int new_arity = 0;
		for (int i = 0; i < n_preds; ++i) {
			ir_node *in = new_in[i];
			if (in != NULL)
				new_in[new_arity++] = in;
		}
		if (new_arity == 1)
			exchange(sync, new_in[0]);
		else
			set_irn_in(sync, new_arity, new_in);
	}
}

void combine_memops(ir_graph *irg)
{
	/* We don't have code yet to test whether the address is aligned for the
	 * combined modes, so we can only do this if the backend supports
	 * unaligned stores. */
	if (!ir_target.fast_unaligned_memaccess)
		return;
	irg_walk_graph(irg, combine_memop, NULL, NULL);
}
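/* Example for combine_memop() (illustrative, little-endian): two adjacent
 * byte Stores sharing the same memory input,
 *
 *     Store(p,     lo)      and      Store(p + 1, hi)
 *
 * where hi is a Const or a Shr of the same value by 8 bits, are merged
 * into one unaligned 2-byte
 *
 *     Store(p, Or(Conv(lo), Shl(Conv(hi), 8)))
 *
 * whose extra Conv/Shl/Or arithmetic local optimizations can fold away
 * again. */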
void optimize_load_store(ir_graph *irg)
{
	assure_irg_properties(irg,
		IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
		| IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES
		| IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
		| IR_GRAPH_PROPERTY_NO_TUPLES
		| IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
		| IR_GRAPH_PROPERTY_CONSISTENT_POSTDOMINANCE
		| IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);

	FIRM_DBG_REGISTER(dbg, "firm.opt.ldstopt");

	assert(get_irg_pinned(irg) != op_pin_state_floats);

	const ir_disambiguator_options opts
		= get_irg_memory_disambiguator_options(irg);
	if ((opts & aa_opt_always_alias) == 0) {
		assure_irp_globals_entity_usage_computed();
	}

	walk_env_t env = { .changes = NO_CHANGES };
	obstack_init(&env.obst);

	/* init the links, then collect Loads/Stores/Proj's in lists */
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
	master_visited = 0;
	irg_walk_graph(irg, firm_clear_link, collect_nodes, &env);

	/* now we have collected enough information, optimize */
	irg_walk_graph(irg, NULL, do_load_store_optimize, &env);

	/* optimize_load can introduce dead stores. They are eliminated now. */
	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
	assure_irg_entity_usage_computed(irg);
	irg_walk_graph(irg, NULL, do_eliminate_dead_stores, &env);

	env.changes |= optimize_loops(irg);

	ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
	obstack_free(&env.obst, NULL);

	confirm_irg_properties(irg,
		env.changes == NO_CHANGES       ? IR_GRAPH_PROPERTIES_ALL          :
		(env.changes & CF_CHANGED) != 0 ? IR_GRAPH_PROPERTIES_NONE         :
		(env.changes & DF_CHANGED) != 0 ? IR_GRAPH_PROPERTIES_CONTROL_FLOW :
		/*NODES_CREATED*/
		IR_GRAPH_PROPERTIES_CONTROL_FLOW
		| IR_GRAPH_PROPERTY_NO_BADS
		| IR_GRAPH_PROPERTY_NO_TUPLES
		| IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE
		| IR_GRAPH_PROPERTY_MANY_RETURNS);
}