ST4 (single structure)
Store single 4-element structure from one lane of four registers
This instruction stores a 4-element structure to memory from
corresponding elements of four SIMD&FP registers.
Depending on the settings in the CPACR_EL1,
CPTR_EL2, and CPTR_EL3 registers,
and the current Security state and Exception level,
an attempt to execute the instruction might be trapped.
If PSTATE.DIT is 1, the timing of this instruction is insensitive to the value of the data being loaded or stored.
It has encodings from 2 classes:
No offset
and
Post-index
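No offset encoding (bit 31 on the left):
  31  30  29:24   23  22 (L)  21 (R)  20:16  15:13 (opcode)  12  11:10  9:5  4:0
  0   Q   001101  0   0       1       00000  xx1             S   size   Rn   Rt
8-bit variant (applies when opcode == 001)
ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<index>], [<Xn|SP>]
16-bit variant (applies when opcode == 011 && size == x0)
ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<index>], [<Xn|SP>]
32-bit variant (applies when opcode == 101 && size == 00)
ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<index>], [<Xn|SP>]
64-bit variant (applies when opcode == 101 && S == 0 && size == 01)
ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<index>], [<Xn|SP>]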
// Decode for the "No offset" encoding class.
// t is deliberately not constant: the execute pseudocode steps it through
// consecutive registers with (t + 1) MOD 32.
integer t = UInt(Rt);
constant integer n = UInt(Rn);
// There is no post-index register in this class; m is only read by the
// execute pseudocode when wback is TRUE, so its value is left UNKNOWN.
constant integer m = integer UNKNOWN;
// No base-register writeback for this class.
constant boolean wback = FALSE;
constant boolean nontemporal = FALSE;
// With wback FALSE this reduces to n != 31, i.e. FALSE only when the base
// register number is 31 (the SP case in the execute pseudocode).
constant boolean tagchecked = wback || n != 31;
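Post-index encoding (bit 31 on the left):
  31  30  29:24   23  22 (L)  21 (R)  20:16  15:13 (opcode)  12  11:10  9:5  4:0
  0   Q   001101  1   0       1       Rm     xx1             S   size   Rn   Rt
8-bit, immediate offset variant (applies when Rm == 11111 && opcode == 001)
ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<index>], [<Xn|SP>], #4
8-bit, register offset variant (applies when Rm != 11111 && opcode == 001)
ST4 { <Vt>.B, <Vt2>.B, <Vt3>.B, <Vt4>.B }[<index>], [<Xn|SP>], <Xm>
16-bit, immediate offset variant (applies when Rm == 11111 && opcode == 011 && size == x0)
ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<index>], [<Xn|SP>], #8
16-bit, register offset variant (applies when Rm != 11111 && opcode == 011 && size == x0)
ST4 { <Vt>.H, <Vt2>.H, <Vt3>.H, <Vt4>.H }[<index>], [<Xn|SP>], <Xm>
32-bit, immediate offset variant (applies when Rm == 11111 && opcode == 101 && size == 00)
ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<index>], [<Xn|SP>], #16
32-bit, register offset variant (applies when Rm != 11111 && opcode == 101 && size == 00)
ST4 { <Vt>.S, <Vt2>.S, <Vt3>.S, <Vt4>.S }[<index>], [<Xn|SP>], <Xm>
64-bit, immediate offset variant (applies when Rm == 11111 && opcode == 101 && S == 0 && size == 01)
ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<index>], [<Xn|SP>], #32
64-bit, register offset variant (applies when Rm != 11111 && opcode == 101 && S == 0 && size == 01)
ST4 { <Vt>.D, <Vt2>.D, <Vt3>.D, <Vt4>.D }[<index>], [<Xn|SP>], <Xm>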
// Decode for the "Post-index" encoding class.
// t is deliberately not constant: the execute pseudocode steps it through
// consecutive registers with (t + 1) MOD 32.
integer t = UInt(Rt);
constant integer n = UInt(Rn);
// Post-index register number. The execute pseudocode reads X[m] only when
// m != 31; when m == 31 it uses the accumulated byte count as the offset.
constant integer m = UInt(Rm);
// The base register is written back after the transfer.
constant boolean wback = TRUE;
constant boolean nontemporal = FALSE;
// wback is TRUE here, so this expression is always TRUE for this class.
constant boolean tagchecked = wback || n != 31;
<Vt>
Is the name of the first or only SIMD&FP register to be transferred, encoded in the "Rt" field.
<Vt2>
Is the name of the second SIMD&FP register to be transferred, encoded as "Rt" plus 1 modulo 32.
<Vt3>
Is the name of the third SIMD&FP register to be transferred, encoded as "Rt" plus 2 modulo 32.
<Vt4>
Is the name of the fourth SIMD&FP register to be transferred, encoded as "Rt" plus 3 modulo 32.
<index>
For the 8-bit variant: is the element index, encoded in "Q:S:size".
<index>
For the 16-bit variant: is the element index, encoded in "Q:S:size<1>".
<index>
For the 32-bit variant: is the element index, encoded in "Q:S".
<index>
For the 64-bit variant: is the element index, encoded in "Q".
<Xn|SP>
Is the 64-bit name of the general-purpose base register or stack pointer, encoded in the "Rn" field.
<Xm>
Is the 64-bit name of the general-purpose post-index register, excluding XZR, encoded in the "Rm" field.
// Shared decode: work out the element size (scale), the lane number (index),
// the number of registers/elements in the structure (selem) and whether this
// is the load-and-replicate form, rejecting reserved field combinations.
bits(2) scale = opcode<2:1>;
constant integer selem = UInt(opcode<0>:R) + 1;
boolean replicate = FALSE;
integer index;

case scale of
    when '00'
        // Byte lanes, B[0-15]
        index = UInt(Q:S:size);
    when '01'
        // Halfword lanes, H[0-7]; size<0> set is reserved
        if size<0> == '1' then UNDEFINED;
        index = UInt(Q:S:size<1>);
    when '10'
        // Word or doubleword lanes, selected by size<0>; size<1> set is reserved
        if size<1> == '1' then UNDEFINED;
        if size<0> == '1' then
            // Doubleword lanes, D[0-1]; S set is reserved
            if S == '1' then UNDEFINED;
            index = UInt(Q);
            scale = '11';
        else
            // Word lanes, S[0-3]
            index = UInt(Q:S);
    when '11'
        // Load and replicate: only valid for loads (L != '0') with S clear;
        // the element size then comes from the size field.
        if L == '0' || S == '1' then UNDEFINED;
        scale = size;
        replicate = TRUE;

// Register width in bits (64 or 128) and element width in bits (8 to 64).
constant integer datasize = 64 << UInt(Q);
constant integer esize = 8 << UInt(scale);
// Execute: for each of the selem consecutive registers starting at Vt, write
// lane <index> to successive memory locations starting at the base address,
// then optionally write the updated address back to the base register.
CheckFPAdvSIMDEnabled64();

constant integer elemBytes = esize DIV 8;
bits(64) base;
bits(64) elemAddr;
bits(64) offset;
bits(128) regVal;
bits(esize) elemVal;

constant boolean isPrivileged = PSTATE.EL != EL0;
constant AccessDescriptor desc = CreateAccDescASIMD(MemOp_STORE, nontemporal, tagchecked,
                                                    isPrivileged);

// Base address comes from Xn, or from SP (after an alignment check) when n is 31.
if n != 31 then
    base = X[n, 64];
else
    CheckSPAlignment();
    base = SP[];

offset = Zeros(64);
if !replicate then
    // One element per register: store the selected lane of each register.
    for i = 0 to selem-1
        regVal = V[t, 128];
        elemAddr = AddressIncrement(base, offset, desc);
        // Pick the lane out of the full 128-bit register value.
        Mem[elemAddr, elemBytes, desc] = Elem[regVal, index, esize];
        offset = offset + elemBytes;
        t = (t + 1) MOD 32;
else
    // Load-and-replicate path. The decode only sets replicate when L != '0'.
    for i = 0 to selem-1
        elemAddr = AddressIncrement(base, offset, desc);
        elemVal = Mem[elemAddr, elemBytes, desc];
        // Fill the whole 64- or 128-bit register with copies of the element.
        V[t, datasize] = Replicate(elemVal, datasize DIV esize);
        offset = offset + elemBytes;
        t = (t + 1) MOD 32;

if wback then
    // Register post-index replaces the accumulated byte count with Xm;
    // when m is 31 the total number of bytes transferred is used instead.
    if m != 31 then
        offset = X[m, 64];
    base = AddressAdd(base, offset, desc);
    if n != 31 then
        X[n, 64] = base;
    else
        SP[] = base;