Presentations
MMX 1996
3DNow! 1998
SSE 1999
SSE2 2001
SSE3 2004
SSSE3 2006
SSE4 2006
SSE5 2007
AVX 2008
F16C 2009
XOP 2009
FMA4 2011
FMA3 2012
AVX2 2013
AVX-512 2015
AMX 2022
MAX-1 Multimedia Acceleration eXtensions v1 HP-PA RISC
MAX-2 Multimedia Acceleration eXtensions v2 HP-PA RISC
VIS 1 Visual Instruction Set v1 SPARC V9
VIS 2 Visual Instruction Set v2 SPARC V9
AltiVec (trade names Velocity Engine, VMX) PowerPC
MDMX MIPS Digital Media eXtension (MaDMaX) MIPS
MIPS-3D MIPS-3D MIPS
MVI Motion Video Instructions DEC Alpha
NEON Advanced SIMD Cortex (ARMv7, ARMv8)
Packed SIMD Packed SIMD RISC-V
Vector Set Vector Set RISC-V
SVE Scalable Vector Extension ARMv8.2-A and newer
#include <stdio.h>
/* Vector type occupying 16 bytes whose lanes are unsigned short int. */
typedef unsigned short int v16us __attribute__((vector_size(16)));

/*
 * Prints the storage size of a scalar unsigned short int and of the
 * v16us vector type.
 *
 * Returns 0.
 */
int main(void)
{
    /* sizeof yields size_t, so the matching conversion is %zu, not %ld. */
    printf("scalar: %zu bytes\n", sizeof(unsigned short int));
    printf("vector: %zu bytes\n", sizeof(v16us));
    return 0;
}
#include <stdio.h>
/* 16-byte vector types, one for each unsigned integer element type. */
typedef unsigned char v16ub __attribute__((vector_size(16)));
typedef unsigned short int v16us __attribute__((vector_size(16)));
typedef unsigned int v16ui __attribute__((vector_size(16)));
typedef unsigned long int v16ul __attribute__((vector_size(16)));

/*
 * Prints the storage size of each unsigned scalar type, followed by the
 * storage size of the vector type built from it.
 *
 * Returns 0.
 */
int main(void)
{
    /* sizeof yields size_t; %zu is the conversion that matches it. */
    printf("unsigned char: %zu bytes\n", sizeof(unsigned char));
    printf("unsigned short: %zu bytes\n", sizeof(unsigned short int));
    printf("unsigned int: %zu bytes\n", sizeof(unsigned int));
    printf("unsigned long: %zu bytes\n", sizeof(unsigned long int));

    printf("vector unsigned char: %zu bytes\n", sizeof(v16ub));
    printf("vector unsigned short: %zu bytes\n", sizeof(v16us));
    printf("vector unsigned int: %zu bytes\n", sizeof(v16ui));
    printf("vector unsigned long: %zu bytes\n", sizeof(v16ul));
    return 0;
}
#include <stdio.h>
/*
 * 16-byte vector types, one for each signed integer element type.
 * NOTE(review): the "u" in these names suggests unsigned lanes, but every
 * element type here is signed; the names are kept unchanged.
 */
typedef signed char v16ub __attribute__((vector_size(16)));
typedef signed short int v16us __attribute__((vector_size(16)));
typedef signed int v16ui __attribute__((vector_size(16)));
typedef signed long int v16ul __attribute__((vector_size(16)));

/*
 * Adds a pair of vectors for each of the four element types. The sums are
 * neither printed nor returned; each one is explicitly discarded.
 *
 * Returns 0.
 */
int main(void)
{
    {
        v16ub x = { 1, 2, 3, 4, 5, 6, 7, 8 };
        /*
         * 0xff (255) does not fit in a signed char lane; -1 is written
         * instead so no out-of-range constant conversion takes place.
         */
        v16ub y = { -1, -1, -1, -1, -1, -1, -1, -1 };
        v16ub z = x + y;
        (void) z;               /* sum is intentionally not used */
    }
    {
        v16us x = { 1, 2, 3, 4, 5, 6, 7, 8 };
        v16us y = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
        v16us z = x + y;
        (void) z;
    }
    {
        v16ui x = { 1, 2, 3, 4 };
        v16ui y = { 0xff, 0xff, 0xff, 0xff };
        v16ui z = x + y;
        (void) z;
    }
    {
        v16ul x = { 1, 2 };
        v16ul y = { 0xff, 0xff };
        v16ul z = x + y;
        (void) z;
    }
    return 0;
}
#include <stdio.h>
/* Vector type occupying 16 bytes whose lanes are unsigned short int. */
typedef unsigned short int v16us __attribute__((vector_size(16)));

/*
 * Adds two v16us vectors and prints, one pair per line, the index and the
 * value of lanes 0 through 7 of the sum.
 *
 * Returns 0.
 */
int main(void)
{
    const v16us lhs = { 1, 2, 3, 4, 5, 6, 7, 8 };
    const v16us rhs = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
    const v16us sum = lhs + rhs;
    int lane = 0;

    while (lane < 8) {
        printf("%d %d\n", lane, sum[lane]);
        lane++;
    }
    return 0;
}
/* Vector type occupying 1024 bytes whose lanes are float. */
typedef float v1024f __attribute__((vector_size(1024)));

/*
 * Computes the lane-wise sum of two v1024f vectors.
 *
 * x, y  operands, read through the pointers
 * z     destination that receives *x + *y
 */
void addVectors(v1024f * x, v1024f * y, v1024f * z)
{
    /* Both operands are loaded before the destination is written. */
    const v1024f lhs = *x;
    const v1024f rhs = *y;

    *z = lhs + rhs;
}
/*
 * Builds two v1024f vectors whose first lane is 1.0 and passes them to
 * addVectors(); the sum is stored in a local and not used afterwards.
 *
 * Returns 0.
 */
int main(void)
{
    v1024f sum;
    v1024f lhs = { 1.0 };
    v1024f rhs = { 1.0 };

    addVectors(&lhs, &rhs, &sum);
    return 0;
}
#include <stdio.h>
/* Vector type occupying 16 bytes whose 16 lanes are signed char. */
typedef signed char v16ib __attribute__((vector_size(16)));

/*
 * Lane-wise addition.
 *
 * x, y  operands, passed by value
 * z     destination that receives x + y
 */
void add16ib(v16ib x, v16ib y, v16ib * z)
{
    const v16ib sum = x + y;

    *z = sum;
}
/*
 * Lane-wise subtraction.
 *
 * x, y  operands, passed by value
 * z     destination that receives x - y
 */
void sub16ib(v16ib x, v16ib y, v16ib * z)
{
    const v16ib difference = x - y;

    *z = difference;
}
/*
 * Lane-wise multiplication.
 *
 * x, y  operands, passed by value
 * z     destination that receives x * y
 */
void mul16ib(v16ib x, v16ib y, v16ib * z)
{
    const v16ib product = x * y;

    *z = product;
}
/*
 * Lane-wise division.
 *
 * x  dividend lanes, passed by value
 * y  divisor lanes, passed by value
 * z  destination that receives x / y
 *
 * NOTE(review): nothing here guards against a zero lane in y; callers
 * presumably must supply non-zero divisors - confirm.
 */
void div16ib(v16ib x, v16ib y, v16ib * z)
{
    const v16ib quotient = x / y;

    *z = quotient;
}
/*
 * Lane-wise remainder.
 *
 * x  dividend lanes, passed by value
 * y  divisor lanes, passed by value
 * z  destination that receives x % y
 *
 * NOTE(review): nothing here guards against a zero lane in y; callers
 * presumably must supply non-zero divisors - confirm.
 */
void mod16ib(v16ib x, v16ib y, v16ib * z)
{
    const v16ib remainder = x % y;

    *z = remainder;
}
/*
 * Lane-wise bitwise AND.
 *
 * x, y  operands, passed by value
 * z     destination that receives x & y
 */
void and16ib(v16ib x, v16ib y, v16ib * z)
{
    const v16ib masked = x & y;

    *z = masked;
}
/*
 * Lane-wise bitwise OR.
 *
 * x, y  operands, passed by value
 * z     destination that receives x | y
 */
void or16ib(v16ib x, v16ib y, v16ib * z)
{
    const v16ib combined = x | y;

    *z = combined;
}
/*
 * Lane-wise bitwise exclusive OR.
 *
 * x, y  operands, passed by value
 * z     destination that receives x ^ y
 */
void xor16ib(v16ib x, v16ib y, v16ib * z)
{
    const v16ib toggled = x ^ y;

    *z = toggled;
}
/*
 * Lane-wise right shift: each lane of x is shifted right by the count held
 * in the corresponding lane of y.
 *
 * x  values to shift, passed by value
 * y  per-lane shift counts, passed by value
 * z  destination that receives x >> y
 *
 * NOTE(review): the result for counts that are negative or not smaller
 * than the lane width is not established by this code - confirm against
 * the compiler's vector-extension documentation.
 */
void rshift16ib(v16ib x, v16ib y, v16ib * z)
{
    const v16ib shifted = x >> y;

    *z = shifted;
}
/*
 * Lane-wise left shift: each lane of x is shifted left by the count held
 * in the corresponding lane of y.
 *
 * x  values to shift, passed by value
 * y  per-lane shift counts, passed by value
 * z  destination that receives x << y
 *
 * NOTE(review): the result for counts that are negative or not smaller
 * than the lane width is not established by this code - confirm against
 * the compiler's vector-extension documentation.
 */
void lshift16ib(v16ib x, v16ib y, v16ib * z)
{
    const v16ib shifted = x << y;

    *z = shifted;
}
/*
 * Prints a heading, then one line per lane in the form
 * "<index> <x lane> <op> <y lane> = <z lane>", then an empty line.
 *
 * message  heading, written with puts()
 * op       operator text shown between the two operand values
 * x, y     operand vectors; only read
 * z        result vector; only read
 */
void print_vectors(const char *message, const char *op, v16ib * x,
                   v16ib * y, v16ib * z)
{
    /*
     * Lane count held as int so the loop compares int with int rather
     * than int with the size_t produced by sizeof.
     */
    const int lanes = (int) (sizeof(v16ib) / sizeof(signed char));
    int i;

    puts(message);
    for (i = 0; i < lanes; i++) {
        printf("%2d %d %s %d = %d\n", i, (*x)[i], op, (*y)[i], (*z)[i]);
    }
    putchar('\n');
}
int main(void)
{
v16ib x;
v16ib y;
v16ib z;
int i;
for (i = 0; i < sizeof(v16ib) / sizeof(signed char); i++) {
x[i] = i * 2;
y[i] = 16 - i;
}
add16ib(x, y, &z);
print_vectors("vector addition", "+", &x, &y, &z);
sub16ib(x, y, &z);
print_vectors("vector subtraction", "-", &x, &y, &z);
mul16ib(x, y, &z);
print_vectors("vector multiply", "*", &x, &y, &z);
div16ib(x, y, &z);
print_vectors("vector divide", "/", &x, &y, &z);
mod16ib(x, y, &z);
print_vectors("vector modulo", "%", &x, &y, &z);
and16ib(x, y, &z);
print_vectors("vector bitwise and", "&", &x, &y, &z);
or16ib(x, y, &z);
print_vectors("vector bitwise or", "|", &x, &y, &z);
xor16ib(x, y, &z);
print_vectors("vector bitwise xor", "^", &x, &y, &z);
rshift16ib(x, y, &z);
print_vectors("vector right shift", ">>", &x, &y, &z);
lshift16ib(x, y, &z);
print_vectors("vector left shift", "<<", &x, &y, &z);
return 0;
}
#include <stdio.h>
/* Vector type occupying 16 bytes whose lanes are float. */
typedef float v16float __attribute__((vector_size(16)));

/*
 * Lane-wise addition.
 *
 * x, y  operands, passed by value
 * z     destination that receives x + y
 */
void add16float(v16float x, v16float y, v16float * z)
{
    const v16float sum = x + y;

    *z = sum;
}
/*
 * Lane-wise subtraction.
 *
 * x, y  operands, passed by value
 * z     destination that receives x - y
 */
void sub16float(v16float x, v16float y, v16float * z)
{
    const v16float difference = x - y;

    *z = difference;
}
/*
 * Lane-wise multiplication.
 *
 * x, y  operands, passed by value
 * z     destination that receives x * y
 */
void mul16float(v16float x, v16float y, v16float * z)
{
    const v16float product = x * y;

    *z = product;
}
/*
 * Lane-wise division.
 *
 * x  dividend lanes, passed by value
 * y  divisor lanes, passed by value
 * z  destination that receives x / y
 */
void div16float(v16float x, v16float y, v16float * z)
{
    const v16float quotient = x / y;

    *z = quotient;
}
/*
 * Prints a heading, then one line per lane in the form
 * "<index> <x lane> <op> <y lane> = <z lane>" with three decimals per
 * value, then an empty line.
 *
 * message  heading, written with puts()
 * op       operator character shown between the two operand values
 * x, y     operand vectors; only read
 * z        result vector; only read
 */
void print_vectors(const char *message, const char op, v16float * x,
                   v16float * y, v16float * z)
{
    /*
     * Lane count held as int so the loop compares int with int rather
     * than int with the size_t produced by sizeof.
     */
    const int lanes = (int) (sizeof(v16float) / sizeof(float));
    int i;

    puts(message);
    for (i = 0; i < lanes; i++) {
        printf("%2d %5.3f %c %5.3f = %5.3f\n", i, (*x)[i], op, (*y)[i],
               (*z)[i]);
    }
    putchar('\n');
}
int main(void)
{
v16float x;
v16float y;
v16float z;
int i;
for (i = 0; i < sizeof(v16float) / sizeof(float); i++) {
x[i] = i;
y[i] = i + 0.1;
}
add16float(x, y, &z);
print_vectors("vector addition", '+', &x, &y, &z);
sub16float(x, y, &z);
print_vectors("vector subtraction", '-', &x, &y, &z);
mul16float(x, y, &z);
print_vectors("vector multiply", '*', &x, &y, &z);
div16float(x, y, &z);
print_vectors("vector divide", '/', &x, &y, &z);
return 0;
}