From 07b54cd51193883a1a4f8e1c1dd66d3d55a47bbe Mon Sep 17 00:00:00 2001 From: Loïc Guégan Date: Fri, 26 Sep 2025 20:03:04 +0200 Subject: Add code --- .gitignore | 1 + Makefile | 14 +++ avx_results | 100 ++++++++++++++++++ avx_results.txt | 301 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ bench.sh | 26 +++++ main.c | 55 ++++++++++ noavx_results.txt | 301 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 798 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 avx_results create mode 100644 avx_results.txt create mode 100755 bench.sh create mode 100644 main.c create mode 100644 noavx_results.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e4bd7b3 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +main_* diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0d3f6ab --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ + + +all: main_noavx main_avx + +main_noavx: main.c + gcc -mno-avx -fno-tree-vectorize $^ -o $@ + +main_avx: main.c + gcc -mavx2 -D USE_VECTOR $^ -o $@ + +clean: + @rm main_avx main_noavx + +.PHONY: clean diff --git a/avx_results b/avx_results new file mode 100644 index 0000000..19c5969 --- /dev/null +++ b/avx_results @@ -0,0 +1,100 @@ +Vectorize... done! (duration=0.007359s) +Vectorize... done! (duration=0.005866s) +Vectorize... done! (duration=0.004919s) +Vectorize... done! (duration=0.005684s) +Vectorize... done! (duration=0.005440s) +Vectorize... done! (duration=0.005136s) +Vectorize... done! (duration=0.005211s) +Vectorize... done! (duration=0.006229s) +Vectorize... done! (duration=0.004816s) +Vectorize... done! (duration=0.004676s) +Vectorize... done! (duration=0.004297s) +Vectorize... done! (duration=0.004617s) +Vectorize... done! (duration=0.003972s) +Vectorize... done! (duration=0.003814s) +Vectorize... done! (duration=0.004041s) +Vectorize... done! (duration=0.004847s) +Vectorize... done! (duration=0.004813s) +Vectorize... done! (duration=0.004191s) +Vectorize... done! (duration=0.004584s) +Vectorize... done! (duration=0.003710s) +Vectorize... done! (duration=0.004831s) +Vectorize... done! (duration=0.004228s) +Vectorize... done! (duration=0.004211s) +Vectorize... done! (duration=0.004488s) +Vectorize... done! (duration=0.004527s) +Vectorize... done! (duration=0.004185s) +Vectorize... done! (duration=0.004367s) +Vectorize... done! (duration=0.004283s) +Vectorize... done! (duration=0.004272s) +Vectorize... done! (duration=0.004480s) +Vectorize... done! (duration=0.004116s) +Vectorize... done! (duration=0.004024s) +Vectorize... done! (duration=0.003890s) +Vectorize... done! (duration=0.003864s) +Vectorize... done! (duration=0.003799s) +Vectorize... done! (duration=0.003456s) +Vectorize... done! (duration=0.004407s) +Vectorize... done! (duration=0.004296s) +Vectorize... done! (duration=0.003905s) +Vectorize... done! (duration=0.004406s) +Vectorize... done! (duration=0.004389s) +Vectorize... done! (duration=0.004518s) +Vectorize... done! (duration=0.004573s) +Vectorize... done! (duration=0.003854s) +Vectorize... done! (duration=0.004592s) +Vectorize... done! (duration=0.004972s) +Vectorize... done! (duration=0.004391s) +Vectorize... done! (duration=0.003410s) +Vectorize... done! (duration=0.004195s) +Vectorize... done! (duration=0.003982s) +Vectorize... done! (duration=0.002699s) +Vectorize... done! (duration=0.003477s) +Vectorize... done! (duration=0.004454s) +Vectorize... done! (duration=0.003632s) +Vectorize... done! (duration=0.004496s) +Vectorize... done! (duration=0.004199s) +Vectorize... done! (duration=0.004514s) +Vectorize... done! (duration=0.004048s) +Vectorize... done! (duration=0.003613s) +Vectorize... done! (duration=0.004315s) +Vectorize... done! (duration=0.004370s) +Vectorize... done! (duration=0.004275s) +Vectorize... done! (duration=0.003013s) +Vectorize... done! (duration=0.004564s) +Vectorize... done! (duration=0.004233s) +Vectorize... done! (duration=0.004555s) +Vectorize... done! (duration=0.003330s) +Vectorize... done! (duration=0.003939s) +Vectorize... done! (duration=0.004354s) +Vectorize... done! (duration=0.004415s) +Vectorize... done! (duration=0.003795s) +Vectorize... done! (duration=0.002989s) +Vectorize... done! (duration=0.004349s) +Vectorize... done! (duration=0.003815s) +Vectorize... done! (duration=0.003920s) +Vectorize... done! (duration=0.004219s) +Vectorize... done! (duration=0.003809s) +Vectorize... done! (duration=0.004418s) +Vectorize... done! (duration=0.003735s) +Vectorize... done! (duration=0.004354s) +Vectorize... done! (duration=0.004488s) +Vectorize... done! (duration=0.004474s) +Vectorize... done! (duration=0.003238s) +Vectorize... done! (duration=0.004283s) +Vectorize... done! (duration=0.004002s) +Vectorize... done! (duration=0.004600s) +Vectorize... done! (duration=0.004231s) +Vectorize... done! (duration=0.004454s) +Vectorize... done! (duration=0.003981s) +Vectorize... done! (duration=0.004575s) +Vectorize... done! (duration=0.004447s) +Vectorize... done! (duration=0.004461s) +Vectorize... done! (duration=0.003770s) +Vectorize... done! (duration=0.004422s) +Vectorize... done! (duration=0.004662s) +Vectorize... done! (duration=0.003447s) +Vectorize... done! (duration=0.004445s) +Vectorize... done! (duration=0.003966s) +Vectorize... done! (duration=0.004525s) +Vectorize... done! (duration=0.004356s) diff --git a/avx_results.txt b/avx_results.txt new file mode 100644 index 0000000..33da200 --- /dev/null +++ b/avx_results.txt @@ -0,0 +1,301 @@ + +Vectorize... done! (duration=0.003413s) +Vectorize... done! (duration=0.005341s) +Vectorize... done! (duration=0.004528s) +Vectorize... done! (duration=0.003938s) +Vectorize... done! (duration=0.004752s) +Vectorize... done! (duration=0.004774s) +Vectorize... done! (duration=0.004778s) +Vectorize... done! (duration=0.005604s) +Vectorize... done! (duration=0.005156s) +Vectorize... done! (duration=0.005589s) +Vectorize... done! (duration=0.005635s) +Vectorize... done! (duration=0.005929s) +Vectorize... done! (duration=0.005110s) +Vectorize... done! (duration=0.004883s) +Vectorize... done! (duration=0.004130s) +Vectorize... done! (duration=0.004543s) +Vectorize... done! (duration=0.003638s) +Vectorize... done! (duration=0.002249s) +Vectorize... done! (duration=0.004053s) +Vectorize... done! (duration=0.003511s) +Vectorize... done! (duration=0.003905s) +Vectorize... done! (duration=0.003611s) +Vectorize... done! (duration=0.002495s) +Vectorize... done! (duration=0.003872s) +Vectorize... done! (duration=0.003388s) +Vectorize... done! (duration=0.003415s) +Vectorize... done! (duration=0.003442s) +Vectorize... done! (duration=0.002360s) +Vectorize... done! (duration=0.004072s) +Vectorize... done! (duration=0.003574s) +Vectorize... done! (duration=0.003694s) +Vectorize... done! (duration=0.004275s) +Vectorize... done! (duration=0.003158s) +Vectorize... done! (duration=0.004245s) +Vectorize... done! (duration=0.004015s) +Vectorize... done! (duration=0.003969s) +Vectorize... done! (duration=0.004378s) +Vectorize... done! (duration=0.004507s) +Vectorize... done! (duration=0.004485s) +Vectorize... done! (duration=0.004451s) +Vectorize... done! (duration=0.004669s) +Vectorize... done! (duration=0.004866s) +Vectorize... done! (duration=0.004372s) +Vectorize... done! (duration=0.004722s) +Vectorize... done! (duration=0.003550s) +Vectorize... done! (duration=0.004524s) +Vectorize... done! (duration=0.004954s) +Vectorize... done! (duration=0.004355s) +Vectorize... done! (duration=0.003765s) +Vectorize... done! (duration=0.003975s) +Vectorize... done! (duration=0.003201s) +Vectorize... done! (duration=0.002842s) +Vectorize... done! (duration=0.003985s) +Vectorize... done! (duration=0.003989s) +Vectorize... done! (duration=0.004549s) +Vectorize... done! (duration=0.004641s) +Vectorize... done! (duration=0.004851s) +Vectorize... done! (duration=0.004524s) +Vectorize... done! (duration=0.004511s) +Vectorize... done! (duration=0.004544s) +Vectorize... done! (duration=0.004619s) +Vectorize... done! (duration=0.003695s) +Vectorize... done! (duration=0.004732s) +Vectorize... done! (duration=0.004472s) +Vectorize... done! (duration=0.004373s) +Vectorize... done! (duration=0.003653s) +Vectorize... done! (duration=0.004605s) +Vectorize... done! (duration=0.004603s) +Vectorize... done! (duration=0.004426s) +Vectorize... done! (duration=0.004426s) +Vectorize... done! (duration=0.004007s) +Vectorize... done! (duration=0.003722s) +Vectorize... done! (duration=0.004445s) +Vectorize... done! (duration=0.003771s) +Vectorize... done! (duration=0.004416s) +Vectorize... done! (duration=0.004592s) +Vectorize... done! (duration=0.004406s) +Vectorize... done! (duration=0.004185s) +Vectorize... done! (duration=0.004440s) +Vectorize... done! (duration=0.004805s) +Vectorize... done! (duration=0.004380s) +Vectorize... done! (duration=0.004320s) +Vectorize... done! (duration=0.003935s) +Vectorize... done! (duration=0.004378s) +Vectorize... done! (duration=0.003521s) +Vectorize... done! (duration=0.003980s) +Vectorize... done! (duration=0.004078s) +Vectorize... done! (duration=0.004335s) +Vectorize... done! (duration=0.004367s) +Vectorize... done! (duration=0.003853s) +Vectorize... done! (duration=0.004274s) +Vectorize... done! (duration=0.004163s) +Vectorize... done! (duration=0.004377s) +Vectorize... done! (duration=0.004306s) +Vectorize... done! (duration=0.003940s) +Vectorize... done! (duration=0.004709s) +Vectorize... done! (duration=0.004442s) +Vectorize... done! (duration=0.003731s) +Vectorize... done! (duration=0.004382s) +Vectorize... done! (duration=0.004409s) +Vectorize... done! (duration=0.004501s) +Vectorize... done! (duration=0.004296s) +Vectorize... done! (duration=0.004065s) +Vectorize... done! (duration=0.004383s) +Vectorize... done! (duration=0.004623s) +Vectorize... done! (duration=0.004011s) +Vectorize... done! (duration=0.003600s) +Vectorize... done! (duration=0.005209s) +Vectorize... done! (duration=0.002896s) +Vectorize... done! (duration=0.003643s) +Vectorize... done! (duration=0.004068s) +Vectorize... done! (duration=0.003695s) +Vectorize... done! (duration=0.003646s) +Vectorize... done! (duration=0.004999s) +Vectorize... done! (duration=0.004775s) +Vectorize... done! (duration=0.004024s) +Vectorize... done! (duration=0.003658s) +Vectorize... done! (duration=0.003096s) +Vectorize... done! (duration=0.003532s) +Vectorize... done! (duration=0.002951s) +Vectorize... done! (duration=0.003903s) +Vectorize... done! (duration=0.002921s) +Vectorize... done! (duration=0.003337s) +Vectorize... done! (duration=0.003996s) +Vectorize... done! (duration=0.003484s) +Vectorize... done! (duration=0.004574s) +Vectorize... done! (duration=0.004216s) +Vectorize... done! (duration=0.004824s) +Vectorize... done! (duration=0.004035s) +Vectorize... done! (duration=0.004712s) +Vectorize... done! (duration=0.004408s) +Vectorize... done! (duration=0.003769s) +Vectorize... done! (duration=0.004317s) +Vectorize... done! (duration=0.003630s) +Vectorize... done! (duration=0.003282s) +Vectorize... done! (duration=0.002978s) +Vectorize... done! (duration=0.004519s) +Vectorize... done! (duration=0.004255s) +Vectorize... done! (duration=0.004389s) +Vectorize... done! (duration=0.003687s) +Vectorize... done! (duration=0.003543s) +Vectorize... done! (duration=0.003279s) +Vectorize... done! (duration=0.003054s) +Vectorize... done! (duration=0.003953s) +Vectorize... done! (duration=0.003603s) +Vectorize... done! (duration=0.003522s) +Vectorize... done! (duration=0.002986s) +Vectorize... done! (duration=0.003935s) +Vectorize... done! (duration=0.003646s) +Vectorize... done! (duration=0.003624s) +Vectorize... done! (duration=0.003093s) +Vectorize... done! (duration=0.002718s) +Vectorize... done! (duration=0.003953s) +Vectorize... done! (duration=0.003622s) +Vectorize... done! (duration=0.003475s) +Vectorize... done! (duration=0.003473s) +Vectorize... done! (duration=0.003971s) +Vectorize... done! (duration=0.002957s) +Vectorize... done! (duration=0.002362s) +Vectorize... done! (duration=0.003973s) +Vectorize... done! (duration=0.004079s) +Vectorize... done! (duration=0.003271s) +Vectorize... done! (duration=0.004418s) +Vectorize... done! (duration=0.004043s) +Vectorize... done! (duration=0.004048s) +Vectorize... done! (duration=0.002783s) +Vectorize... done! (duration=0.004078s) +Vectorize... done! (duration=0.003341s) +Vectorize... done! (duration=0.003959s) +Vectorize... done! (duration=0.002739s) +Vectorize... done! (duration=0.002839s) +Vectorize... done! (duration=0.004114s) +Vectorize... done! (duration=0.003298s) +Vectorize... done! (duration=0.003963s) +Vectorize... done! (duration=0.003629s) +Vectorize... done! (duration=0.004601s) +Vectorize... done! (duration=0.004087s) +Vectorize... done! (duration=0.002929s) +Vectorize... done! (duration=0.004106s) +Vectorize... done! (duration=0.003195s) +Vectorize... done! (duration=0.003082s) +Vectorize... done! (duration=0.003973s) +Vectorize... done! (duration=0.003121s) +Vectorize... done! (duration=0.003899s) +Vectorize... done! (duration=0.003633s) +Vectorize... done! (duration=0.004067s) +Vectorize... done! (duration=0.004078s) +Vectorize... done! (duration=0.004086s) +Vectorize... done! (duration=0.004623s) +Vectorize... done! (duration=0.004267s) +Vectorize... done! (duration=0.004103s) +Vectorize... done! (duration=0.004231s) +Vectorize... done! (duration=0.004492s) +Vectorize... done! (duration=0.005434s) +Vectorize... done! (duration=0.003771s) +Vectorize... done! (duration=0.004411s) +Vectorize... done! (duration=0.004414s) +Vectorize... done! (duration=0.004208s) +Vectorize... done! (duration=0.004385s) +Vectorize... done! (duration=0.004233s) +Vectorize... done! (duration=0.004333s) +Vectorize... done! (duration=0.003242s) +Vectorize... done! (duration=0.004103s) +Vectorize... done! (duration=0.004466s) +Vectorize... done! (duration=0.003531s) +Vectorize... done! (duration=0.003920s) +Vectorize... done! (duration=0.004078s) +Vectorize... done! (duration=0.003740s) +Vectorize... done! (duration=0.004402s) +Vectorize... done! (duration=0.003880s) +Vectorize... done! (duration=0.004434s) +Vectorize... done! (duration=0.004415s) +Vectorize... done! (duration=0.003994s) +Vectorize... done! (duration=0.004477s) +Vectorize... done! (duration=0.003319s) +Vectorize... done! (duration=0.003887s) +Vectorize... done! (duration=0.004393s) +Vectorize... done! (duration=0.004175s) +Vectorize... done! (duration=0.004351s) +Vectorize... done! (duration=0.003499s) +Vectorize... done! (duration=0.002867s) +Vectorize... done! (duration=0.004242s) +Vectorize... done! (duration=0.003411s) +Vectorize... done! (duration=0.003124s) +Vectorize... done! (duration=0.003072s) +Vectorize... done! (duration=0.003167s) +Vectorize... done! (duration=0.003424s) +Vectorize... done! (duration=0.005039s) +Vectorize... done! (duration=0.004751s) +Vectorize... done! (duration=0.004337s) +Vectorize... done! (duration=0.005358s) +Vectorize... done! (duration=0.004834s) +Vectorize... done! (duration=0.004378s) +Vectorize... done! (duration=0.004672s) +Vectorize... done! (duration=0.004463s) +Vectorize... done! (duration=0.004789s) +Vectorize... done! (duration=0.003595s) +Vectorize... done! (duration=0.004759s) +Vectorize... done! (duration=0.004140s) +Vectorize... done! (duration=0.004624s) +Vectorize... done! (duration=0.004699s) +Vectorize... done! (duration=0.004596s) +Vectorize... done! (duration=0.004245s) +Vectorize... done! (duration=0.004705s) +Vectorize... done! (duration=0.004152s) +Vectorize... done! (duration=0.004245s) +Vectorize... done! (duration=0.004698s) +Vectorize... done! (duration=0.004611s) +Vectorize... done! (duration=0.004152s) +Vectorize... done! (duration=0.004151s) +Vectorize... done! (duration=0.004485s) +Vectorize... done! (duration=0.003275s) +Vectorize... done! (duration=0.003682s) +Vectorize... done! (duration=0.003539s) +Vectorize... done! (duration=0.002899s) +Vectorize... done! (duration=0.004004s) +Vectorize... done! (duration=0.004313s) +Vectorize... done! (duration=0.003605s) +Vectorize... done! (duration=0.003180s) +Vectorize... done! (duration=0.004011s) +Vectorize... done! (duration=0.003907s) +Vectorize... done! (duration=0.003931s) +Vectorize... done! (duration=0.003704s) +Vectorize... done! (duration=0.003561s) +Vectorize... done! (duration=0.003083s) +Vectorize... done! (duration=0.002998s) +Vectorize... done! (duration=0.003927s) +Vectorize... done! (duration=0.003800s) +Vectorize... done! (duration=0.003962s) +Vectorize... done! (duration=0.002957s) +Vectorize... done! (duration=0.003754s) +Vectorize... done! (duration=0.002612s) +Vectorize... done! (duration=0.003960s) +Vectorize... done! (duration=0.003402s) +Vectorize... done! (duration=0.003533s) +Vectorize... done! (duration=0.003780s) +Vectorize... done! (duration=0.003097s) +Vectorize... done! (duration=0.003860s) +Vectorize... done! (duration=0.003379s) +Vectorize... done! (duration=0.003951s) +Vectorize... done! (duration=0.002655s) +Vectorize... done! (duration=0.002800s) +Vectorize... done! (duration=0.004117s) +Vectorize... done! (duration=0.003426s) +Vectorize... done! (duration=0.003835s) +Vectorize... done! (duration=0.003999s) +Vectorize... done! (duration=0.004407s) +Vectorize... done! (duration=0.002871s) +Vectorize... done! (duration=0.003666s) +Vectorize... done! (duration=0.004444s) +Vectorize... done! (duration=0.004659s) +Vectorize... done! (duration=0.002631s) +Vectorize... done! (duration=0.004117s) +Vectorize... done! (duration=0.003680s) +Vectorize... done! (duration=0.003382s) +Vectorize... done! (duration=0.003537s) +Vectorize... done! (duration=0.003991s) +Vectorize... done! (duration=0.003182s) +Vectorize... done! (duration=0.003520s) +Vectorize... done! (duration=0.003987s) diff --git a/bench.sh b/bench.sh new file mode 100755 index 0000000..e63c790 --- /dev/null +++ b/bench.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +iter=300 + +echo "Compiling..." +make clean +make + + +echo "Launching NOAVX..." +echo > noavx_results.txt +for i in $(seq 1 $iter) +do + ./main_noavx >> noavx_results.txt +done + +echo "Launching AVX..." +echo > avx_results.txt +for i in $(seq 1 $iter) +do + ./main_avx >> avx_results.txt +done + + +echo "noavx avg=" $(cat noavx_results.txt |grep -Eo "[0-9]+.[0-9]+"|awk 'BEGIN{A=0}{A=A+$1}END{print(A/'$iter')}') +echo "avx avg=" $(cat avx_results.txt |grep -Eo "[0-9]+.[0-9]+"|awk 'BEGIN{A=0}{A=A+$1}END{print(A/'$iter')}') diff --git a/main.c b/main.c new file mode 100644 index 0000000..99a0eca --- /dev/null +++ b/main.c @@ -0,0 +1,55 @@ +#ifdef USE_VECTOR +#include +#endif +#include +#include +#include + +#define STRIDE (256/32) // How many integers fit in a 256 vector register +#define VSIZE (STRIDE*200000) + +int main(int argc, char *argv[]) { + + float start, end, duration; + + int *v1; + posix_memalign((void**)&v1, 32, VSIZE * sizeof(int)); + int *v2; + posix_memalign((void**)&v2, 32, VSIZE * sizeof(int)); + int result[VSIZE]; + +#ifndef USE_VECTOR + printf("Sequential.."); + start = (float)clock()/CLOCKS_PER_SEC; + for(int i=0;i