diff --git a/0000-bugfix-test-md5-check-failure-1.patch b/0000-bugfix-test-md5-check-failure-1.patch deleted file mode 100644 index 2e8b3d3c5a0316185315750bec8b98ad61824909..0000000000000000000000000000000000000000 --- a/0000-bugfix-test-md5-check-failure-1.patch +++ /dev/null @@ -1,551 +0,0 @@ -From b81a5095563776397a4239132d2b737a1083e02f Mon Sep 17 00:00:00 2001 -From: Wayne Davison -Date: Thu, 3 Mar 2022 17:00:57 -0800 -Subject: [PATCH] Make asm use more selectable - -- Make the SIMD ASM code off by default. Use configure --enable-simd-asm - to enable. -- Allow MD5 ASM code to be requested even when OpenSSL is handling MD4 - checksums. Use configure --enable-md5-asm to enable. ---- - Makefile.in | 15 ++-- - checksum.c | 34 ++++----- - lib/md5-asm-x86_64.S | 4 +- - lib/md5.c | 19 ++--- - lib/mdigest.h | 13 ++-- - rsync.h | 9 +-- - simd-checksum-avx2.S | 14 +++- - simd-checksum-x86_64.cpp | 151 ++++++++++++++++++++++++++++++++++++--- - 8 files changed, 198 insertions(+), 62 deletions(-) - -diff --git a/Makefile.in b/Makefile.in -index 8817edab..3cde9557 100644 ---- a/Makefile.in -+++ b/Makefile.in -@@ -30,8 +30,9 @@ SHELL=/bin/sh - .SUFFIXES: - .SUFFIXES: .c .o - --SIMD_x86_64=simd-checksum-x86_64.o simd-checksum-avx2.o --ASM_x86_64=lib/md5-asm-x86_64.o -+ROLL_SIMD_x86_64=simd-checksum-x86_64.o -+ROLL_ASM_x86_64=simd-checksum-avx2.o -+MD5_ASM_x86_64=lib/md5-asm-x86_64.o - - GENFILES=configure.sh aclocal.m4 config.h.in rsync.1 rsync.1.html \ - rsync-ssl.1 rsync-ssl.1.html rsyncd.conf.5 rsyncd.conf.5.html \ -@@ -46,7 +47,7 @@ OBJS1=flist.o rsync.o generator.o receiver.o cleanup.o sender.o exclude.o \ - util1.o util2.o main.o checksum.o match.o syscall.o log.o backup.o delete.o - OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o hashtable.o \ - usage.o fileio.o batch.o clientname.o chmod.o acls.o xattrs.o --OBJS3=progress.o pipe.o @ASM@ @SIMD@ -+OBJS3=progress.o pipe.o @MD5_ASM@ @ROLL_SIMD@ @ROLL_ASM@ - DAEMON_OBJ = params.o loadparm.o clientserver.o 
access.o connection.o authenticate.o - popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \ - popt/popthelp.o popt/poptparse.o -@@ -147,13 +148,13 @@ git-version.h: ALWAYS_RUN - ALWAYS_RUN: - - simd-checksum-x86_64.o: simd-checksum-x86_64.cpp -- @$(srcdir)/cmd-or-msg disable-simd $(CXX) -I. $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $(srcdir)/simd-checksum-x86_64.cpp -+ @$(srcdir)/cmd-or-msg disable-roll-simd $(CXX) -I. $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $(srcdir)/simd-checksum-x86_64.cpp - - simd-checksum-avx2.o: simd-checksum-avx2.S -- @$(srcdir)/cmd-or-msg disable-asm $(CC) $(CFLAGS) --include=$(srcdir)/rsync.h -DAVX2_ASM -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/simd-checksum-avx2.S -+ @$(srcdir)/cmd-or-msg disable-roll-asm $(CC) $(CFLAGS) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/simd-checksum-avx2.S - --lib/md5-asm-x86_64.o: lib/md5-asm-x86_64.S config.h lib/md-defines.h -- @$(srcdir)/cmd-or-msg disable-asm $(CC) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/lib/md5-asm-x86_64.S -+lib/md5-asm-x86_64.o: lib/md5-asm-x86_64.S lib/md-defines.h -+ @$(srcdir)/cmd-or-msg disable-md5-asm $(CC) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/lib/md5-asm-x86_64.S - - tls$(EXEEXT): $(TLS_OBJ) - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(TLS_OBJ) $(LIBS) -diff --git a/checksum.c b/checksum.c -index 1ed76828..77848585 100644 ---- a/checksum.c -+++ b/checksum.c -@@ -179,7 +179,7 @@ int canonical_checksum(int csum_type) - return 0; - } - --#ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */ -+#ifndef USE_ROLL_SIMD /* See simd-checksum-*.cpp. 
*/ - /* - a simple 32 bit checksum that can be updated from either end - (inspired by Mark Adler's Adler-32 checksum) -@@ -222,23 +222,23 @@ void get_checksum2(char *buf, int32 len, char *sum) - } - #endif - case CSUM_MD5: { -- MD5_CTX m5; -+ md5_context m5; - uchar seedbuf[4]; -- MD5_Init(&m5); -+ md5_begin(&m5); - if (proper_seed_order) { - if (checksum_seed) { - SIVALu(seedbuf, 0, checksum_seed); -- MD5_Update(&m5, seedbuf, 4); -+ md5_update(&m5, seedbuf, 4); - } -- MD5_Update(&m5, (uchar *)buf, len); -+ md5_update(&m5, (uchar *)buf, len); - } else { -- MD5_Update(&m5, (uchar *)buf, len); -+ md5_update(&m5, (uchar *)buf, len); - if (checksum_seed) { - SIVALu(seedbuf, 0, checksum_seed); -- MD5_Update(&m5, seedbuf, 4); -+ md5_update(&m5, seedbuf, 4); - } - } -- MD5_Final((uchar *)sum, &m5); -+ md5_result(&m5, (uchar *)sum); - break; - } - case CSUM_MD4: -@@ -374,18 +374,18 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) - } - #endif - case CSUM_MD5: { -- MD5_CTX m5; -+ md5_context m5; - -- MD5_Init(&m5); -+ md5_begin(&m5); - - for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) -- MD5_Update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE); -+ md5_update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE); - - remainder = (int32)(len - i); - if (remainder > 0) -- MD5_Update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder); -+ md5_update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder); - -- MD5_Final((uchar *)sum, &m5); -+ md5_result(&m5, (uchar *)sum); - break; - } - case CSUM_MD4: -@@ -443,7 +443,7 @@ static union { - #ifdef USE_OPENSSL - MD4_CTX m4; - #endif -- MD5_CTX m5; -+ md5_context m5; - } ctx; - #ifdef SUPPORT_XXHASH - static XXH64_state_t* xxh64_state; -@@ -482,7 +482,7 @@ void sum_init(int csum_type, int seed) - break; - #endif - case CSUM_MD5: -- MD5_Init(&ctx.m5); -+ md5_begin(&ctx.m5); - break; - case CSUM_MD4: - #ifdef USE_OPENSSL -@@ -532,7 +532,7 @@ void sum_update(const char *p, int32 len) - break; - 
#endif - case CSUM_MD5: -- MD5_Update(&ctx.m5, (uchar *)p, len); -+ md5_update(&ctx.m5, (uchar *)p, len); - break; - case CSUM_MD4: - #ifdef USE_OPENSSL -@@ -597,7 +597,7 @@ int sum_end(char *sum) - } - #endif - case CSUM_MD5: -- MD5_Final((uchar *)sum, &ctx.m5); -+ md5_result(&ctx.m5, (uchar *)sum); - break; - case CSUM_MD4: - #ifdef USE_OPENSSL -diff --git a/lib/md5-asm-x86_64.S b/lib/md5-asm-x86_64.S -index 383f193a..3737058f 100644 ---- a/lib/md5-asm-x86_64.S -+++ b/lib/md5-asm-x86_64.S -@@ -27,7 +27,7 @@ - #include "config.h" - #include "md-defines.h" - --#if !defined USE_OPENSSL && CSUM_CHUNK == 64 -+#ifdef USE_MD5_ASM /* { */ - - #ifdef __APPLE__ - #define md5_process_asm _md5_process_asm -@@ -698,4 +698,4 @@ md5_process_asm: - pop %rbp - ret - --#endif /* !USE_OPENSSL ... */ -+#endif /* } USE_MD5_ASM */ -diff --git a/lib/md5.c b/lib/md5.c -index 41f158b8..07fd6147 100644 ---- a/lib/md5.c -+++ b/lib/md5.c -@@ -20,7 +20,7 @@ - - #include "rsync.h" - --#ifndef USE_OPENSSL -+#if !defined USE_OPENSSL || USE_MD5_ASM /* { */ - void md5_begin(md_context *ctx) - { - ctx->A = 0x67452301; -@@ -148,7 +148,10 @@ static void md5_process(md_context *ctx, const uchar data[CSUM_CHUNK]) - ctx->D += D; - } - --#if defined HAVE_ASM && CSUM_CHUNK == 64 -+#ifdef USE_MD5_ASM -+#if CSUM_CHUNK != 64 -+#error The MD5 ASM code does not support CSUM_CHUNK != 64 -+#endif - extern void md5_process_asm(md_context *ctx, const void *data, size_t num); - #endif - -@@ -176,20 +179,20 @@ void md5_update(md_context *ctx, const uchar *input, uint32 length) - left = 0; - } - --#if defined HAVE_ASM && CSUM_CHUNK == 64 -+#ifdef USE_MD5_ASM /* { */ - if (length >= CSUM_CHUNK) { - uint32 chunks = length / CSUM_CHUNK; - md5_process_asm(ctx, input, chunks); - length -= chunks * CSUM_CHUNK; - input += chunks * CSUM_CHUNK; - } --#else -+#else /* } { */ - while (length >= CSUM_CHUNK) { - md5_process(ctx, input); - length -= CSUM_CHUNK; - input += CSUM_CHUNK; - } --#endif -+#endif /* } */ - - if (length) 
- memcpy(ctx->buffer + left, input, length); -@@ -221,9 +224,9 @@ void md5_result(md_context *ctx, uchar digest[MD5_DIGEST_LEN]) - SIVALu(digest, 8, ctx->C); - SIVALu(digest, 12, ctx->D); - } --#endif -+#endif /* } */ - --#ifdef TEST_MD5 -+#ifdef TEST_MD5 /* { */ - - void get_md5(uchar *out, const uchar *input, int n) - { -@@ -317,4 +320,4 @@ int main(int argc, char *argv[]) - return 0; - } - --#endif -+#endif /* } */ -diff --git a/lib/mdigest.h b/lib/mdigest.h -index db174017..f1d6d934 100644 ---- a/lib/mdigest.h -+++ b/lib/mdigest.h -@@ -17,12 +17,13 @@ void mdfour_begin(md_context *md); - void mdfour_update(md_context *md, const uchar *in, uint32 length); - void mdfour_result(md_context *md, uchar digest[MD4_DIGEST_LEN]); - --#ifndef USE_OPENSSL --#define MD5_CTX md_context --#define MD5_Init md5_begin --#define MD5_Update md5_update --#define MD5_Final(digest, cptr) md5_result(cptr, digest) -- -+#if defined USE_OPENSSL && !defined USE_MD5_ASM -+#define md5_context MD5_CTX -+#define md5_begin MD5_Init -+#define md5_update MD5_Update -+#define md5_result(cptr, digest) MD5_Final(digest, cptr) -+#else -+#define md5_context md_context - void md5_begin(md_context *ctx); - void md5_update(md_context *ctx, const uchar *input, uint32 length); - void md5_result(md_context *ctx, uchar digest[MD5_DIGEST_LEN]); -diff --git a/rsync.h b/rsync.h -index 41a014c3..4b30570b 100644 ---- a/rsync.h -+++ b/rsync.h -@@ -18,11 +18,6 @@ - * with this program; if not, visit the http://fsf.org website. - */ - --/* a non-zero CHAR_OFFSET makes the rolling sum stronger, but is -- incompatible with older versions :-( */ --#define CHAR_OFFSET 0 -- --#ifndef AVX2_ASM /* do not include the rest of file for assembly */ - #define False 0 - #define True 1 - #define Unset (-1) /* Our BOOL values are always an int. 
*/ -@@ -43,6 +38,9 @@ - - #define BACKUP_SUFFIX "~" - -+/* a non-zero CHAR_OFFSET makes the rolling sum stronger, but is -+ incompatible with older versions :-( */ -+#define CHAR_OFFSET 0 - - /* These flags are only used during the flist transfer. */ - -@@ -1477,7 +1475,6 @@ const char *get_panic_action(void); - fprintf(stderr, "%s in %s at line %d\n", msg, __FILE__, __LINE__); \ - exit_cleanup(RERR_UNSUPPORTED); \ - } while (0) --#endif /* AVX2_ASM */ - - #ifdef HAVE_MALLINFO2 - #define MEM_ALLOC_INFO mallinfo2 -diff --git a/simd-checksum-avx2.S b/simd-checksum-avx2.S -index dc8d145b..549cc3ef 100644 ---- a/simd-checksum-avx2.S -+++ b/simd-checksum-avx2.S -@@ -1,15 +1,21 @@ -+#include "config.h" -+ -+#ifdef USE_ROLL_ASM /* { */ -+ -+#define CHAR_OFFSET 0 /* Keep this the same as rsync.h, which isn't likely to change. */ -+ - #ifdef __APPLE__ --#define get_checksum1_avx2 _get_checksum1_avx2 -+#define get_checksum1_avx2_asm _get_checksum1_avx2_asm - #endif - - .intel_syntax noprefix - .text - - .p2align 5 -- .globl get_checksum1_avx2 -+ .globl get_checksum1_avx2_asm - - # rdi=*buf, esi=len, edx=i, rcx= *ps1, r8= *ps2 --get_checksum1_avx2: -+get_checksum1_avx2_asm: - vmovd xmm6,[rcx] # load *ps1 - lea eax, [rsi-128] # at least 128 bytes to process? - cmp edx, eax -@@ -167,3 +173,5 @@ get_checksum1_avx2: - .byte 3 - .byte 2 - .byte 1 -+ -+#endif /* } USE_ROLL_ASM */ -diff --git a/simd-checksum-x86_64.cpp b/simd-checksum-x86_64.cpp -index ebeeac2d..33f26e92 100644 ---- a/simd-checksum-x86_64.cpp -+++ b/simd-checksum-x86_64.cpp -@@ -51,12 +51,12 @@ - * GCC 4.x are not supported to ease configure.ac logic. 
- */ - --#ifdef __x86_64__ --#ifdef __cplusplus -+#ifdef __x86_64__ /* { */ -+#ifdef __cplusplus /* { */ - - #include "rsync.h" - --#ifdef HAVE_SIMD -+#ifdef USE_ROLL_SIMD /* { */ - - #include - -@@ -85,6 +85,9 @@ typedef long long __m256i_u __attribute__((__vector_size__(32), __may_alias__, _ - #define SSE2_HADDS_EPI16(a, b) _mm_adds_epi16(SSE2_INTERLEAVE_EVEN_EPI16(a, b), SSE2_INTERLEAVE_ODD_EPI16(a, b)) - #define SSE2_MADDUBS_EPI16(a, b) _mm_adds_epi16(SSE2_MULU_EVEN_EPI8(a, b), SSE2_MULU_ODD_EPI8(a, b)) - -+#ifndef USE_ROLL_ASM -+__attribute__ ((target("default"))) MVSTATIC int32 get_checksum1_avx2_64(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { return i; } -+#endif - __attribute__ ((target("default"))) MVSTATIC int32 get_checksum1_ssse3_32(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { return i; } - __attribute__ ((target("default"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { return i; } - -@@ -245,7 +248,7 @@ __attribute__ ((target("sse2"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf - - // (4*buf[i] + 3*buf[i+1]), (2*buf[i+2], buf[i+3]), ... 
2*[int16*8] - __m128i mul_const = _mm_set1_epi32(4 + (3 << 8) + (2 << 16) + (1 << 24)); -- __m128i mul_add16_1 = SSE2_MADDUBS_EPI16(mul_const, in8_1); -+ __m128i mul_add16_1 = SSE2_MADDUBS_EPI16(mul_const, in8_1); - __m128i mul_add16_2 = SSE2_MADDUBS_EPI16(mul_const, in8_2); - - // s2 += 32*s1 -@@ -310,7 +313,127 @@ __attribute__ ((target("sse2"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf - return i; - } - --extern "C" __attribute__ ((target("avx2"))) int32 get_checksum1_avx2(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2); -+#ifdef USE_ROLL_ASM /* { */ -+ -+extern "C" __attribute__ ((target("avx2"))) int32 get_checksum1_avx2_asm(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2); -+ -+#else /* } { */ -+ -+/* -+ AVX2 loop per 64 bytes: -+ int16 t1[16]; -+ int16 t2[16]; -+ for (int j = 0; j < 16; j++) { -+ t1[j] = buf[j*4 + i] + buf[j*4 + i+1] + buf[j*4 + i+2] + buf[j*4 + i+3]; -+ t2[j] = 4*buf[j*4 + i] + 3*buf[j*4 + i+1] + 2*buf[j*4 + i+2] + buf[j*4 + i+3]; -+ } -+ s2 += 64*s1 + (uint32)( -+ 60*t1[0] + 56*t1[1] + 52*t1[2] + 48*t1[3] + 44*t1[4] + 40*t1[5] + 36*t1[6] + 32*t1[7] + 28*t1[8] + 24*t1[9] + 20*t1[10] + 16*t1[11] + 12*t1[12] + 8*t1[13] + 4*t1[14] + -+ t2[0] + t2[1] + t2[2] + t2[3] + t2[4] + t2[5] + t2[6] + t2[7] + t2[8] + t2[9] + t2[10] + t2[11] + t2[12] + t2[13] + t2[14] + t2[15] -+ ) + 2080*CHAR_OFFSET; -+ s1 += (uint32)(t1[0] + t1[1] + t1[2] + t1[3] + t1[4] + t1[5] + t1[6] + t1[7] + t1[8] + t1[9] + t1[10] + t1[11] + t1[12] + t1[13] + t1[14] + t1[15]) + -+ 64*CHAR_OFFSET; -+ */ -+ -+__attribute__ ((target("avx2"))) MVSTATIC int32 get_checksum1_avx2_64(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) -+{ -+ if (len > 64) { -+ -+ uint32 x[4] = {0}; -+ __m128i ss1 = _mm_cvtsi32_si128(*ps1); -+ __m128i ss2 = _mm_cvtsi32_si128(*ps2); -+ -+ const char mul_t1_buf[16] = {60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0}; -+ __m128i tmp = _mm_load_si128((__m128i*) mul_t1_buf); -+ __m256i mul_t1 = 
_mm256_cvtepu8_epi16(tmp); -+ __m256i mul_const = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(4 | (3 << 8) | (2 << 16) | (1 << 24))); -+ __m256i mul_one; -+ mul_one = _mm256_abs_epi8(_mm256_cmpeq_epi16(mul_one,mul_one)); // set all vector elements to 1 -+ -+ for (; i < (len-64); i+=64) { -+ // Load ... 4*[int8*16] -+ __m256i in8_1, in8_2; -+ __m128i in8_1_low, in8_2_low, in8_1_high, in8_2_high; -+ in8_1_low = _mm_loadu_si128((__m128i_u*)&buf[i]); -+ in8_2_low = _mm_loadu_si128((__m128i_u*)&buf[i+16]); -+ in8_1_high = _mm_loadu_si128((__m128i_u*)&buf[i+32]); -+ in8_2_high = _mm_loadu_si128((__m128i_u*)&buf[i+48]); -+ in8_1 = _mm256_inserti128_si256(_mm256_castsi128_si256(in8_1_low), in8_1_high,1); -+ in8_2 = _mm256_inserti128_si256(_mm256_castsi128_si256(in8_2_low), in8_2_high,1); -+ -+ // (1*buf[i] + 1*buf[i+1]), (1*buf[i+2], 1*buf[i+3]), ... 2*[int16*8] -+ // Fastest, even though multiply by 1 -+ __m256i add16_1 = _mm256_maddubs_epi16(mul_one, in8_1); -+ __m256i add16_2 = _mm256_maddubs_epi16(mul_one, in8_2); -+ -+ // (4*buf[i] + 3*buf[i+1]), (2*buf[i+2], buf[i+3]), ... 
2*[int16*8] -+ __m256i mul_add16_1 = _mm256_maddubs_epi16(mul_const, in8_1); -+ __m256i mul_add16_2 = _mm256_maddubs_epi16(mul_const, in8_2); -+ -+ // s2 += 64*s1 -+ ss2 = _mm_add_epi32(ss2, _mm_slli_epi32(ss1, 6)); -+ -+ // [sum(t1[0]..t1[7]), X, X, X] [int32*4]; faster than multiple _mm_hadds_epi16 -+ __m256i sum_add32 = _mm256_add_epi16(add16_1, add16_2); -+ sum_add32 = _mm256_add_epi16(sum_add32, _mm256_srli_epi32(sum_add32, 16)); -+ sum_add32 = _mm256_add_epi16(sum_add32, _mm256_srli_si256(sum_add32, 4)); -+ sum_add32 = _mm256_add_epi16(sum_add32, _mm256_srli_si256(sum_add32, 8)); -+ -+ // [sum(t2[0]..t2[7]), X, X, X] [int32*4]; faster than multiple _mm_hadds_epi16 -+ __m256i sum_mul_add32 = _mm256_add_epi16(mul_add16_1, mul_add16_2); -+ sum_mul_add32 = _mm256_add_epi16(sum_mul_add32, _mm256_srli_epi32(sum_mul_add32, 16)); -+ sum_mul_add32 = _mm256_add_epi16(sum_mul_add32, _mm256_srli_si256(sum_mul_add32, 4)); -+ sum_mul_add32 = _mm256_add_epi16(sum_mul_add32, _mm256_srli_si256(sum_mul_add32, 8)); -+ -+ // s1 += t1[0] + t1[1] + t1[2] + t1[3] + t1[4] + t1[5] + t1[6] + t1[7] -+ __m128i sum_add32_hi = _mm256_extracti128_si256(sum_add32, 0x1); -+ ss1 = _mm_add_epi32(ss1, _mm256_castsi256_si128(sum_add32)); -+ ss1 = _mm_add_epi32(ss1, sum_add32_hi); -+ -+ // s2 += t2[0] + t2[1] + t2[2] + t2[3] + t2[4] + t2[5] + t2[6] + t2[7] -+ __m128i sum_mul_add32_hi = _mm256_extracti128_si256(sum_mul_add32, 0x1); -+ ss2 = _mm_add_epi32(ss2, _mm256_castsi256_si128(sum_mul_add32)); -+ ss2 = _mm_add_epi32(ss2, sum_mul_add32_hi); -+ -+ // [t1[0] + t1[1], t1[2] + t1[3] ...] [int16*8] -+ // We could've combined this with generating sum_add32 above and -+ // save an instruction but benchmarking shows that as being slower -+ __m256i add16 = _mm256_hadds_epi16(add16_1, add16_2); -+ -+ // [t1[0], t1[1], ...] -> [t1[0]*28 + t1[1]*24, ...] 
[int32*4] -+ __m256i mul32 = _mm256_madd_epi16(add16, mul_t1); -+ -+ // [sum(mul32), X, X, X] [int32*4]; faster than multiple _mm_hadd_epi32 -+ mul32 = _mm256_add_epi32(mul32, _mm256_srli_si256(mul32, 4)); -+ mul32 = _mm256_add_epi32(mul32, _mm256_srli_si256(mul32, 8)); -+ // prefetch 2 cacheline ahead -+ _mm_prefetch(&buf[i + 160], _MM_HINT_T0); -+ -+ // s2 += 28*t1[0] + 24*t1[1] + 20*t1[2] + 16*t1[3] + 12*t1[4] + 8*t1[5] + 4*t1[6] -+ __m128i mul32_hi = _mm256_extracti128_si256(mul32, 0x1); -+ ss2 = _mm_add_epi32(ss2, _mm256_castsi256_si128(mul32)); -+ ss2 = _mm_add_epi32(ss2, mul32_hi); -+ -+#if CHAR_OFFSET != 0 -+ // s1 += 32*CHAR_OFFSET -+ __m128i char_offset_multiplier = _mm_set1_epi32(32 * CHAR_OFFSET); -+ ss1 = _mm_add_epi32(ss1, char_offset_multiplier); -+ -+ // s2 += 528*CHAR_OFFSET -+ char_offset_multiplier = _mm_set1_epi32(528 * CHAR_OFFSET); -+ ss2 = _mm_add_epi32(ss2, char_offset_multiplier); -+#endif -+ } -+ -+ _mm_store_si128((__m128i_u*)x, ss1); -+ *ps1 = x[0]; -+ _mm_store_si128((__m128i_u*)x, ss2); -+ *ps2 = x[0]; -+ } -+ return i; -+} -+ -+#endif /* } !USE_ROLL_ASM */ - - static int32 get_checksum1_default_1(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) - { -@@ -338,7 +461,11 @@ static inline uint32 get_checksum1_cpp(char *buf1, int32 len) - uint32 s2 = 0; - - // multiples of 64 bytes using AVX2 (if available) -- i = get_checksum1_avx2((schar*)buf1, len, i, &s1, &s2); -+#ifdef USE_ROLL_ASM -+ i = get_checksum1_avx2_asm((schar*)buf1, len, i, &s1, &s2); -+#else -+ i = get_checksum1_avx2_64((schar*)buf1, len, i, &s1, &s2); -+#endif - - // multiples of 32 bytes using SSSE3 (if available) - i = get_checksum1_ssse3_32((schar*)buf1, len, i, &s1, &s2); -@@ -407,7 +534,11 @@ int main() { - benchmark("Raw-C", get_checksum1_default_1, (schar*)buf, BLOCK_LEN); - benchmark("SSE2", get_checksum1_sse2_32, (schar*)buf, BLOCK_LEN); - benchmark("SSSE3", get_checksum1_ssse3_32, (schar*)buf, BLOCK_LEN); -- benchmark("AVX2", get_checksum1_avx2, 
(schar*)buf, BLOCK_LEN); -+#ifdef USE_ROLL_ASM -+ benchmark("AVX2-ASM", get_checksum1_avx2_asm, (schar*)buf, BLOCK_LEN); -+#else -+ benchmark("AVX2", get_checksum1_avx2_64, (schar*)buf, BLOCK_LEN); -+#endif - - free(buf); - return 0; -@@ -417,6 +548,6 @@ int main() { - #pragma clang optimize on - #endif /* BENCHMARK_SIMD_CHECKSUM1 */ - --#endif /* HAVE_SIMD */ --#endif /* __cplusplus */ --#endif /* __x86_64__ */ -+#endif /* } USE_ROLL_SIMD */ -+#endif /* } __cplusplus */ -+#endif /* } __x86_64__ */ - diff --git a/0001-bugfix-test-md5-check-failure-2.patch b/0001-bugfix-test-md5-check-failure-2.patch deleted file mode 100644 index faf9321376ce208f261291a7433198831b481a78..0000000000000000000000000000000000000000 --- a/0001-bugfix-test-md5-check-failure-2.patch +++ /dev/null @@ -1,258 +0,0 @@ -From b81a5095563776397a4239132d2b737a1083e02f Mon Sep 17 00:00:00 2001 -From: Wayne Davison -Date: Thu, 3 Mar 2022 17:00:57 -0800 -Subject: [PATCH] Make asm use more selectable - -- Make the SIMD ASM code off by default. Use configure --enable-simd-asm - to enable. -- Allow MD5 ASM code to be requested even when OpenSSL is handling MD4 - checksums. Use configure --enable-md5-asm to enable. ---- - NEWS.md | 23 +++++++++---- - configure.ac | 96 ++++++++++++++++++++++++++++++++-------------------- - usage.c | 12 ++++--- - 3 files changed, 84 insertions(+), 47 deletions(-) - -diff --git a/NEWS.md b/NEWS.md -index 3083ca3..ed19449 100644 ---- a/NEWS.md -+++ b/NEWS.md -@@ -136,8 +136,9 @@ - (keeping the behavior the same as before), so specifying `--info=nonreg0` - can be used to turn the warnings off. - -- - More ASM optimizations from Shark64. -- -+ - An optional asm optimization for the rolling checksum from Shark64. Enable -+ it with `./configure --enable-roll-asm`. -+ - - Transformed rrsync into a python script with improvements: - - Security has been beefed up. - - The known rsync options were updated to include recent additions. 
-@@ -189,14 +190,24 @@ - using the output of `git describe` when building inside a non-shallow git - checkout, though.) - -- - Improved the IPv6 determination in configure. -+ - Renamed configure's `--enable-simd` option to `--enable-roll-simd` and added -+ the option `--enable-roll-asm` to use the new asm version of the code. Both -+ are x86_64/amd64 only. -+ -+ - Renamed configure's `--enable-asm` option to `--enable-md5-asm` to avoid -+ confusion with the asm option for the rolling checksum. It is also honored -+ even when openssl crypto is in use. This allows: normal MD4 & MD5, normal -+ MD4 + asm MD5, openssl MD4 & MD5, or openssl MD4 + asm MD5. - -- - Made SIMD & ASM configure default to "no" on non-Linux hosts due to various -- reports of problems on NetBSD & macOS hosts. These tests were also tweaked -- to allow enabling the feature on a host_cpu of amd64 (was only x86_64). -+ - Made SIMD & asm configure checks default to "no" on non-Linux hosts due to -+ various reports of problems on NetBSD & macOS hosts. These were also -+ tweaked to allow enabling the feature on a host_cpu of amd64 (was only -+ allowed on x86_64 before). - - - Fixed configure to not fail at the SIMD check when cross-compiling. - -+ - Improved the IPv6 determination in configure. -+ - - Compile the C files with `-pedantic-errors` (when possible) so that we will - get warned if a static initialization overflows in the future (among other - things). -diff --git a/configure.ac b/configure.ac -index 7031283..1dd3e8e 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -229,12 +229,13 @@ fi - AC_DEFINE_UNQUOTED(NOBODY_USER, "$NOBODY_USER", [unprivileged user--e.g. 
nobody]) - AC_DEFINE_UNQUOTED(NOBODY_GROUP, "$NOBODY_GROUP", [unprivileged group for unprivileged user]) - --# SIMD optimizations --SIMD= -+# rolling-checksum SIMD optimizations -+ROLL_SIMD= - --AC_MSG_CHECKING([whether to enable SIMD optimizations]) --AC_ARG_ENABLE(simd, -- AS_HELP_STRING([--enable-simd],[enable/disable to control SIMD optimizations (requires c++)])) -+AC_MSG_CHECKING([whether to enable rolling-checksum SIMD optimizations]) -+AC_ARG_ENABLE(roll-simd, -++ AS_HELP_STRING([--enable-roll-simd],[enable/disable to control rolling-checksum SIMD optimizations (requires c++)])) -+ - - # Clag is crashing with -g -O2, so we'll get rid of -g for now. - CXXFLAGS=`echo "$CXXFLAGS" | sed 's/-g //'` -@@ -263,14 +264,14 @@ __attribute__ ((target("ssse3"))) void more_testing(char* buf, int len) - } - ]]) - --if test x"$enable_simd" = x""; then -+if test x"$enable_roll_simd" = x""; then - case "$host_os" in - *linux*) ;; -- *) enable_simd=no ;; -- esac -+ *) enable_roll_simd=no ;; -+ esac - fi - --if test x"$enable_simd" != x"no"; then -+if test x"$enable_roll_simd" != x"no"; then - # For x86-64 SIMD, g++ >=5 or clang++ >=7 is required - if test x"$host_cpu" = x"x86_64" || test x"$host_cpu" = x"amd64"; then - AC_LANG(C++) -@@ -283,23 +284,23 @@ if test x"$enable_simd" != x"no"; then - AC_LANG(C) - if test x"$CXX_OK" = x"yes"; then - # AC_MSG_RESULT() is called below. -- SIMD="$host_cpu" -- elif test x"$enable_simd" = x"yes"; then -+ ROLL_SIMD="$host_cpu" -+ elif test x"$enable_roll_simd" = x"yes"; then - AC_MSG_RESULT(error) -- AC_MSG_ERROR(The SIMD compilation test failed. --Omit --enable-simd to continue without it.) -+ AC_MSG_ERROR(The rolling-checksum SIMD compilation test failed. -+Omit --enable-roll-simd to continue without it.) - fi -- elif test x"$enable_simd" = x"yes"; then -+ elif test x"$enable_roll_simd" = x"yes"; then - AC_MSG_RESULT(unavailable) -- AC_MSG_ERROR(The SIMD optimizations are currently x86_64|amd64 only. 
--Omit --enable-simd to continue without it.) -+ AC_MSG_ERROR(The rolling-checksum SIMD optimizations are currently x86_64|amd64 only. -+Omit --enable-roll-simd to continue without it.) - fi - fi - --if test x"$SIMD" != x""; then -- AC_MSG_RESULT([yes ($SIMD)]) -- AC_DEFINE(HAVE_SIMD, 1, [Define to 1 to enable SIMD optimizations]) -- SIMD='$(SIMD_'"$SIMD)" -+if test x"$ROLL_SIMD" != x""; then -+ AC_MSG_RESULT([yes ($ROLL_SIMD)]) -+ AC_DEFINE(USE_ROLL_SIMD, 1, [Define to 1 to enable rolling-checksum SIMD optimizations]) -+ ROLL_SIMD='$(ROLL_SIMD_'"$ROLL_SIMD)" - # We only use c++ for its target attribute dispatching, disable unneeded bulky features - CXXFLAGS="$CXXFLAGS -fno-exceptions -fno-rtti" - # Apple often has "g++" as a symlink for clang. Try to find out the truth. -@@ -311,7 +312,7 @@ else - AC_MSG_RESULT(no) - fi - --AC_SUBST(SIMD) -+AC_SUBST(ROLL_SIMD) - - AC_MSG_CHECKING([if assembler accepts noexecstack]) - OLD_CFLAGS="$CFLAGS" -@@ -322,38 +323,59 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ ]], [[return 0;]])], - CFLAGS="$OLD_CFLAGS" - AC_SUBST(NOEXECSTACK) - --ASM= -- --AC_MSG_CHECKING([whether to enable ASM optimizations]) --AC_ARG_ENABLE(asm, -- AS_HELP_STRING([--enable-asm],[enable/disable to control ASM optimizations])) -+MD5_ASM= - --if test x"$enable_asm" = x""; then -+AC_MSG_CHECKING([whether to enable MD5 ASM optimizations]) -+AC_ARG_ENABLE(md5-asm, -+ AS_HELP_STRING([--enable-md5-asm],[enable/disable to control MD5 ASM optimizations])) -+ -+if test x"$enable_md5_asm" = x""; then - case "$host_os" in - *linux*) ;; -- *) enable_asm=no ;; -+ *) enable_md5_asm=no ;; - esac - fi - --if test x"$enable_asm" != x"no"; then -+if test x"$enable_md5_asm" != x"no"; then - if test x"$host_cpu" = x"x86_64" || test x"$host_cpu" = x"amd64"; then -- ASM="$host_cpu" -- elif test x"$enable_asm" = x"yes"; then -+ MD5_ASM="$host_cpu" -+ elif test x"$enable_md5_asm" = x"yes"; then - AC_MSG_RESULT(unavailable) - AC_MSG_ERROR(The ASM optimizations are currently 
x86_64|amd64 only. --Omit --enable-asm to continue without it.) -+Omit --enable-md5-asm to continue without it.) - fi - fi - --if test x"$ASM" != x""; then -- AC_MSG_RESULT([yes ($ASM)]) -- AC_DEFINE(HAVE_ASM, 1, [Define to 1 to enable ASM optimizations]) -- ASM='$(ASM_'"$ASM)" -+if test x"$MD5_ASM" != x""; then -+ AC_MSG_RESULT([yes ($MD5_ASM)]) -+ AC_DEFINE(USE_MD5_ASM, 1, [Define to 1 to enable MD5 ASM optimizations]) -+ MD5_ASM='$(MD5_ASM_'"$MD5_ASM)" -+else -+ AC_MSG_RESULT(no) -+fi -+ -+AC_SUBST(MD5_ASM) -+ -+ROLL_ASM= -+ -+AC_MSG_CHECKING([whether to enable rolling-checksum ASM optimizations]) -+AC_ARG_ENABLE(roll-asm, -+ AS_HELP_STRING([--enable-roll-asm],[enable/disable to control rolling-checksum ASM optimizations (requires --enable-roll-simd)])) -+ -+if test x"$ROLL_SIMD" = x""; then -+ enable_roll_asm=no -+fi -+ -+if test x"$enable_roll_asm" = x"yes"; then -+ ROLL_ASM="$host_cpu" -+ AC_MSG_RESULT([yes ($ROLL_ASM)]) -+ AC_DEFINE(USE_ROLL_ASM, 1, [Define to 1 to enable rolling-checksum ASM optimizations (requires --enable-roll-simd)]) -+ ROLL_ASM='$(ROLL_ASM_'"$ROLL_ASM)" - else - AC_MSG_RESULT(no) - fi - --AC_SUBST(ASM) -+AC_SUBST(ROLL_ASM) - - # arrgh. 
libc in some old debian version screwed up the largefile - # stuff, getting byte range locking wrong -diff --git a/usage.c b/usage.c -index db13535..e710d84 100644 ---- a/usage.c -+++ b/usage.c -@@ -139,20 +139,24 @@ static void print_info_flags(enum logcode f) - - "*Optimizations", - --#ifndef HAVE_SIMD -+#ifndef USE_ROLL_SIMD - "no " - #endif -- "SIMD", -+ "SIMD-roll", - --#ifndef HAVE_ASM -+#ifndef USE_ROLL_ASM - "no " - #endif -- "asm", -+ "asm-roll", - - #ifndef USE_OPENSSL - "no " - #endif - "openssl-crypto", -+#ifndef USE_MD5_ASM -+ "no " -+#endif -+ "asm-MD5", - - NULL - }; --- -2.27.0 - diff --git a/0002-rsync-3.2.2-runtests.patch b/0002-rsync-3.2.2-runtests.patch deleted file mode 100644 index 0f682e56c921819d34bced97c3a3d2bb7ba03a76..0000000000000000000000000000000000000000 --- a/0002-rsync-3.2.2-runtests.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/runtests.sh.old b/runtests.sh -index ecb383e..1cd1d1a 100755 ---- a/runtests.sh.old -+++ b/runtests.sh -@@ -276,6 +276,7 @@ do - - case "$testscript" in - *hardlinks*) TESTRUN_TIMEOUT=600 ;; -+ *default-acls*) continue ;; - *) TESTRUN_TIMEOUT=300 ;; - esac - diff --git a/0003-rsync-3.2.4-hello-test.patch b/0003-rsync-3.2.4-hello-test.patch deleted file mode 100644 index e6bb4b727f7986d39505169cd9e4c08a53bd2fd7..0000000000000000000000000000000000000000 --- a/0003-rsync-3.2.4-hello-test.patch +++ /dev/null @@ -1,31 +0,0 @@ -diff --git a/testsuite/00-hello.test b/testsuite/00-hello.test -index a359753..ec0279a 100644 ---- a/testsuite/00-hello.test -+++ b/testsuite/00-hello.test -@@ -29,7 +29,7 @@ append_line test1 - checkit "$RSYNC -ai '$fromdir/' '$todir/'" "$fromdir" "$todir" - - copy_weird() { -- checkit "$RSYNC $1 \"$2$fromdir/$weird_name/\" \"$3$todir/$weird_name\"" "$fromdir" "$todir" -+ checkit "$RSYNC $1 --rsync-path='$RSYNC' '$2$fromdir/$weird_name/' '$3$todir/$weird_name'" "$fromdir" "$todir" - } - - append_line test2 -@@ -47,7 +47,7 @@ copy_weird '-ais' '' 'lh:' - echo test6 - - touch 
"$fromdir/one" "$fromdir/two" --(cd "$fromdir" && $RSYNC -ai --old-args lh:'one two' "$todir/") -+(cd "$fromdir" && $RSYNC -ai --old-args --rsync-path="$RSYNC" lh:'one two' "$todir/") - if [ ! -f "$todir/one" ] || [ ! -f "$todir/two" ]; then - test_fail "old-args copy of 'one two' failed" - fi -@@ -55,7 +55,7 @@ fi - echo test7 - - rm "$todir/one" "$todir/two" --(cd "$fromdir" && RSYNC_OLD_ARGS=1 $RSYNC -ai lh:'one two' "$todir/") -+(cd "$fromdir" && RSYNC_OLD_ARGS=1 $RSYNC -ai --rsync-path="$RSYNC" lh:'one two' "$todir/") - - # The script would have aborted on error, so getting here means we've won. - exit 0 diff --git a/0004-cve-2018-25032.patch b/0004-cve-2018-25032.patch deleted file mode 100644 index 6e558996110c2730474957fb2df960292618f7ae..0000000000000000000000000000000000000000 --- a/0004-cve-2018-25032.patch +++ /dev/null @@ -1,343 +0,0 @@ -From 5c44459c3b28a9bd3283aaceab7c615f8020c531 Mon Sep 17 00:00:00 2001 -From: Mark Adler -Date: Tue, 17 Apr 2018 22:09:22 -0700 -Subject: [PATCH] Fix a bug that can crash deflate on some input when using - Z_FIXED. - -This bug was reported by Danilo Ramos of Eideticom, Inc. It has -lain in wait 13 years before being found! The bug was introduced -in zlib 1.2.2.2, with the addition of the Z_FIXED option. That -option forces the use of fixed Huffman codes. For rare inputs with -a large number of distant matches, the pending buffer into which -the compressed data is written can overwrite the distance symbol -table which it overlays. That results in corrupted output due to -invalid distances, and can result in out-of-bound accesses, -crashing the application. - -The fix here combines the distance buffer and literal/length -buffers into a single symbol buffer. Now three bytes of pending -buffer space are opened up for each literal or length/distance -pair consumed, instead of the previous two bytes. 
This assures -that the pending buffer cannot overwrite the symbol table, since -the maximum fixed code compressed length/distance is 31 bits, and -since there are four bytes of pending space for every three bytes -of symbol space. ---- - deflate.c | 74 ++++++++++++++++++++++++++++++++++++++++--------------- - deflate.h | 25 +++++++++---------- - trees.c | 50 +++++++++++-------------------------- - 3 files changed, 79 insertions(+), 70 deletions(-) - -diff --git a/zlib/deflate.c b/zlib/deflate.c -index 425babc00..19cba873a 100644 ---- a/zlib/deflate.c -+++ b/zlib/deflate.c -@@ -255,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, - int wrap = 1; - static const char my_version[] = ZLIB_VERSION; - -- ushf *overlay; -- /* We overlay pending_buf and d_buf+l_buf. This works since the average -- * output size for (length,distance) codes is <= 24 bits. -- */ -- - if (version == Z_NULL || version[0] != my_version[0] || - stream_size != sizeof(z_stream)) { - return Z_VERSION_ERROR; -@@ -329,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, - - s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - -- overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); -- s->pending_buf = (uchf *) overlay; -- s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); -+ /* We overlay pending_buf and sym_buf. This works since the average size -+ * for length/distance pairs over any compressed block is assured to be 31 -+ * bits or less. -+ * -+ * Analysis: The longest fixed codes are a length code of 8 bits plus 5 -+ * extra bits, for lengths 131 to 257. The longest fixed distance codes are -+ * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest -+ * possible fixed-codes length/distance pair is then 31 bits total. -+ * -+ * sym_buf starts one-fourth of the way into pending_buf. So there are -+ * three bytes in sym_buf for every four bytes in pending_buf. 
Each symbol -+ * in sym_buf is three bytes -- two for the distance and one for the -+ * literal/length. As each symbol is consumed, the pointer to the next -+ * sym_buf value to read moves forward three bytes. From that symbol, up to -+ * 31 bits are written to pending_buf. The closest the written pending_buf -+ * bits gets to the next sym_buf symbol to read is just before the last -+ * code is written. At that time, 31*(n-2) bits have been written, just -+ * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at -+ * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1 -+ * symbols are written.) The closest the writing gets to what is unread is -+ * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and -+ * can range from 128 to 32768. -+ * -+ * Therefore, at a minimum, there are 142 bits of space between what is -+ * written and what is read in the overlain buffers, so the symbols cannot -+ * be overwritten by the compressed data. That space is actually 139 bits, -+ * due to the three-bit fixed-code block header. -+ * -+ * That covers the case where either Z_FIXED is specified, forcing fixed -+ * codes, or when the use of fixed codes is chosen, because that choice -+ * results in a smaller compressed block than dynamic codes. That latter -+ * condition then assures that the above analysis also covers all dynamic -+ * blocks. A dynamic-code block will only be chosen to be emitted if it has -+ * fewer bits than a fixed-code block would for the same set of symbols. -+ * Therefore its average symbol length is assured to be less than 31. So -+ * the compressed data for a dynamic block also cannot overwrite the -+ * symbols from which it is being constructed. 
-+ */ -+ -+ s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); -+ s->pending_buf_size = (ulg)s->lit_bufsize * 4; - - if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || - s->pending_buf == Z_NULL) { -@@ -340,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, - deflateEnd (strm); - return Z_MEM_ERROR; - } -- s->d_buf = overlay + s->lit_bufsize/sizeof(ush); -- s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; -+ s->sym_buf = s->pending_buf + s->lit_bufsize; -+ s->sym_end = (s->lit_bufsize - 1) * 3; -+ /* We avoid equality with lit_bufsize*3 because of wraparound at 64K -+ * on 16 bit machines and because stored blocks are restricted to -+ * 64K-1 bytes. -+ */ - - s->level = level; - s->strategy = strategy; -@@ -552,7 +589,7 @@ int ZEXPORT deflatePrime (strm, bits, value) - - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; - s = strm->state; -- if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) -+ if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) - return Z_BUF_ERROR; - do { - put = Buf_size - s->bi_valid; -@@ -1113,7 +1150,6 @@ int ZEXPORT deflateCopy (dest, source) - #else - deflate_state *ds; - deflate_state *ss; -- ushf *overlay; - - - if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { -@@ -1133,8 +1169,7 @@ int ZEXPORT deflateCopy (dest, source) - ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); - ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); - ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); -- overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); -- ds->pending_buf = (uchf *) overlay; -+ ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); - - if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || - ds->pending_buf == Z_NULL) { -@@ -1148,8 +1183,7 @@ int ZEXPORT deflateCopy (dest, source) - zmemcpy(ds->pending_buf, ss->pending_buf, 
(uInt)ds->pending_buf_size); - - ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); -- ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); -- ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; -+ ds->sym_buf = ds->pending_buf + ds->lit_bufsize; - - ds->l_desc.dyn_tree = ds->dyn_ltree; - ds->d_desc.dyn_tree = ds->dyn_dtree; -@@ -1771,7 +1771,7 @@ local block_state deflate_fast(s, flush) - FLUSH_BLOCK(s, 1); - return finish_done; - } -- if (s->last_lit) -+ if (s->sym_next) - FLUSH_BLOCK(s, 0); - return block_done; - } -@@ -1912,7 +1912,7 @@ local block_state deflate_slow(s, flush) - FLUSH_BLOCK(s, 1); - return finish_done; - } -- if (s->last_lit) -+ if (s->sym_next) - FLUSH_BLOCK(s, 0); - return block_done; - } -@@ -1987,7 +1987,7 @@ local block_state deflate_rle(s, flush) - FLUSH_BLOCK(s, 1); - return finish_done; - } -- if (s->last_lit) -+ if (s->sym_next) - FLUSH_BLOCK(s, 0); - return block_done; - } -@@ -2026,7 +2026,7 @@ local block_state deflate_huff(s, flush) - FLUSH_BLOCK(s, 1); - return finish_done; - } -- if (s->last_lit) -+ if (s->sym_next) - FLUSH_BLOCK(s, 0); - return block_done; - } -diff --git a/zlib/deflate.h b/zlib/deflate.h -index 23ecdd312..d4cf1a98b 100644 ---- a/zlib/deflate.h -+++ b/zlib/deflate.h -@@ -217,7 +217,7 @@ typedef struct internal_state { - /* Depth of each subtree used as tie breaker for trees of equal frequency - */ - -- uchf *l_buf; /* buffer for literals or lengths */ -+ uchf *sym_buf; /* buffer for distances and literals/lengths */ - - uInt lit_bufsize; - /* Size of match buffer for literals/lengths. There are 4 reasons for -@@ -239,13 +239,8 @@ typedef struct internal_state { - * - I can't count above 4 - */ - -- uInt last_lit; /* running index in l_buf */ -- -- ushf *d_buf; -- /* Buffer for distances. To simplify the code, d_buf and l_buf have -- * the same number of elements. To use different lengths, an extra flag -- * array would be necessary. 
-- */ -+ uInt sym_next; /* running index in sym_buf */ -+ uInt sym_end; /* symbol table full when sym_next reaches this */ - - ulg opt_len; /* bit length of current block with optimal trees */ - ulg static_len; /* bit length of current block with static trees */ -@@ -317,20 +317,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, - - # define _tr_tally_lit(s, c, flush) \ - { uch cc = (c); \ -- s->d_buf[s->last_lit] = 0; \ -- s->l_buf[s->last_lit++] = cc; \ -+ s->sym_buf[s->sym_next++] = 0; \ -+ s->sym_buf[s->sym_next++] = 0; \ -+ s->sym_buf[s->sym_next++] = cc; \ - s->dyn_ltree[cc].Freq++; \ -- flush = (s->last_lit == s->lit_bufsize-1); \ -+ flush = (s->sym_next == s->sym_end); \ - } - # define _tr_tally_dist(s, distance, length, flush) \ - { uch len = (length); \ - ush dist = (distance); \ -- s->d_buf[s->last_lit] = dist; \ -- s->l_buf[s->last_lit++] = len; \ -+ s->sym_buf[s->sym_next++] = dist; \ -+ s->sym_buf[s->sym_next++] = dist >> 8; \ -+ s->sym_buf[s->sym_next++] = len; \ - dist--; \ - s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ - s->dyn_dtree[d_code(dist)].Freq++; \ -- flush = (s->last_lit == s->lit_bufsize-1); \ -+ flush = (s->sym_next == s->sym_end); \ - } - #else - # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) -diff --git a/zlib/trees.c b/zlib/trees.c -index 4f4a65011..decaeb7c3 100644 ---- a/zlib/trees.c -+++ b/zlib/trees.c -@@ -416,7 +416,7 @@ local void init_block(s) - - s->dyn_ltree[END_BLOCK].Freq = 1; - s->opt_len = s->static_len = 0L; -- s->last_lit = s->matches = 0; -+ s->sym_next = s->matches = 0; - } - - #define SMALLEST 1 -@@ -948,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) - - Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", - opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, -- s->last_lit)); -+ s->sym_next / 3)); - - if (static_lenb <= opt_lenb) opt_lenb = static_lenb; - -@@ -1017,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, 
lc) - unsigned dist; /* distance of matched string */ - unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ - { -- s->d_buf[s->last_lit] = (ush)dist; -- s->l_buf[s->last_lit++] = (uch)lc; -+ s->sym_buf[s->sym_next++] = dist; -+ s->sym_buf[s->sym_next++] = dist >> 8; -+ s->sym_buf[s->sym_next++] = lc; - if (dist == 0) { - /* lc is the unmatched char */ - s->dyn_ltree[lc].Freq++; -@@ -1033,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) - s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; - s->dyn_dtree[d_code(dist)].Freq++; - } -- --#ifdef TRUNCATE_BLOCK -- /* Try to guess if it is profitable to stop the current block here */ -- if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { -- /* Compute an upper bound for the compressed length */ -- ulg out_length = (ulg)s->last_lit*8L; -- ulg in_length = (ulg)((long)s->strstart - s->block_start); -- int dcode; -- for (dcode = 0; dcode < D_CODES; dcode++) { -- out_length += (ulg)s->dyn_dtree[dcode].Freq * -- (5L+extra_dbits[dcode]); -- } -- out_length >>= 3; -- Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", -- s->last_lit, in_length, out_length, -- 100L - out_length*100L/in_length)); -- if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; -- } --#endif -- return (s->last_lit == s->lit_bufsize-1); -- /* We avoid equality with lit_bufsize because of wraparound at 64K -- * on 16 bit machines and because stored blocks are restricted to -- * 64K-1 bytes. 
-- */ -+ return (s->sym_next == s->sym_end); - } - - /* =========================================================================== -@@ -1069,13 +1047,14 @@ local void compress_block(s, ltree, dtree) - { - unsigned dist; /* distance of matched string */ - int lc; /* match length or unmatched char (if dist == 0) */ -- unsigned lx = 0; /* running index in l_buf */ -+ unsigned sx = 0; /* running index in sym_buf */ - unsigned code; /* the code to send */ - int extra; /* number of extra bits to send */ - -- if (s->last_lit != 0) do { -- dist = s->d_buf[lx]; -- lc = s->l_buf[lx++]; -+ if (s->sym_next != 0) do { -+ dist = s->sym_buf[sx++] & 0xff; -+ dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; -+ lc = s->sym_buf[sx++]; - if (dist == 0) { - send_code(s, lc, ltree); /* send a literal byte */ - Tracecv(isgraph(lc), (stderr," '%c' ", lc)); -@@ -1100,11 +1079,10 @@ local void compress_block(s, ltree, dtree) - } - } /* literal or match pair ? */ - -- /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ -- Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, -- "pendingBuf overflow"); -+ /* Check that the overlay between pending_buf and sym_buf is ok: */ -+ Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); - -- } while (lx < s->last_lit); -+ } while (sx < s->sym_next); - - send_code(s, END_BLOCK, ltree); - } diff --git a/0005-restart-daemon-on-failure.patch b/0005-restart-daemon-on-failure.patch deleted file mode 100644 index 0afba61f7f7db42122b7bf68950932fb2d5f0c8c..0000000000000000000000000000000000000000 --- a/0005-restart-daemon-on-failure.patch +++ /dev/null @@ -1,27 +0,0 @@ -From d41bb98c09bf0b999c4eee4e2125c7e5d0747ec4 Mon Sep 17 00:00:00 2001 -From: Simon Deziel -Date: Mon, 11 Apr 2022 12:08:11 -0400 -Subject: [PATCH] systemd: restart daemon on-failure (#302) - -man 5 systemd.service: -> Setting this to on-failure is the recommended choice for long-running services - -Partial fix for https://bugzilla.samba.org/show_bug.cgi?id=13463 - 
-Signed-off-by: Simon Deziel ---- - packaging/systemd/rsync.service | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/packaging/systemd/rsync.service b/packaging/systemd/rsync.service -index 8a0b5820..8a867ca6 100644 ---- a/packaging/systemd/rsync.service -+++ b/packaging/systemd/rsync.service -@@ -7,6 +7,7 @@ Documentation=man:rsync(1) man:rsyncd.conf(5) - [Service] - ExecStart=/usr/bin/rsync --daemon --no-detach - RestartSec=1 -+Restart=on-failure - - # Citing README.md: - # diff --git a/rsync-patches-3.2.4pre3.tar.gz b/rsync-patches-3.2.4pre3.tar.gz deleted file mode 100644 index 9991ca62145feabb05bcc18a9578d1e6363c1f46..0000000000000000000000000000000000000000 Binary files a/rsync-patches-3.2.4pre3.tar.gz and /dev/null differ diff --git a/rsync.spec b/rsync.spec index b75c124b1fdf52ca7f26efe9a300410fcbd08a6d..632f216c645dfcb8086e3cb70ac95c195c283c92 100644 --- a/rsync.spec +++ b/rsync.spec @@ -1,16 +1,14 @@ %define anolis_release 1 -%define pre_release pre3 -%define version_num 3.2.4 +%define version_num 3.2.5 Name: rsync -Version: %{version_num}~%{pre_release} +Version: %{version_num} Release: %{anolis_release}%{?dist} Summary: A program for synchronizing files over a network License: GPLv3+ URL: https://github.com/WayneD/rsync -Source0: https://github.com/WayneD/rsync/archive/refs/tags/v%{version_num}%{pre_release}.tar.gz -Source1: https://github.com/WayneD/rsync/archive/refs/tags/%{name}-patches-%{version_num}%{pre_release}.tar.gz +Source0: https://github.com/WayneD/rsync/archive/refs/tags/v%{version_num}.tar.gz Source2: rsyncd.socket Source3: rsyncd.service Source4: rsyncd.conf @@ -31,14 +29,6 @@ BuildRequires: libzstd-devel BuildRequires: xxhash-devel BuildRequires: python3-cmarkgfm -#needed to make hello test run correctly -Patch0: 0000-bugfix-test-md5-check-failure-1.patch -Patch1: 0001-bugfix-test-md5-check-failure-2.patch -Patch2: 0002-rsync-3.2.2-runtests.patch -Patch3: 0003-rsync-3.2.4-hello-test.patch - -Patch4: 0004-cve-2018-25032.patch 
-Patch5: 0005-restart-daemon-on-failure.patch
 
 %description
 Rsync uses a reliable algorithm to bring remote and host files into
@@ -49,6 +39,13 @@ just as a more capable replacement for the rcp command. A technical
 report which describes the rsync algorithm is included in this
 package.
 
+%package doc
+Summary: Documentation files for %{name}
+Requires: %{name} = %{version}-%{release}
+
+%description doc
+The %{name}-doc package contains documentation files for %{name}.
+
 %package daemon
 Summary: Service for anonymous access to rsync
 BuildArch: noarch
@@ -60,10 +57,8 @@ Rsync can be used to offer read only access to anonymous clients. This
 package provides the anonymous rsync service.
 
 %prep
-%autosetup -b 1 -n %{name}-%{version_num}%{pre_release} -p1
+%autosetup -n %{name}-%{version_num} -p1
 
-#Enable --copy-devices parameter
-patch -p1 -i patches/copy-devices.diff
 
 %build
 %configure \
@@ -98,7 +93,6 @@ chmod -x support/*
 
 %files
 %license COPYING
-%doc support/ tech_report.tex
 %{_bindir}/%{name}
 %{_bindir}/%{name}-ssl
 %{_mandir}/man1/%{name}.1*
@@ -106,6 +100,9 @@ chmod -x support/*
 %{_mandir}/man5/rsyncd.conf.5*
 %config(noreplace) %{_sysconfdir}/rsyncd.conf
 
+%files doc
+%doc support/ tech_report.tex
+
 %files daemon
 %config(noreplace) %{_sysconfdir}/sysconfig/rsyncd
 %{_unitdir}/rsyncd.socket
@@ -113,5 +110,8 @@ chmod -x support/*
 %{_unitdir}/rsyncd@.service
 
 %changelog
+* Tue Aug 30 2022 mgb01105731 - 3.2.5-1
+- update rsync to 3.2.5
+
 * Thu Apr 14 2022 happy_orange - 3.2.4~pre3-1
 - Init package from upstream
diff --git a/v3.2.4pre3.tar.gz b/v3.2.4pre3.tar.gz
deleted file mode 100644
index 41ff6344ff07166e9ce79d0953eb11e3229bf461..0000000000000000000000000000000000000000
Binary files a/v3.2.4pre3.tar.gz and /dev/null differ
diff --git a/v3.2.5.tar.gz b/v3.2.5.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..70d6afaaeadb63721616b7cf717f1f735557fc45
Binary files /dev/null and b/v3.2.5.tar.gz differ