
(* ---------------------------------------------------------------
Title         Q&D Adler-32
Author        PhG
Overview      self-explanatory !
Notes         >= 386 !
Bugs
Wish List     assembly ? ah ah, only serious ! more general a subrs (buffer, count)

--------------------------------------------------------------- *)

IMPLEMENTATION MODULE QD_Adler;

IMPORT FIO;
IMPORT Lib;

CONST
    ADLERfast = TRUE; (* slightly faster M2 code *)
    FULL32    = TRUE; (* changes NMAX but no apparent effect ! *)

(* ------------------------------------------------------------ *)

CONST
    ioBufferSize      = (8 * 512) + FIO.BufferOverhead;
    firstioBufferByte = 1;
    lastioBufferByte  = ioBufferSize;
TYPE
    ioBufferType  = ARRAY [firstioBufferByte..lastioBufferByte] OF BYTE;
VAR
    sourceBuffer : ioBufferType;

CONST
    firstDataByte = 0;
    lastDataByte  = 8*512-1; (* was 32 then 16 *)
    DataBufferSize= lastDataByte - firstDataByte + 1;
TYPE
    dataBufferType = ARRAY [firstDataByte..lastDataByte] OF BYTE;
VAR
    dataBuffer : dataBufferType;

(* ------------------------------------------------------------ *)

TYPE
    pbyte = POINTER TO dataBufferType;

(*
   from original C code :
   Update a running Adler-32 checksum with the bytes buf[0..len-1]
   and return the updated checksum.
   If buf is NIL, this function returns the required initial value for the checksum.
   An Adler-32 checksum is almost as reliable as a CRC32
   but can be computed much faster.
*)

PROCEDURE DO1 (buf:pbyte;i:CARDINAL;VAR s1,s2:LONGCARD);
BEGIN
    INC(s1, LONGCARD(buf^[i]) );
    INC(s2,s1);
END DO1;

PROCEDURE DO2 (buf:pbyte;i:CARDINAL;VAR s1,s2:LONGCARD);
BEGIN
    DO1(buf,i  ,s1,s2);
    DO1(buf,i+1,s1,s2);
END DO2;

PROCEDURE DO4 (buf:pbyte;i:CARDINAL;VAR s1,s2:LONGCARD);
BEGIN
    DO2(buf,i  ,s1,s2);
    DO2(buf,i+2,s1,s2);
END DO4;

PROCEDURE DO8 (buf:pbyte;i:CARDINAL;VAR s1,s2:LONGCARD);
BEGIN
    DO4(buf,i  ,s1,s2);
    DO4(buf,i+4,s1,s2);
END DO8;

PROCEDURE DO16 (buf:pbyte;VAR s1,s2:LONGCARD);
BEGIN
    DO8(buf,0  ,s1,s2);
    DO8(buf,8  ,s1,s2);
END DO16;

PROCEDURE doadler32 (adler:LONGCARD; buf : pbyte; len:CARDINAL):LONGCARD;
CONST
    BASE =LONGCARD(65521); (* largest prime smaller than 65536 *)
(*%T FULL32 *)
    NMAX =5552; (* the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 *)
(*%E  *)
(*%F FULL32 *)
    NMAX =3854; (* the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^31-1 *)
(*%E *)
VAR
    s1,s2 : LONGCARD;
    k,i,j,x,y:CARDINAL;
BEGIN
    IF buf = NIL THEN RETURN 1; END;

    s1 := adler         AND LONGCARD(0000FFFFH);
    s2 := (adler >> 16) AND LONGCARD(0000FFFFH);

    WHILE len > 0 DO
        IF len < NMAX THEN
            k:=len;
        ELSE
            k:=NMAX;
        END;
        DEC(len,k);
        WHILE k >= 16 DO
(*%F ADLERfast *)
            DO16(buf,s1,s2);
(*%E  *)
(*%T ADLERfast *)
            i:=0; j:=i+4; x:=i+2; y:=j+2;

            INC(s1, LONGCARD(buf^[i  ]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[i+1]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[x  ]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[x+1]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[j  ]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[j+1]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[y  ]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[y+1]) );  INC(s2,s1);

            i:=8; j:=i+4; x:=i+2; y:=j+2;

            INC(s1, LONGCARD(buf^[i  ]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[i+1]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[x  ]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[x+1]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[j  ]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[j+1]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[y  ]) );  INC(s2,s1);
            INC(s1, LONGCARD(buf^[y+1]) );  INC(s2,s1);

(*%E  *)
	        Lib.IncAddr(buf,16);
            DEC(k,16);
        END;
        WHILE k # 0 DO
           INC(s1, LONGCARD(buf^[0]) );
           Lib.IncAddr(buf,1);
           INC(s2,s1);
           DEC(k);
        END;
        s1 := s1 MOD BASE;
        s2 := s2 MOD BASE;
    END;
    RETURN ( (s2 << 16) OR s1 );
END doadler32;

(* assume S exists *)

PROCEDURE ComputeAdler32 (S:ARRAY OF CHAR):LONGCARD;
VAR
    hnd:FIO.File;
    got:CARDINAL;
    cs :LONGCARD;
    p  :pbyte;
BEGIN
    hnd:=FIO.OpenRead(S);
    FIO.AssignBuffer(hnd,sourceBuffer);

    p :=ADR(dataBuffer);
    cs:=doadler32(0,NIL,0);

    LOOP
        got:=FIO.RdBin(hnd,dataBuffer,DataBufferSize);
        IF got = 0 THEN EXIT; END;
        cs:=doadler32(cs,p,got);
        IF got # DataBufferSize THEN EXIT; END;
    END;
    FIO.Close(hnd);
    RETURN cs;
END ComputeAdler32;

(* ------------------------------------------------------------ *)

BEGIN
END QD_Adler.


(*

Both Adler-32 and CRC-32 (cyclic redundancy check) are 32-bit checks. But
while the CRC can take on any 32-bit value (232 possibilities), Adler-32 is
limited to 655212 possibilities. So the probability of a false positive on
random errors for CRC-32 is 2.3283 x 10-10, whereas it is very slightly
higher for Adler-32 at 2.3294 x 10-10.

The above assumes that all the values are accessible given the amount of
data. That is true after only four bytes for the CRC-32, but Adler-32
requires, on the average, about 0.5 KB of data to get rolling--or 1 KB if
it's ASCII data (text). So if the Adler-32 is used on significantly less than
about a kilobyte, it will be noticeably weaker than a CRC-32 on the same
small block.

A properly constructed CRC-n has the nice property that less than n bits in
error is always detectable. This is not always true for Adler-32--it can
detect all one- or two-byte errors but can miss some three-byte errors.
However, Adler-32 has been constructed to minimize the ways to make small
changes in the data that result in the same check value, through the use of
sums significantly larger than the bytes and by using a prime (65521) for the
modulus. It is in this area that some analysis is deserved, but it has not
yet been done.

This last potential weakness is not a major concern in the application of
Adler-32 to zlib (or any other history-based compressor), since if there is
an error at some point in a stream, it will be massively propagated after
that. It would be of concern in an application with transmission or storage
that has a borderline signal-to-noise ratio, for which small numbers of
random errors are expected. For that sort of application one would certainly
want to use a CRC or, better yet, Reed-Solomon error-correction coding. But
even in this case, if the data being transmitted or stored uses some sort of
history-dependent compression (as in zlib) and was compressible to begin
with, then an Adler-32 used after decompression would be adequate since the
decompressor would significantly amplify any small errors in the compressed
stream. (For incompressible data, most modern compressors operate in a
pass-through mode, so the original comment about using a CRC or ECC holds.)

The main reason for Adler-32 is, of course, speed in software
implementations. The authors wanted a check on zlib's decompression, but not
a significant speed penalty just for the check. So Mark came up with the
Adler-32 as a faster but still effective alternative to the CRC-32.

*)

(*

original adler32.c

/* adler32.c -- compute the Adler-32 checksum of a data stream
 * Copyright (C) 1995 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#include "zlib.h"

#define BASE 65521L /* largest prime smaller than 65536 */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */

#define DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
#define DO16(buf)   DO8(buf,0); DO8(buf,8);

/* ========================================================================= */

uLong ZEXPORT adler32(adler, buf, len)
    uLong adler;
    const Bytef *buf;
    uInt len;
{
    unsigned long s1 = adler & 0xffff;
    unsigned long s2 = (adler >> 16) & 0xffff;
    int k;

    if (buf == Z_NULL) return 1L;

    while (len > 0) {
        k = len < NMAX ? len : NMAX;
        len -= k;
        while (k >= 16) {
            DO16(buf);
	    buf += 16;
            k -= 16;
        }
        if (k != 0) do {
            s1 += *buf++;
	    s2 += s1;
        } while (--k);
        s1 %= BASE;
        s2 %= BASE;
    }
    return (s2 << 16) | s1;
}

*)
