(* ---------------------------------------------------------------
Title         see help !
Author        who cares ?
Overview      see help !
Usage         see help !
Notes         try and replace my old PowerBasic code : dupscrc, samecrc, idcrc
              filter out columns interval used to check for duplicate state ?
Bugs
Wish List

--------------------------------------------------------------- *)

MODULE dupLines;

IMPORT Lib;
IMPORT FIO;
IMPORT Str;
IMPORT IO;
IMPORT SYSTEM;

FROM IO IMPORT WrStr,WrLn, WrLngCard;

FROM QD_Box IMPORT str80, str2, cmdInit, cmdShow, cmdStop, delim,
Work, video, Ltrim, Rtrim, UpperCase, LowerCase, ReplaceChar,
ChkEscape, Waitkey, WaitkeyDelay, Flushkey, IsRedirected, chkJoker,
isOption, GetOptIndex, GetLongCard, GetLongInt, GetString, CharCount,
same, aR, aH, aS, aD, aA, everything, isDirectory, fixDirectory,
str128, str256, Animation, allfiles, Belongs, FixAE, CodePhonetic,
CodeSoundex, CodeSoundexOrg, isReadOnly, LtrimBlanks, RtrimBlanks,
getStrIndex, cmdSHOW,BiosWaitkey,BiosWaitkeyShifted,BiosFlushkey,
str1024, isoleItemS, dmpTTX, str2048, Elapsed, TerminalReadString,
getDosVersion, DosVersion, warning95, runningWindows,
aV, reallyeverything, chkClassicTextMode, setClassicTextMode,
AltAnimation, str16, getCurrentDirectory, setReadWrite,
getFileSize, verifyString, str4096, unfixDirectory,
animShow, animSHOW, animAdvance, animEnd, animClear,
animInit, animGetSdone, anim, cleantabs,
completedInit, completedShow, completedSHOW, completedEnd, completed;

(* ------------------------------------------------------------ *)

(* Character and string building blocks used throughout the module. *)
CONST
    cr            = CHR(13);
    lf            = CHR(10);
    nl            = cr+lf;       (* CR+LF pair, used to build the help text *)
    extLOG        = ".LOG";      (* appended to <list> when its name holds no dot *)
    (*
    extRPT        = ".RPT";
    REPORT        = "DUPLINES"+extRPT;
    *)
    backslash     = "\";         (* searched from the right when the path is filtered out *)
    dot           = ".";
    semicolon     = ";";
    pound         = "#";
    remark1       = semicolon;   (* input lines starting with remark1 or remark2 are skipped *)
    remark2       = pound;
    blank         = " ";
    star          = "*";
    sEOL          = star;        (* range marker standing for "up to end of line" *)
    endOfString   = CHR(0);      (* first character of an empty line *)
    dash          = "-";
    prefixKeep    = "# ";        (* written before the duplicate flagged as the one to keep *)
    prefixKill    = "; ";        (* written before the other duplicates *)
(* Program identity, shown in the help banner and in error messages. *)
CONST
    progEXEname   = "DUPLINES";
    progTitle     = "Q&D Duplicate Lines Finder";
    progVersion   = "v1.0f";
    progCopyright = "by PhG";
    banner        = progTitle+" "+progVersion+" "+progCopyright;
(* Codes accepted by abort() ; the code is also handed to Lib.SetReturnCode. *)
CONST
    errNone             = 0;   (* normal termination, no message *)
    errHelp             = 1;   (* help text was shown *)
    errOption           = 2;   (* unknown option *)
    errParm             = 3;   (* one parameter too many *)
    errExpected         = 4;   (* missing parameter *)
    errJoker            = 5;   (* chkJoker() flagged the file name *)
    errNotFound         = 6;   (* file does not exist *)
    errRange            = 7;   (* parserange() rejected the range *)
    errAborted          = 8;   (* ChkEscape() reported a user break *)
    errUseless          = 9;   (* -d was combined with another option *)

(*
   Terminate the program with code e.

   e     : one of the errXXX constants ; it selects the message and is
           also passed to Lib.SetReturnCode as the process return code.
   einfo : text inserted in the message (option, parameter or file name) ;
           ignored by the codes that do not need it.

   errHelp prints the help text, errNone prints nothing, every other code
   prints "DUPLINES : <message>". The procedure never returns (HALT).
*)
PROCEDURE abort (e : CARDINAL; einfo : ARRAY OF CHAR);
CONST
(*
 00000000011111111112222222222333333333344444444445555555555666666666677777777778
 1...'....0....'....0....'....0....'....0....'....0....'....0....'....0....'....0
*)
    helpmsg =
banner+nl+
nl+
"Syntax 1 : "+progEXEname+" <range> <list["+extLOG+"]> [-option]..."+nl+
"Syntax 2 : "+progEXEname+" <-d> <list["+extLOG+"]>"+nl+
nl+
"This program reads sorted entries from <list>,"+nl+
"filtering out duplicates using specified <range> columns interval."+nl+
nl+
"<range> is either <first[..last]> or <first[,count]> :"+nl+
'<first> column is 1-based ; [last] or [count] may be "'+sEOL+'" for End Of Line.'+nl+
'Empty lines and lines beginning with either "'+remark1+'" or "'+remark2+'" are ignored.'+nl+
"Tabs are not expanded. Lines should not be greater than 4095 characters."+nl+
nl+
"    -e keep case"+nl+
"    -p remove path indication from analyzed portion of line"+nl+
"    -a do not favor first duplicate"+nl+
"    -r reverse display, showing unique lines"+nl+
"    -t left-trim each line before processing"+nl+
"    -d process DIRBAT (v1.2h or better) sorted listing built from DOS or /lfn"+nl+
"       (using date/time)"+nl+

nl+
"Examples : "+progEXEname+" -p 23 cs.log"+nl+
"           "+progEXEname+" -a 31..43 chkdata.log"+nl+
"           "+progEXEname+" 31,13 chkdata.log"+nl;

VAR
    S : str256;
BEGIN
    S := ""; (* the help path does not build a message : keep S defined anyway *)
    CASE e OF
    | errHelp :
        WrStr(helpmsg);
    | errOption :
        Str.Concat(S,"Unknown ",einfo); Str.Append(S," option !");
    | errParm :
        (* message spelling fixed : was "Uneeded" *)
        Str.Concat(S,"Unneeded ",einfo); Str.Append(S," parameter !");
    | errExpected:
        Str.Concat(S,"Missing ",einfo); Str.Append(S," parameter !");
    | errJoker:
        Str.Concat(S,einfo," should not contain any joker !");
    | errNotFound:
        Str.Concat(S,einfo," does not exist !");
    | errRange:
        Str.Concat(S,"Illegal ",einfo); Str.Append(S," range !");
    | errAborted:
        S := "Aborted by user !";
    | errUseless:
        S := "-d command does not require any option !";
    ELSE
        (* errNone lands here too, but its message is never printed *)
        S := "This is illogical, Captain !";
    END;
    CASE e OF
    | errNone,errHelp :
        ; (* nada *)
    ELSE
        WrStr(progEXEname+" : "); WrStr(S); WrLn;
    END;
    Lib.SetReturnCode(SHORTCARD(e));
    HALT;
END abort;

(* ------------------------------------------------------------ *)

(* Read buffer attached to the input list with FIO.AssignBuffer. *)
CONST
    ioBufferSize    = (8 * 512) + FIO.BufferOverhead; (* 4096 bytes plus FIO.BufferOverhead *)
    firstBufferByte = 1;
    lastBufferByte  = ioBufferSize;
TYPE
    ioBufferType  = ARRAY [firstBufferByte..lastBufferByte] OF BYTE;
VAR
    bufferIn : ioBufferType;

(* ------------------------------------------------------------ *)

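(*
legal range forms (# is a number, first column is 1-based) :
      #        first column only, up to end of line
      #..#     first..last ("-" is accepted instead of "..")
      #,#      first,count
the second # may be "*", meaning up to end of line
*)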

(*
   Parse a columns range specification.

   S           : range text ; passed by value, so the caller's copy is untouched
                 although S is edited here.
   firstcolumn : receives the 0-based first column (user value minus 1).
   columncount : receives the number of columns ; MAXRANGE when the range is
                 open-ended (single number, or second part equal to sEOL).

   Returns TRUE when S was understood, FALSE otherwise ; on FALSE the VAR
   parameters may already have been partly assigned.

   Separators are probed in the order "," then ".." then "-" ; when none is
   present S is taken as a single column number.
*)
PROCEDURE parserange (S:ARRAY OF CHAR;VAR firstcolumn,columncount:CARDINAL):BOOLEAN;
CONST
    MAXRANGE      = SIZE(str4096);
    sMAXRANGE     = "4096"; (* not referenced in this procedure *)
    delimcount    = ",";
    delimrange    = "..";
    delimrangealt = "-";
    trycount      = 1;
    tryrange      = 2;
    tryrangealt   = 3;
VAR
    j,p : CARDINAL;
    lc:LONGCARD;
    sep:str2; (* either a char or two chars *)
    pattern,snum:str128; (* snum could be str16 but "who knows what evil..." etc. *)
    rc:BOOLEAN;
BEGIN
    rc:=FALSE;
    j:=trycount-1; (* sic ! *)
    LOOP
        INC(j); (* next separator to probe : trycount, tryrange, tryrangealt *)
        IF j > tryrangealt THEN (* no separator found *)
            (* NOTE(review): GetLongCard presumably reads the number that follows "=" - confirm in QD_Box *)
            Str.Concat(snum,"=",S); (* ugly trick *)
            IF GetLongCard(snum,lc)=FALSE THEN EXIT; END;
            IF ( (lc < 1) OR (lc > MAXRANGE) ) THEN EXIT; END;
            firstcolumn:=CARDINAL(lc)-1; (* remember : from 0 ! *)
            columncount:=MAXRANGE; (* always use maximum *)
            rc:=TRUE;
            EXIT;
        END;
        CASE j OF
        | trycount    : sep:=delimcount;
        | tryrange    : sep:=delimrange;
        | tryrangealt : sep:=delimrangealt;
        END;
        (* does S contain this separator anywhere ? *)
        Str.Concat(pattern,"*",sep); Str.Append(pattern,"*");
        IF Str.Match(S, pattern) THEN
            (* split : snum gets the text before the separator, S keeps what follows it *)
            p:=Str.Pos(S,sep);
            IF p > 0 THEN
                Str.Slice(snum,S,0,p);
                Str.Delete(S,0,p);
            ELSE
                snum:=""; (* separator comes first : no first column given *)
            END;
            Str.Subst(S,sep,""); (* drop the separator itself *)
            (* after all, do not allow lazy default trailing "*" *)
            (* IF same(S,"") THEN Str.Copy(S,sEOL);END; *)

            Str.Prepend(snum,"="); (* ugly trick *)
            IF GetLongCard(snum,lc)=FALSE THEN EXIT; END;
            IF ( (lc < 1) OR (lc > MAXRANGE) ) THEN EXIT; END;
            firstcolumn:=CARDINAL(lc)-1; (* 0-based for Str functions *)

            IF same(S,sEOL) THEN
                columncount:=MAXRANGE; (* always use maximum *)
            ELSE
                Str.Concat(snum,"=",S);
                IF GetLongCard(snum,lc)=FALSE THEN EXIT; END;
                IF ( (lc < 1) OR (lc > MAXRANGE) ) THEN EXIT; END;
                CASE j OF
                | trycount :
                    columncount:=CARDINAL(lc); (* second number is already a count *)
                | tryrange,tryrangealt :
                    (* second number is the 1-based last column : reject last < first *)
                    IF CARDINAL(lc) < (firstcolumn+1) THEN EXIT; END;
                    columncount:=CARDINAL(lc)-1-firstcolumn+1; (* last-first+1, both ends included *)
                END;
            END;
            rc:=TRUE;
            EXIT;
        END;
    END;
    RETURN rc;
END parserange;

(* ------------------------------------------------------------ *)

(*
   Copy S into R, then append ch until R is wi characters long.

   R  : receives the padded text.
   wi : wanted width ; nothing is appended when S is already that long.
   ch : padding character.
   S  : source text.

   The width is capped to the capacity of R : once R is full, Str.Append can
   no longer lengthen it and an uncapped loop would never end.
*)
PROCEDURE pad (VAR R:ARRAY OF CHAR;wi:CARDINAL;ch:CHAR; S:ARRAY OF CHAR   );
VAR
    limit:CARDINAL;
BEGIN
    Str.Copy(R,S);
    limit:=HIGH(R)+1; (* number of characters R can hold *)
    IF wi < limit THEN limit:=wi; END;
    WHILE Str.Length(R) < limit DO
        Str.Append(R,ch);
    END;
END pad;

(* Return v as a decimal string ; the status reported by Str.CardToStr is not checked. *)
PROCEDURE numToStr (v:CARDINAL):str80;
VAR
    digits:str80;
    wide:LONGCARD;
    converted:BOOLEAN;
BEGIN
    wide:=LONGCARD(v);
    Str.CardToStr(wide, digits, 10, converted);
    RETURN digits;
END numToStr;

(* Write one line "<S padded with blanks to wi columns> : yes|no" according to flag. *)
PROCEDURE wrboolean (wi:CARDINAL;flag:BOOLEAN;S:ARRAY OF CHAR);
VAR
    line:str128;
BEGIN
    pad(line,wi,blank,S);
    Str.Append(line," : ");
    IF NOT(flag) THEN
        Str.Append(line,"no");
    ELSE
        Str.Append(line,"yes");
    END;
    WrStr(line);
    WrLn;
END wrboolean;

(* Write one line "<S padded with blanks to wi columns> : <v in decimal>". *)
PROCEDURE wrvalue (wi,v:CARDINAL;S:ARRAY OF CHAR );
VAR
    line:str128;
    number:str80;
BEGIN
    number:=numToStr(v);
    pad(line,wi,blank,S);
    Str.Append(line," : ");
    Str.Append(line,number);
    WrStr(line);
    WrLn;
END wrvalue;

(* ------------------------------------------------------------ *)

(*
DIRBAT v1.2h /LFN
0        1         2         3         4         5         6         7
1...'....0....'....0....'....0....'....0....'....0....'....0.........0
srs     .rpt            4,230  09-Apr-2007  10:02:34    c:\mesdoc~1\street~1\
*)

(* Storage for the lines of the duplicate group currently being collected in -d mode. *)
CONST
    firstdup  = 1;
    maxdup    = 250; (* should do *)
VAR
    sdup      : ARRAY [firstdup..maxdup] OF str256; (* read by isLess, reordered by doSwap, printed by sortAndDump *)

(*
   Ordering predicate handed to Lib.QSort : TRUE when the date/time stamp of
   sdup[i] compares lower than the stamp of sdup[j].

   Each stamp is rebuilt from the "dd-MMM-yyyy" and "hh:mm:ss" fields found at
   columns 32 and 45 of the line, as "yyyymmdd" followed by the time field
   after ReplaceChar has processed blanks and colons.
*)
PROCEDURE isLess (i,j:CARDINAL):BOOLEAN;
CONST
    dirbatDMY = 32-1;
    dirbatHMS = 45-1;
VAR
    stampi,stampj:str16;

    (* Build the comparison stamp of sdup[entry] into stamp. *)
    PROCEDURE buildStamp (entry:CARDINAL; VAR stamp:str16);
    VAR
        datePart,timePart:str16;
        line:str256;
        piece:str80; (* for numToStr *)
        month:CARDINAL;
    BEGIN
        line:=sdup[entry];
        Str.Slice(datePart,line,dirbatDMY,11); (* dd-MMM-yyyy *)
        Str.Slice(timePart,line,dirbatHMS,8);  (* hh:mm:ss *)

        (* month name, looked up as an option among English and French names *)
        Str.Slice(piece,datePart,4-1,3);       (* MMM *)
        Str.Caps(piece);
        Str.Prepend(piece,dash);
        month := GetOptIndex(piece,"JAN"+delim+"JAN"+delim+
                                   "FEB"+delim+"FEV"+delim+
                                   "MAR"+delim+"MAR"+delim+
                                   "APR"+delim+"AVR"+delim+
                                   "MAY"+delim+"MAI"+delim+
                                   "JUN"+delim+"JUN"+delim+
                                   "JUL"+delim+"JUI"+delim+
                                   "AUG"+delim+"AOU"+delim+
                                   "SEP"+delim+"SEP"+delim+
                                   "OCT"+delim+"OCT"+delim+
                                   "NOV"+delim+"NOV"+delim+
                                   "DEC"+delim+"DEC");
        (* the month is assumed legal, i.e. the index is 1..24 : two names per month *)
        month := (month+1) DIV 2;
        piece := numToStr(month);
        IF month < 10 THEN Str.Prepend(piece,"0");END; (* mm *)

        Str.Slice(line,datePart,8-1,4);        (* yyyy *)
        Str.Append(line,piece);                (* yyyymm *)
        Str.Slice(piece,datePart,1-1,2);
        Str.Append(line,piece);                (* yyyymmdd *)
        Str.Append(line,timePart);             (* yyyymmddhh:mm:ss *)
        ReplaceChar(line," ","0");
        ReplaceChar(line,":","");

        Str.Copy(stamp,line);
    END buildStamp;

BEGIN
    buildStamp(i,stampi);
    buildStamp(j,stampj);
    RETURN (Str.Compare(stampi,stampj) < 0);
END isLess;

(* Exchange procedure handed to Lib.QSort : swaps sdup[i] and sdup[j]. *)
PROCEDURE doSwap (i,j:CARDINAL);
VAR
    held:str256;
BEGIN
    IF i # j THEN (* swapping an entry with itself changes nothing *)
        held:=sdup[j];
        sdup[j]:=sdup[i];
        sdup[i]:=held;
    END;
END doSwap;

(* should we check path too ? yes, but this may become complicated ! *)

(*
   Sort sdup[firstdup..count] with isLess/doSwap, then print the group.

   count : number of entries held in sdup ; values above maxdup are clamped
           so that the array is never indexed out of bounds.

   Every entry but the last is written after prefixKill, the last one after
   prefixKeep ; each line is followed by its first 12 columns (the 8.3 name)
   with blanks processed by ReplaceChar. A blank line ends the group.
*)
PROCEDURE sortAndDump (count:CARDINAL  );
CONST
    dirbatF8E3 = 1-1;
VAR
    i:CARDINAL;
    n:str16;
BEGIN
    IF count > maxdup THEN count:=maxdup; END;
    Lib.QSort(count,isLess,doSwap);
    FOR i:=firstdup TO count DO
        Str.Slice(n, sdup[i], dirbatF8E3, 12); (* nnnnnnnn.eee *)
        ReplaceChar(n," ","");
        IF i < count THEN
            WrStr(prefixKill);
        ELSE
            WrStr(prefixKeep);
        END;
        (* written in two parts : joining them in a str256 first would cut
           the name off whenever the line itself nearly fills 256 characters *)
        WrStr(sdup[i]);WrStr(n);WrLn;
    END;
    WrLn;
END sortAndDump;

(* ------------------------------------------------------------ *)

(*
   Main program.

   1. parse the command line : options, then <range> and <list> (or <list>
      alone with -d, which forces the range "1..12") ;
   2. read <list> line by line, skipping empty lines and remarks ;
   3. compare the selected portion of each line with the previous one and
      print duplicates (or unique lines with -r) ; in -d mode each group of
      duplicates is collected in sdup and printed by sortAndDump.

   Changes from the previous revision :
   - nothing is printed for a "previous line" before one was actually read
     (reverse mode used to start with the uninitialized oldSS, and an empty
     list printed it too) ; a flag replaces the "1..12" sentinel ;
   - a group larger than maxdup no longer writes past the end of sdup : the
     extra duplicates are left out of the report ;
   - parameter count checks are IF statements instead of CASE statements
     lacking a label for the valid value ;
   - unused variables LFNlist and errcode removed.
*)
CONST
    msgWorking  = "Working...";
CONST
    wi        = 22; (* should do to format debug infos *)
    firstparm = 1;
    maxparm   = 2;
VAR
    lastparm  : CARDINAL;
    parm      : ARRAY [firstparm..maxparm] OF str128;
    parmcount,i,opt:CARDINAL;
    S,R:str128;
    debug,keepcase,keeppath,keepfirst,reverse,dirbat,dotrim:BOOLEAN;
    infile:str128;
    hin:FIO.File;
    firstcolumn,columncount,dups,cmdopt:CARDINAL;
    hugestr,SS,oldSS,portion,oldportion : str4096;
    esc:BOOLEAN;
    gotline:BOOLEAN; (* TRUE once oldSS and oldportion hold a real line *)

(* Store the pending line oldSS as entry dups of the current group ;
   entries beyond maxdup are dropped instead of overrunning sdup. *)
PROCEDURE stashDup ();
BEGIN
    IF dups <= maxdup THEN
        Str.Copy(sdup[dups],oldSS);
    END;
END stashDup;

(* Close the current group : store its last line, then sort and print
   the entries that were actually stored. *)
PROCEDURE dumpDups ();
BEGIN
    stashDup();
    IF dups > maxdup THEN
        sortAndDump(maxdup);
    ELSE
        sortAndDump(dups);
    END;
END dumpDups;

BEGIN
    Lib.DisableBreakCheck();
    FIO.IOcheck := FALSE;
    WrLn;

    parmcount := Lib.ParamCount();
    IF parmcount = 0 THEN abort(errHelp,"");END;

    keepcase := FALSE;
    keeppath := TRUE;
    keepfirst:= TRUE;
    reverse  := FALSE;
    dirbat   := FALSE;
    dotrim   := FALSE;
    debug    := FALSE;
    lastparm := firstparm-1; (* 1.. *)

    cmdopt := 0; (* counts the options that -d does not accept *)
    FOR i := 1 TO parmcount DO
        Lib.ParamStr(S,i); cleantabs(S);
        Str.Copy(R,S);
        UpperCase(R);
        IF isOption(R) THEN
            opt := GetOptIndex(R,"?"+delim+"H"+delim+"HELP"+delim+
                                 "E"+delim+"EXACT"+delim+"KEEPCASE"+delim+
                                 "P"+delim+"PATH"+delim+
                                 "A"+delim+"ALL"+delim+
                                 "R"+delim+"REVERSE"+delim+
                                 "D"+delim+"DIRBAT"+delim+
                                 "T"+delim+"TRIM"+delim+"LEFTTRIM"+delim+
                                 "DEBUG"
                              );
            CASE opt OF
            | 1,2,3 : abort(errHelp,"");
            | 4,5,6 : keepcase  := TRUE;   INC(cmdopt);
            | 7,8   : keeppath  := FALSE;  INC(cmdopt);
            | 9,10  : keepfirst := FALSE;  INC(cmdopt);
            | 11,12 : reverse   := TRUE;   INC(cmdopt);
            | 13,14 : dirbat    := TRUE;
            | 15,16,17 : dotrim := TRUE;
            | 18    : debug     := TRUE;
            ELSE
                abort(errOption,S);
            END;
        ELSE
            (* 1=range, 2=file *)
            INC(lastparm); IF lastparm > maxparm THEN abort(errParm,S);END;
            Str.Copy(parm[lastparm],S);
        END;
    END;

    IF dirbat THEN
        (* -d : exactly one parameter, the list ; the range is fixed *)
        IF cmdopt # 0 THEN abort(errUseless,"");END;
        IF lastparm = firstparm-1 THEN
            abort(errExpected,"<list>");
        ELSIF lastparm = firstparm+1 THEN
            abort(errParm,parm[firstparm+1]);
        END;
        Str.Copy(S     ,"1..12");
        Str.Copy(infile,parm[firstparm]);
    ELSE
        (* normal mode : range then list *)
        IF lastparm = firstparm-1 THEN
            abort(errExpected,"<range>");
        ELSIF lastparm = firstparm THEN
            abort(errExpected,"<list>");
        END;
        (* lastparm is maxparm *)
        Str.Copy(S     ,parm[firstparm]);
        Str.Copy(infile,parm[firstparm+1]);
    END;

    IF parserange(S,firstcolumn,columncount)=FALSE THEN abort(errRange,S);END;

    IF same(infile,".") THEN infile:="*.*";END;
    IF chkJoker(infile) THEN abort(errJoker,infile);END;
    IF Str.CharPos(infile,dot)=MAX(CARDINAL) THEN Str.Append(infile,extLOG);END;
    IF FIO.Exists(infile)=FALSE THEN abort(errNotFound,infile);END;

    IF debug THEN
        wrboolean(wi,dirbat         ,"; DIRBAT mode");
        wrboolean(wi,reverse        ,"; Reverse mode");
        wrboolean(wi,keepcase       ,"; Case-sensitive");
        wrboolean(wi,NOT(keeppath)  ,"; Filter out path");
        wrboolean(wi,keepfirst      ,"; Flag first duplicate");
        wrboolean(wi,dotrim         ,"; Left-trim each line");
        wrvalue  (wi,firstcolumn+1  ,"; Column"); (* parserange() made it 0-based *)
        wrvalue  (wi,columncount    ,"; Count");
        WrLn;
    END;

    video(msgWorking,TRUE);

    hin:=FIO.OpenRead(infile);
    FIO.AssignBuffer(hin,bufferIn);

    oldportion :="";
    oldSS      :="";
    dups       :=1;     (* size of the current group, pending line included *)
    esc        := FALSE;
    gotline    := FALSE;

    WHILE NOT( FIO.EOF ) DO        (* ok to use it because it's the only file opened *)
        FIO.RdStr(hin,hugestr);
        Str.Copy(SS,hugestr);
        IF NOT(dirbat) THEN
            IF dotrim THEN LtrimBlanks(SS); END;
        END;
        RtrimBlanks(SS);
        CASE SS[0] OF
        | remark1,remark2,endOfString:
            ; (* remark or empty line : ignored *)
        ELSE
            (* NOTE(review): the portion is taken from the untrimmed line, so -t
               does not shift the analyzed columns - confirm this is intended *)
            Str.Slice(portion,hugestr,firstcolumn,columncount);
            IF NOT(keeppath) THEN
                i:=Str.RCharPos(portion,backslash);
                IF i # MAX(CARDINAL) THEN
                    Str.Delete(portion,0,i+1);
                END;
            END;
            IF NOT(keepcase) THEN UpperCase(portion);END;

            IF gotline AND same(portion,oldportion) THEN
                (* one more member of the current group : handle the pending line *)
                IF dirbat THEN
                    stashDup();
                ELSE
                    CASE dups OF
                    | 1 :
                        IF keepfirst THEN
                            Str.Prepend(oldSS,prefixKeep);
                        ELSE
                            Str.Prepend(oldSS,prefixKill);
                        END;
                    ELSE
                        Str.Prepend(oldSS,prefixKill);
                    END;
                    IF NOT(reverse) THEN
                        video(msgWorking,FALSE);
                        WrStr(oldSS);WrLn;
                        video(msgWorking,TRUE);
                    END;
                END;
                INC(dups);
            ELSE
                (* a new group starts : close the previous one, if there was one *)
                IF gotline THEN
                    IF dirbat THEN
                        IF dups > 1 THEN
                            video(msgWorking,FALSE);
                            dumpDups();
                            video(msgWorking,TRUE);
                        END;
                        Str.Copy(sdup[firstdup],SS);
                    ELSE
                        CASE dups OF
                        | 1 :
                            IF reverse THEN
                                video(msgWorking,FALSE);
                                WrStr(oldSS);WrLn;
                                video(msgWorking,TRUE);
                            END;
                        ELSE
                            IF NOT(reverse) THEN
                                Str.Prepend(oldSS,prefixKill);
                                video(msgWorking,FALSE);
                                WrStr(oldSS);WrLn;
                                WrLn;
                                video(msgWorking,TRUE);
                            END;
                        END;
                    END;
                END;
                oldportion:=portion;
                dups:=1;
            END;
            oldSS:=SS;
            gotline:=TRUE;
        END;
        IF ChkEscape() THEN esc:=TRUE; FIO.EOF:=TRUE; END; (* force exit *)
    END;
    video(msgWorking,FALSE);

    (* flush the last pending line or group *)
    IF NOT(esc) AND gotline THEN
        IF dirbat THEN
            IF dups > 1 THEN
                dumpDups();
            END;
        ELSE
            CASE dups OF
            | 1 :
                IF reverse THEN
                    WrStr(oldSS);WrLn;
                END;
            ELSE
                IF NOT(reverse) THEN
                    Str.Prepend(oldSS,prefixKill);
                    WrStr(oldSS);WrLn;
                END;
            END;
        END;
    END;

    FIO.Close(hin);

    IF esc THEN abort(errAborted,"");END;

    abort(errNone,"");
END dupLines.



(*
0        1         2         3         4         5         6         7
1...'....0....'....0....'....0....'....0....'....0....'....0.........0

DIRBAT /L
      318,281  14-Jun-1998   3:27:50    "C:\Mes documents\games.lst"

DIRBAT /L /LFN
valkyrie.mid           59,288  14-Jul-1995   3:20:00    c:\mesdoc~1\tunes\

CS
$a4dc2794      154002  c:\modula\tsc.exe

CHKDATA
----a  02-Aug-1996  01:28:14        344,760  $aa9998b4  $32eeec88  c:\modula\src\m2_src.zip

*)

