\ 4tH CSVSCAN - Copyright 2009,2012 J.L. Bezemer
\ You can redistribute this file and/or modify it under
\ the terms of the GNU General Public License

\ This program analyzes a CSV file, lists the number of lines and columns
\ and lists all columns with the maximum length of the fields found.
\ The first line of the CSV file MUST contain a list of all fields involved.

include lib/parsing.4th                \ for "PARSE"?
include lib/padding.4th                \ for .PADDING
include lib/argopen.4th                \ for ARG-OPEN

255 constant max-fields                \ maximum number of fields
 32 constant max-length                \ maximum length of field
max-fields max-length * constant /buffer \ size of each string buffer below

0 value delimiter                      \ the delimiter (ASCII value)
0 value #fields                        \ number of fields

max-fields array length-fields         \ array with field lengths
/buffer string field-names             \ buffer with field names, stored
                                       \ back to back by Preprocess
/buffer string bigTIB                  \ alternate TIB
\ Read the header line, store every column name back to back in
\ field-names, set #fields and zero the length-fields entries in use.
\ Aborts when no header can be read or when the header holds more than
\ max-fields columns (length-fields has only max-fields entries).
: Preprocess                           ( --)
  bigTIB /buffer source!               \ initialize big TIB
  refill 0= abort" Cannot read header" \ get header line
  0 field-names >r                     \ column count; R: next name slot
  begin                                \ start scanning field names
    delimiter "parse"?                 \ parse the header
  while                                \ if not EOL
    r@ place r> count 1+ chars + >r    \ save column name, skip past it
    1+ dup max-fields >                \ increment counter and make sure
    abort" Too many columns"           \ it still fits in length-fields
  repeat                               \ next field
  2drop r> drop dup to #fields         \ clean up and save number of columns
  0 ?do 0 length-fields i th ! loop    \ initialize column length
;

\ Parse one field per known column from the current input line and raise
\ the recorded maximum length of a column when its field is longer.
\ Uses ?DO so that nothing is parsed or stored when #fields is zero.
: scan-fields                          ( --)
  #fields 0 ?do                        \ scan all fields, if any
    delimiter "parse" nip              \ get length of field and update
    length-fields i th dup @ rot max swap !
  loop                                 \ next field
;
                                       ( -- n)
: Process
  0                                    \ no rows counted yet
  begin
    refill                             \ try to read the next line
  while                                \ as long as there is one
    scan-fields 1+                     \ measure its fields, count the row
  repeat                               \ leaves the row count on the stack
;

\ Print the summary line (file name argument, number of columns, number
\ of rows n), close handle h, then list each column with its number,
\ its name padded to max-length and the maximum field length recorded.
\ Uses ?DO so that no column line is printed when #fields is zero.
: PostProcess                          ( h n --)
  2 args type ." : " #fields . ." columns, " . ." rows" cr cr close
  field-names #fields 0 ?do            \ list all fields, if any
    ." [" i 1+ 3 .r ." ]  "            \ print field number
    dup count max-length .padding      \ show name of field
    ." : " length-fields i th ? cr     \ show maximum length
    count 1+ chars +                   \ get next name
  loop drop                            \ drop buffer address
;

\ Validate the command line, store the delimiter and open the CSV file
\ named by the second argument, leaving its handle h.
\ Aborts with a usage message when arguments are missing and with an
\ error message when the delimiter argument does not convert to a number.
: OpenFile                             ( -- h)
  argn 3 < abort" Usage: csvscan delimiter csv-file"
  1 args number                        \ convert delimiter argument
  dup (error) =                        \ NUMBER signals failure by (ERROR)
  abort" Delimiter must be a numeric ASCII value"
  to delimiter                         \ save delimiter
  input 2 arg-open                     \ open file
;

\ Program entry point: run the four stages in order. The handle left by
\ OpenFile and the row count left by Process are consumed by PostProcess.
: csvscan                              ( --)
  OpenFile Preprocess                  \ open the file, scan its header
  Process PostProcess                  \ measure all rows, show results
;

csvscan                                \ run the program