;-------------------------------------------------------------
;+
; NAME:
;        GTE_READ
;
; PURPOSE:
;        Read GTE data file, i.e. a specifically formatted ASCII
;        file which contains extensive header information.
;
; CATEGORY:
;        GTE tools
;
; CALLING SEQUENCE:
;        GTE_READ,filename,data,descriptor [,keywords]
;
; INPUTS:
;        FILENAME -> A name of a GTE file or a file mask that will
;            be used in the PICKFILE dialog (see OPEN_FILE). If the
;            file mask is a named variable, the actual filename will
;            be returned.
;
; KEYWORD PARAMETERS:
;        ILUN -> logical unit number for the file. This keyword serves
;            two purposes: (1) if given, the file will be opened with
;            this number. (2) If a named variable is passed, it will
;            contain the file number with which the file was opened.
;            This can be useful if a type > 2 file is read, which is
;            not supported by this routine. In this case, the program
;            will extract all the header information and return it in
;            the descriptor structure, then stop at the beginning of
;            the data.
;
;        NAMES -> A named variable that will contain the variable names.
;            Note that this information is also stored in the variable
;            descriptor array in DESCRIPTOR.
;
;        UNITS -> ... the physical units of the variables ...
;
;        NO_SCALE -> Set this keyword if you do not want to scale the
;            data according to the scaling information given in the
;            header. Normally, this information is tested for a scale
;            factor of 1.0 and an offset of 0.0, and the user is prompted
;            if different values are found.
;
;        FORCE_SCALING -> Set this keyword to automatically scale
;            all data where necessary. Useful for repeated batch
;            processing.
;
;        _EXTRA keywords are passed to OPEN_FILE
;
; OUTPUTS:
;        DATA -> The data array in double(!) precision format. It is
;            arranged as LINES,VARIABLES. Double precision is necessary
;            to preserve full time resolution.
;            Test for n_elements(data) gt 1 to see if reading was
;            successful.
;
;        DESCRIPTOR -> A file descriptor structure (see GTE_FILEDESC).
;            This contains all the header infromation from the file
;            including the variable definitions, which can be extracted
;            with vardef = *descriptor.pvardesc
;
; SUBROUTINES:
;        pro gte_getdate,tmp,expdate,revdate
;              Tries to extract the experiment date and revision date
;              from one single string. Delimiters can be ',', '/', or
;              ';'. All delimiters must be identical.
;
;        pro gte_timetest,time,varcol
;              Tests the standardized time for negative changes and
;              changes > 80000 secs
;
;        pro gte_stdtime,data,pdesc,filetype
;              Standardize the time information. Depending on filetype,
;              1 or 3 time columns are converted so that they are
;              monotonically increasing. The DAY variable is changed to
;              remain constant throughout the file.
;              The variables are renamed JDAY, MIDTIME (type 0 and 1)
;              or JDAY, STARTTIME, STOPTIME, MIDTIME (type 2)
;
;        pro gte_stdmiss,data,pdesc
;              Set all missing values to the common value -9.99E30.
;              LOD codes are also converted to -8.88E30 and -7.77E30.
;
;        pro gte_stdscale,data,pdesc
;              Perform the intrinsic scaling of the data after the
;              user is prompted.
;
; REQUIREMENTS:
;        Uses OPEN_FILE, GTE_FILEDESC, GTE_VARDESC
;
; NOTES:
;        Current version of the program contains quite a bit of
;        debug output. Could be useful to save in a journal file.
;
; EXAMPLE:
;        GTE_READ,'/data/pem-t/dc8/raw/*.pmt',data,desc
;
; MODIFICATION HISTORY:
;        mgs, 24 Aug 1998: VERSION 1.00
;
;-
; Copyright (C) 1998, Martin Schultz, Harvard University
; This software is provided as is without any warranty
; whatsoever. It may be freely used, copied or distributed
; for non-commercial purposes. This copyright notice must be
; kept with any copy of this software. If this software shall
; be used commercially or sold as part of a larger package,
; please contact the author to arrange payment.
; Bugs and comments should be directed to mgs@io.harvard.edu
; with subject "IDL routine gte_read"
;-------------------------------------------------------------


pro dum
end


;   NOTE: Data is returned in double precision in order to ensure
;    that no time information gets lost (single precision can lead
;    to roundoff errors for resolution >1 Hz) !!
;    However: the codes for missing value and LOD are stored as
;    single precision values. This makes it necessary to test for
;    these values with ( abs(data[*,i]-miss[i]) lt 1.D-6 ) instead
;    of (.. eq ..) !!
;    [this is better programming style anyhow ...]


pro gte_getdate,tmp,expdate,revdate

    ; extract experiment and revision date from date string

    ; set defaults
    expdate = '00/00/00'
    revdate = '00/00/00'

    ; try to seperate string using ',', ';', or '/'

    test = str_sep(tmp,',')
    if (n_elements(test) eq 6) then goto,compose_it
    test = str_sep(tmp,';')
    if (n_elements(test) eq 6) then goto,compose_it
    test = str_sep(tmp,'/')
    if (n_elements(test) eq 6) then goto,compose_it


    ; If we arrive here, the procedure was not able to extract
    ; the date information correctly
    ; Query user

    message,'Could not retrieve date information!',/CONT
    message,'Date string is : '+tmp,/INFO
    read,tmp,prompt='Please reenter string as YY,MM,DD,  YY,MM,DD : '

    test = str_sep(tmp,',')
    if (n_elements(test) eq 6) then goto,compose_it

    message,'Sorry, that was wrong! Will continue with dummies.',/INFO
    return


compose_it:

    test = strcompress(test,/remove_all)

    expdate = string( test[0:2],format='(i2.2,"/",i2.2,"/",i2.2)' )
    revdate = string( test[3:5],format='(i2.2,"/",i2.2,"/",i2.2)' )
end


pro gte_timetest,time,varcol

    ; Test 1: test for negative time changes
    test = time-shift(time,1)
    index = where(test lt 0)   ; first value should always be
                               ; negative: ignore

    if (n_elements(index) gt 1) then begin
       message,'Time '+strtrim(varcol)+' has negative change!',/CONT
       stop
    endif


    ; Test 2: test for time changes > 80000
    index = where(test gt 80000.)

    if (n_elements(index) gt 1) then begin
       message,'Time '+strtrim(varcol)+' has change > 80000 secs!',/CONT
       stop
    endif

    return
end


pro gte_stdtime,data,pdesc,filetype

    ; standardize day and time information:
    ; remove day changes and let UTC seconds exceed 86400 instead
    ; rename time variables to JDAY,MIDTIME (type 0, 1)
    ; or JDAY,STARTTIME,STOPTIME,MIDTIME    (type 2)

    ; Test 1: look for day changes
    day = data[*,0]
    change = day-shift(day,1)
    index = where(change gt 0)
    ; for each day change, increase seconds accordingly
    for i=0,n_elements(index)-1 do $
       if (index[i] gt 0) then begin
          data[index[i]:*,1] = data[index[i]:*,1] + 86400.0D
          if (filetype eq 2) then begin
             data[index[i]:*,2] = data[index[i]:*,2] + 86400.0D
             data[index[i]:*,3] = data[index[i]:*,3] + 86400.0D
          endif
       endif

    ; set all days to first value
    day0 = day[0]
    data[*,0] = day0

    ; test for negative time changes and changes >80000
    gte_timetest,data[*,1],1

    if (filetype eq 2) then begin
       gte_timetest,data[*,2],2
       gte_timetest,data[*,3],3
    endif


    ; rename time variables
    ntime = 2 + 2*(filetype eq 2)
    message,'Old names of time variables:'+  $
          string( (*pdesc)[0:ntime-1].name, format='(255(1X,A))'), $
          /INFO,/NONAME

    (*pdesc)[0].name = 'JDAY'
    (*pdesc)[1].name = 'MIDTIME'

    if (filetype eq 2) then begin
       (*pdesc)[1].name = 'STARTTIME'
       (*pdesc)[2].name = 'STOPTIME'
       (*pdesc)[3].name = 'MIDTIME'
    endif

    (*pdesc)[0].unit = 'days'
    (*pdesc)[1:ntime-1].unit = 'secs'

    return
end


pro gte_stdmiss,data,pdesc

    ; set all missing data and codes to -9.99E30,
    ; all LLODCODES to -8.88E30, and all ULODCODES to -7.77E30
    ; This should be sufficiently far out of any data range, but
    ; is still discernible in single precision resolution

    ; Get number of variables
    s = size(data)
    nvars = s[2]

    ; Extract LOD types and codes from descriptor pointer
    miss = (*pdesc).miss
    lodtype = (*pdesc).lodtype
    llodcode = (*pdesc).llodcode
    ulodcode = (*pdesc).ulodcode

    ; get magnitude of missing code
    magcode = 10^fix( alog10(abs(miss)) )

    ; loop through variables
    for i=0,nvars-1 do begin
       ; missing values: circumvent numerical roundoff errors !!
       index = where(abs(data[*,i]-miss[i]) lt 1.D-6*magcode[i])
       if (index[0] ge 0) then data[index,i] = -9.99E30

       if (lodtype[i] gt 0) then begin
          ; lower LOD
          index = where(abs(data[*,i]-llodcode[i]) lt 1.D-6*magcode[i])
          if (index[0] ge 0) then data[index,i] = -8.88E30
          ; upper LOD
          index = where(abs(data[*,i]-ulodcode[i]) lt 1.D-6*magcode[i])
          if (index[0] ge 0) then data[index,i] = -7.77E30
       endif else begin   ; reset LODcodes to 0
          (*pdesc)[i].llodcode = 0.0
          (*pdesc)[i].llodcode = 0.0
       endelse
    endfor

    ; set all codes to standard values
    (*pdesc)[*].miss = -9.99E30
    (*pdesc)[*].llodcode = -8.88E30
    (*pdesc)[*].ulodcode = -7.77E30

    return
end


pro gte_stdscale,data,pdesc,force_scaling=force_scaling

    ; Get number of variables
    s = size(data)
    nvars = s[2]

    ; Extract scale information from variable description pointer
    names = (*pdesc).name
    scale = (*pdesc).scale
    offset = (*pdesc).offset


    ; Loop through variables
    for i=0,nvars-1 do begin
       if (abs(scale[i]-1.0) gt 1.0E-6 OR abs(offset[i]) gt 1.0E-30) then begin
          message,'!! Variable '+names[i]+' requires scaling: '+ $
               strtrim(scale[i],2)+' '+strtrim(offset[i],2), $
               /INFO,/NOPREF,/NONAME
          char = ''
          if (not keyword_set(force_scaling)) then begin
             while (char eq '') do $
                read,char,prompt='Scale now (Y/N) ? : '
          endif
          if (strupcase(char) eq 'Y' or keyword_set(force_scaling)) then begin
          ; scale only valid data (!)
             okind = where(abs(data[*,i]+9.99E30) gt 1.0E-5  AND  $
                           abs(data[*,i]+8.88E30) gt 1.0E-5  AND  $
                           abs(data[*,i]+7.77E30) gt 1.0E-5 )
             if (okind[0] ge 0) then $
                data[okind,i] = scale[i]*data[okind,i] + offset[i]
             (*pdesc)[i].scale = 1.0
             (*pdesc)[i].offset = 0.0
          endif
       endif
    endfor

    return
end


pro gte_read,filename,data,descriptor,ilun=ilun,  $
          names=names,units=units,no_scale=no_scale, $
          force_scaling=force_scaling,debug=debug,_EXTRA=e


    FORWARD_FUNCTION gte_filedesc, gte_vardesc

    data = -1L


    ; ============================================================
    ; open a GTE data file
    ; ============================================================
    open_file,filename,ilun,filename=truename,_EXTRA=e

    if (ilun le 0) then begin
       err = !error_state.code
       message,'Could not open file '+truename,/CONT
       !error_state.code = err
       return
    endif

    filename = truename


    ; ============================================================
    ; make template for file descriptor
    ; NOTE: file descriptor not completely in same order as file
    ;       header
    ; ============================================================

    filedesc = gte_filedesc()


    ; ============================================================
    ; Read header information
    ; ============================================================

    on_ioerror,read_error

    count = 0L   ; number of *DATA* lines read

    ; get total number of header lines
    readf,ilun,nlines

    ; get filename, investigator name, content, experiment name
    tmp = ''
    readf,ilun,tmp
print,tmp
    filedesc.filename = strlowcase(tmp)
    readf,ilun,tmp
print,tmp
    filedesc.investigator = tmp
    readf,ilun,tmp
print,tmp
    filedesc.contents = tmp
    readf,ilun,tmp
print,tmp
    filedesc.experiment_name = strupcase(tmp)

    ; get experiment date and revision date
    readf,ilun,tmp
    gte_getdate,tmp,expdate,revdate
print,expdate,'   ',revdate
    filedesc.experiment_date = expdate
    filedesc.revision_date = revdate

    ; get experiment number, number of vars, number of comment lines
    readf,ilun,expnum
print,'Experiment number:',expnum
    filedesc.experiment_number = fix(expnum)
    readf,ilun,nvars
    nvars = fix(nvars)
    filedesc.nvars = nvars
    readf,ilun,ncomment
    ncomment = fix(ncomment)
    filedesc.ncomment = ncomment
print,'#comments:',ncomment

    ; get file type (0, 1, 2)
    readf,ilun,filetype
    filedesc.filetype = fix(filetype)

    ; read data averaging period and sampling frequency
    readf,ilun,avperiod
print,'Averaging period:',avperiod
if (filetype ne 2 AND avperiod lt 0.1) then begin
   message,'!! WARNING: Averaging period too small!', $
          /INFO,/NOPREF,/NONAME
   read,avperiod,prompt='Enter true value:'
   avperiod = (float(avperiod) > 1.0)
endif
    filedesc.average_period = float(avperiod)
    readf,ilun,sampfreq
    filedesc.sampling_freq = float(sampfreq)


    ; ------------------------------------------------------------
    ; Read variable descriptions
    ; ------------------------------------------------------------

    ; array to hold descriptors
    variables = gte_vardesc(nvars)

    for i=0,nvars-1 do begin
       ; read line as string
       tmp = ''
       readf,ilun,tmp
       ; get tokens
       params = str_sep(tmp,',')
       ; replace quotes by blanks and trim
       params = strrepl(params,strwhere(params,'"'),' ')
       params = strtrim(params,2)
       if (n_elements(params) lt 7) then begin
          message,'Invalid variable definition!',/CONT
          print,params
          goto,ignore_variable
       endif else $
          if (n_elements(params) eq 7) then params = [ params, '0' ]
       ; assume LOD type 0 as default

       variables[i].name = params[0]
       variables[i].unit = params[1]
       variables[i].scale = float(params[2])
       variables[i].offset = float(params[3])
       variables[i].range = float([params[4],params[5]])
       variables[i].miss = float(params[6])
       variables[i].lodtype = fix(params[7])
       if (n_elements(params) gt 8) then begin
          variables[i].llodcode = float(params[8])
          variables[i].llodval = float(params[9])
          variables[i].ulodcode = float(params[10])
          variables[i].ulodval = float(params[11])
       endif
print,variables[i].name,variables[i].unit, $
      variables[i].lodtype,variables[i].range, $
      format='(A,T24," ",A,T39," ",I4,2G12.3)'

ignore_variable:
    endfor


    ; ------------------------------------------------------------
    ; Read comment lines
    ; ------------------------------------------------------------

    if (ncomment gt 0) then begin
       comments = strarr(ncomment)

       for i=0,ncomment-1 do begin
       tmp = ''
          readf,ilun,tmp
          comments[i] = tmp
 print,i,'    ',tmp
       endfor
    endif


    ; ============================================================
    ; Add variable descriptors and comments as pointers
    ; to filedescriptor
    ; ============================================================

    filedesc.pvardesc = ptr_new(variables,/NO_COPY)
    if (ncomment gt 0) then $
       filedesc.pcomments = ptr_new(comments,/NO_COPY)

    ; return filedesc as descriptor
    descriptor = filedesc

    ; to simplify life ...
    names = (*(descriptor.pvardesc)).name
    units = (*(descriptor.pvardesc)).unit

    message,'Finished parsing header. Reading data ...',/INFO,/NONAME

    ; ============================================================
    ; Read data, dynamically allocating memory as needed
    ; ============================================================

    if (filetype lt 0 OR filetype gt 2) then begin
       message,'Filetype '+strtrim(filetype)+' not implemented.',/CONT
;      message,'Will leave file open. File pointer is now at start of data.', $
;             /INFO,/NONAME
       message,'File closed.',/INFO,/NONAME
       close,ilun
       return
    endif


    ; file types 0, 1, or 2 : simply read data array
    MAXLINES = 4000
    temp = dblarr(MAXLINES,nvars)
    dataline = dblarr(nvars)

    data = temp
    debug = keyword_set(Debug)
    t0 = systime(1)   ; timing information
    while (not eof(ilun)) do begin
if (DEBUG) then begin
      readf,ilun,tmp
      print,string(count,format='(i4.4)'),':',tmp
      reads,tmp,dataline
endif else begin
       readf,ilun,dataline
endelse
       count = count+1
       if (count mod MAXLINES eq 0) then $
          data = [ data, temp ]
       data[count-1,*] = transpose(dataline)
    endwhile

    data = data[0:count-1,*]

    t1 = systime(1)

    ; ============================================================
    ; Close file, that's it
    ; ============================================================

    free_lun,ilun

    on_ioerror,null

    message,strtrim(count,2)+' lines of data read in '+ $
          strtrim(t1-t0,2)+' seconds.',/INFO,/NONAME

    if (count gt 1) then begin
       message,'Standardize time ...',/INFO,/NONAME
       gte_stdtime,data,descriptor.pvardesc,filetype
    endif

    if (count gt 0) then begin
       message,'Standardize MISSING CODE and LOD CODES ..',/INFO,/NONAME
       gte_stdmiss,data,descriptor.pvardesc
    endif

    if (count gt 0 AND not keyword_set(no_scale)) then begin
       message,'Scale data if necessary ...',/INFO,/NONAME
       gte_stdscale,data,descriptor.pvardesc,force_scaling=force_scaling
    endif

    ; to simplify life once again (time variables renamed) ...
    names = (*(descriptor.pvardesc)).name
    units = (*(descriptor.pvardesc)).unit


    message,'Done.',/INFO
    return

    ; ------------------------------------------------------------


read_error:

    print,'*** Error ',strtrim(!ERR,2),  $
          ' (',!ERR_STRING,') encountered reading file ',filename, $
          ' line ',strtrim(count,2)

stop
    free_lun,ilun


    return


end