;------------------------------------------------------------- ;+ ; NAME: ; GTE_READ ; ; PURPOSE: ; Read GTE data file, i.e. a specifically formatted ASCII ; file which contains extensive header information. ; ; CATEGORY: ; GTE tools ; ; CALLING SEQUENCE: ; GTE_READ,filename,data,descriptor [,keywords] ; ; INPUTS: ; FILENAME -> A name of a GTE file or a file mask that will ; be used in the PICKFILE dialog (see OPEN_FILE). If the ; file mask is a named variable, the actual filename will ; be returned. ; ; KEYWORD PARAMETERS: ; ILUN -> logical unit number for the file. This keyword serves ; two purposes: (1) if given, the file will be opened with ; this number. (2) If a named variable is passed, it will ; contain the file number with which the file was opened. ; This can be useful if a type > 2 file is read, which is ; not supported by this routine. In this case, the program ; will extract all the header information and return it in ; the descriptor structure, then stop at the beginning of ; the data. ; ; NAMES -> A named variable that will contain the variable names. ; Note that this information is also stored in the variable ; descriptor array in DESCRIPTOR. ; ; UNITS -> ... the physical units of the variables ... ; ; NO_SCALE -> Set this keyword if you do not want to scale the ; data according to the scaling information given in the ; header. Normally, this information is tested for a scale ; factor of 1.0 and an offset of 0.0, and the user is prompted ; if different values are found. ; ; FORCE_SCALING -> Set this keyword to automatically scale ; all data where necessary. Useful for repeated batch ; processing. ; ; _EXTRA keywords are passed to OPEN_FILE ; ; OUTPUTS: ; DATA -> The data array in double(!) precision format. It is ; arranged as LINES,VARIABLES. Double precision is necessary ; to preserve full time resolution. ; Test for n_elements(data) gt 1 to see if reading was ; successful. ; ; DESCRIPTOR -> A file descriptor structure (see GTE_FILEDESC). ; This contains all the header infromation from the file ; including the variable definitions, which can be extracted ; with vardef = *descriptor.pvardesc ; ; SUBROUTINES: ; pro gte_getdate,tmp,expdate,revdate ; Tries to extract the experiment date and revision date ; from one single string. Delimiters can be ',', '/', or ; ';'. All delimiters must be identical. ; ; pro gte_timetest,time,varcol ; Tests the standardized time for negative changes and ; changes > 80000 secs ; ; pro gte_stdtime,data,pdesc,filetype ; Standardize the time information. Depending on filetype, ; 1 or 3 time columns are converted so that they are ; monotonically increasing. The DAY variable is changed to ; remain constant throughout the file. ; The variables are renamed JDAY, MIDTIME (type 0 and 1) ; or JDAY, STARTTIME, STOPTIME, MIDTIME (type 2) ; ; pro gte_stdmiss,data,pdesc ; Set all missing values to the common value -9.99E30. ; LOD codes are also converted to -8.88E30 and -7.77E30. ; ; pro gte_stdscale,data,pdesc ; Perform the intrinsic scaling of the data after the ; user is prompted. ; ; REQUIREMENTS: ; Uses OPEN_FILE, GTE_FILEDESC, GTE_VARDESC ; ; NOTES: ; Current version of the program contains quite a bit of ; debug output. Could be useful to save in a journal file. ; ; EXAMPLE: ; GTE_READ,'/data/pem-t/dc8/raw/*.pmt',data,desc ; ; MODIFICATION HISTORY: ; mgs, 24 Aug 1998: VERSION 1.00 ; ;- ; Copyright (C) 1998, Martin Schultz, Harvard University ; This software is provided as is without any warranty ; whatsoever. It may be freely used, copied or distributed ; for non-commercial purposes. This copyright notice must be ; kept with any copy of this software. If this software shall ; be used commercially or sold as part of a larger package, ; please contact the author to arrange payment. ; Bugs and comments should be directed to mgs@io.harvard.edu ; with subject "IDL routine gte_read" ;------------------------------------------------------------- pro dum end ; NOTE: Data is returned in double precision in order to ensure ; that no time information gets lost (single precision can lead ; to roundoff errors for resolution >1 Hz) !! ; However: the codes for missing value and LOD are stored as ; single precision values. This makes it necessary to test for ; these values with ( abs(data[*,i]-miss[i]) lt 1.D-6 ) instead ; of (.. eq ..) !! ; [this is better programming style anyhow ...] pro gte_getdate,tmp,expdate,revdate ; extract experiment and revision date from date string ; set defaults expdate = '00/00/00' revdate = '00/00/00' ; try to seperate string using ',', ';', or '/' test = str_sep(tmp,',') if (n_elements(test) eq 6) then goto,compose_it test = str_sep(tmp,';') if (n_elements(test) eq 6) then goto,compose_it test = str_sep(tmp,'/') if (n_elements(test) eq 6) then goto,compose_it ; If we arrive here, the procedure was not able to extract ; the date information correctly ; Query user message,'Could not retrieve date information!',/CONT message,'Date string is : '+tmp,/INFO read,tmp,prompt='Please reenter string as YY,MM,DD, YY,MM,DD : ' test = str_sep(tmp,',') if (n_elements(test) eq 6) then goto,compose_it message,'Sorry, that was wrong! Will continue with dummies.',/INFO return compose_it: test = strcompress(test,/remove_all) expdate = string( test[0:2],format='(i2.2,"/",i2.2,"/",i2.2)' ) revdate = string( test[3:5],format='(i2.2,"/",i2.2,"/",i2.2)' ) end pro gte_timetest,time,varcol ; Test 1: test for negative time changes test = time-shift(time,1) index = where(test lt 0) ; first value should always be ; negative: ignore if (n_elements(index) gt 1) then begin message,'Time '+strtrim(varcol)+' has negative change!',/CONT stop endif ; Test 2: test for time changes > 80000 index = where(test gt 80000.) if (n_elements(index) gt 1) then begin message,'Time '+strtrim(varcol)+' has change > 80000 secs!',/CONT stop endif return end pro gte_stdtime,data,pdesc,filetype ; standardize day and time information: ; remove day changes and let UTC seconds exceed 86400 instead ; rename time variables to JDAY,MIDTIME (type 0, 1) ; or JDAY,STARTTIME,STOPTIME,MIDTIME (type 2) ; Test 1: look for day changes day = data[*,0] change = day-shift(day,1) index = where(change gt 0) ; for each day change, increase seconds accordingly for i=0,n_elements(index)-1 do $ if (index[i] gt 0) then begin data[index[i]:*,1] = data[index[i]:*,1] + 86400.0D if (filetype eq 2) then begin data[index[i]:*,2] = data[index[i]:*,2] + 86400.0D data[index[i]:*,3] = data[index[i]:*,3] + 86400.0D endif endif ; set all days to first value day0 = day[0] data[*,0] = day0 ; test for negative time changes and changes >80000 gte_timetest,data[*,1],1 if (filetype eq 2) then begin gte_timetest,data[*,2],2 gte_timetest,data[*,3],3 endif ; rename time variables ntime = 2 + 2*(filetype eq 2) message,'Old names of time variables:'+ $ string( (*pdesc)[0:ntime-1].name, format='(255(1X,A))'), $ /INFO,/NONAME (*pdesc)[0].name = 'JDAY' (*pdesc)[1].name = 'MIDTIME' if (filetype eq 2) then begin (*pdesc)[1].name = 'STARTTIME' (*pdesc)[2].name = 'STOPTIME' (*pdesc)[3].name = 'MIDTIME' endif (*pdesc)[0].unit = 'days' (*pdesc)[1:ntime-1].unit = 'secs' return end pro gte_stdmiss,data,pdesc ; set all missing data and codes to -9.99E30, ; all LLODCODES to -8.88E30, and all ULODCODES to -7.77E30 ; This should be sufficiently far out of any data range, but ; is still discernible in single precision resolution ; Get number of variables s = size(data) nvars = s[2] ; Extract LOD types and codes from descriptor pointer miss = (*pdesc).miss lodtype = (*pdesc).lodtype llodcode = (*pdesc).llodcode ulodcode = (*pdesc).ulodcode ; get magnitude of missing code magcode = 10^fix( alog10(abs(miss)) ) ; loop through variables for i=0,nvars-1 do begin ; missing values: circumvent numerical roundoff errors !! index = where(abs(data[*,i]-miss[i]) lt 1.D-6*magcode[i]) if (index[0] ge 0) then data[index,i] = -9.99E30 if (lodtype[i] gt 0) then begin ; lower LOD index = where(abs(data[*,i]-llodcode[i]) lt 1.D-6*magcode[i]) if (index[0] ge 0) then data[index,i] = -8.88E30 ; upper LOD index = where(abs(data[*,i]-ulodcode[i]) lt 1.D-6*magcode[i]) if (index[0] ge 0) then data[index,i] = -7.77E30 endif else begin ; reset LODcodes to 0 (*pdesc)[i].llodcode = 0.0 (*pdesc)[i].llodcode = 0.0 endelse endfor ; set all codes to standard values (*pdesc)[*].miss = -9.99E30 (*pdesc)[*].llodcode = -8.88E30 (*pdesc)[*].ulodcode = -7.77E30 return end pro gte_stdscale,data,pdesc,force_scaling=force_scaling ; Get number of variables s = size(data) nvars = s[2] ; Extract scale information from variable description pointer names = (*pdesc).name scale = (*pdesc).scale offset = (*pdesc).offset ; Loop through variables for i=0,nvars-1 do begin if (abs(scale[i]-1.0) gt 1.0E-6 OR abs(offset[i]) gt 1.0E-30) then begin message,'!! Variable '+names[i]+' requires scaling: '+ $ strtrim(scale[i],2)+' '+strtrim(offset[i],2), $ /INFO,/NOPREF,/NONAME char = '' if (not keyword_set(force_scaling)) then begin while (char eq '') do $ read,char,prompt='Scale now (Y/N) ? : ' endif if (strupcase(char) eq 'Y' or keyword_set(force_scaling)) then begin ; scale only valid data (!) okind = where(abs(data[*,i]+9.99E30) gt 1.0E-5 AND $ abs(data[*,i]+8.88E30) gt 1.0E-5 AND $ abs(data[*,i]+7.77E30) gt 1.0E-5 ) if (okind[0] ge 0) then $ data[okind,i] = scale[i]*data[okind,i] + offset[i] (*pdesc)[i].scale = 1.0 (*pdesc)[i].offset = 0.0 endif endif endfor return end pro gte_read,filename,data,descriptor,ilun=ilun, $ names=names,units=units,no_scale=no_scale, $ force_scaling=force_scaling,debug=debug,_EXTRA=e FORWARD_FUNCTION gte_filedesc, gte_vardesc data = -1L ; ============================================================ ; open a GTE data file ; ============================================================ open_file,filename,ilun,filename=truename,_EXTRA=e if (ilun le 0) then begin err = !error_state.code message,'Could not open file '+truename,/CONT !error_state.code = err return endif filename = truename ; ============================================================ ; make template for file descriptor ; NOTE: file descriptor not completely in same order as file ; header ; ============================================================ filedesc = gte_filedesc() ; ============================================================ ; Read header information ; ============================================================ on_ioerror,read_error count = 0L ; number of *DATA* lines read ; get total number of header lines readf,ilun,nlines ; get filename, investigator name, content, experiment name tmp = '' readf,ilun,tmp print,tmp filedesc.filename = strlowcase(tmp) readf,ilun,tmp print,tmp filedesc.investigator = tmp readf,ilun,tmp print,tmp filedesc.contents = tmp readf,ilun,tmp print,tmp filedesc.experiment_name = strupcase(tmp) ; get experiment date and revision date readf,ilun,tmp gte_getdate,tmp,expdate,revdate print,expdate,' ',revdate filedesc.experiment_date = expdate filedesc.revision_date = revdate ; get experiment number, number of vars, number of comment lines readf,ilun,expnum print,'Experiment number:',expnum filedesc.experiment_number = fix(expnum) readf,ilun,nvars nvars = fix(nvars) filedesc.nvars = nvars readf,ilun,ncomment ncomment = fix(ncomment) filedesc.ncomment = ncomment print,'#comments:',ncomment ; get file type (0, 1, 2) readf,ilun,filetype filedesc.filetype = fix(filetype) ; read data averaging period and sampling frequency readf,ilun,avperiod print,'Averaging period:',avperiod if (filetype ne 2 AND avperiod lt 0.1) then begin message,'!! WARNING: Averaging period too small!', $ /INFO,/NOPREF,/NONAME read,avperiod,prompt='Enter true value:' avperiod = (float(avperiod) > 1.0) endif filedesc.average_period = float(avperiod) readf,ilun,sampfreq filedesc.sampling_freq = float(sampfreq) ; ------------------------------------------------------------ ; Read variable descriptions ; ------------------------------------------------------------ ; array to hold descriptors variables = gte_vardesc(nvars) for i=0,nvars-1 do begin ; read line as string tmp = '' readf,ilun,tmp ; get tokens params = str_sep(tmp,',') ; replace quotes by blanks and trim params = strrepl(params,strwhere(params,'"'),' ') params = strtrim(params,2) if (n_elements(params) lt 7) then begin message,'Invalid variable definition!',/CONT print,params goto,ignore_variable endif else $ if (n_elements(params) eq 7) then params = [ params, '0' ] ; assume LOD type 0 as default variables[i].name = params[0] variables[i].unit = params[1] variables[i].scale = float(params[2]) variables[i].offset = float(params[3]) variables[i].range = float([params[4],params[5]]) variables[i].miss = float(params[6]) variables[i].lodtype = fix(params[7]) if (n_elements(params) gt 8) then begin variables[i].llodcode = float(params[8]) variables[i].llodval = float(params[9]) variables[i].ulodcode = float(params[10]) variables[i].ulodval = float(params[11]) endif print,variables[i].name,variables[i].unit, $ variables[i].lodtype,variables[i].range, $ format='(A,T24," ",A,T39," ",I4,2G12.3)' ignore_variable: endfor ; ------------------------------------------------------------ ; Read comment lines ; ------------------------------------------------------------ if (ncomment gt 0) then begin comments = strarr(ncomment) for i=0,ncomment-1 do begin tmp = '' readf,ilun,tmp comments[i] = tmp print,i,' ',tmp endfor endif ; ============================================================ ; Add variable descriptors and comments as pointers ; to filedescriptor ; ============================================================ filedesc.pvardesc = ptr_new(variables,/NO_COPY) if (ncomment gt 0) then $ filedesc.pcomments = ptr_new(comments,/NO_COPY) ; return filedesc as descriptor descriptor = filedesc ; to simplify life ... names = (*(descriptor.pvardesc)).name units = (*(descriptor.pvardesc)).unit message,'Finished parsing header. Reading data ...',/INFO,/NONAME ; ============================================================ ; Read data, dynamically allocating memory as needed ; ============================================================ if (filetype lt 0 OR filetype gt 2) then begin message,'Filetype '+strtrim(filetype)+' not implemented.',/CONT ; message,'Will leave file open. File pointer is now at start of data.', $ ; /INFO,/NONAME message,'File closed.',/INFO,/NONAME close,ilun return endif ; file types 0, 1, or 2 : simply read data array MAXLINES = 4000 temp = dblarr(MAXLINES,nvars) dataline = dblarr(nvars) data = temp debug = keyword_set(Debug) t0 = systime(1) ; timing information while (not eof(ilun)) do begin if (DEBUG) then begin readf,ilun,tmp print,string(count,format='(i4.4)'),':',tmp reads,tmp,dataline endif else begin readf,ilun,dataline endelse count = count+1 if (count mod MAXLINES eq 0) then $ data = [ data, temp ] data[count-1,*] = transpose(dataline) endwhile data = data[0:count-1,*] t1 = systime(1) ; ============================================================ ; Close file, that's it ; ============================================================ free_lun,ilun on_ioerror,null message,strtrim(count,2)+' lines of data read in '+ $ strtrim(t1-t0,2)+' seconds.',/INFO,/NONAME if (count gt 1) then begin message,'Standardize time ...',/INFO,/NONAME gte_stdtime,data,descriptor.pvardesc,filetype endif if (count gt 0) then begin message,'Standardize MISSING CODE and LOD CODES ..',/INFO,/NONAME gte_stdmiss,data,descriptor.pvardesc endif if (count gt 0 AND not keyword_set(no_scale)) then begin message,'Scale data if necessary ...',/INFO,/NONAME gte_stdscale,data,descriptor.pvardesc,force_scaling=force_scaling endif ; to simplify life once again (time variables renamed) ... names = (*(descriptor.pvardesc)).name units = (*(descriptor.pvardesc)).unit message,'Done.',/INFO return ; ------------------------------------------------------------ read_error: print,'*** Error ',strtrim(!ERR,2), $ ' (',!ERR_STRING,') encountered reading file ',filename, $ ' line ',strtrim(count,2) stop free_lun,ilun return end