# Reference implementation of the FPAT record parsing.
#
# Each loop iteration identifies a (separator[n-1],field[n]) pair.
# Each loop iteration must consume some characters, except for the first field.
# So a null field is only valid as a first field or after a non-null separator.
# A null record has no fields (not a single null field).
#
# Parameters:
#   string   - the record text to split
#   fields   - output array; cleared, then filled with fields[1..nf]
#   pattern  - regular expression describing a field (passed to match())
#   seps     - output array; cleared, then filled so that seps[n-1] is the
#              text between field n-1 and field n (seps[0] is the text before
#              the first field).  seps[nf] is set only when unmatched text
#              remains after the last field.
#
# Returns the number of fields found (0 for a null record).
#
# Side effect: the match() calls overwrite RSTART and RLENGTH.

function refpatsplit(string, fields, pattern, seps,
        parse_start, sep_start, field_start, field_length, field_found, nf) # locals
{
    # Local state variables:
    # - parse_start:  pointer to the first not yet consumed character
    # - sep_start:    pointer to the beginning of the parsed separator
    # - field_start:  pointer to the beginning of the parsed field
    # - field_length: length of the parsed field
    # - field_found:  flag for successful field match
    # - nf:           number of fields found so far

    # Prepare for parsing
    parse_start = 1   # first not yet parsed char
    nf = 0            # fields found so far
    delete fields
    delete seps

    # Loop that consumes the whole record
    while (parse_start <= length(string)) {  # still something to parse

        # first attempt to match a field
        sep_start = parse_start
        field_found = match(substr(string, parse_start), pattern)

        # check for an invalid null field and retry one character away;
        # sep_start is left untouched so the skipped character ends up
        # in the separator
        if (nf > 0 && field_found && RSTART == 1 && RLENGTH == 0) {
            parse_start++
            field_found = match(substr(string, parse_start), pattern)
        }

        if (field_found) {
            # store the (sep[n-1],field[n]) pair; RSTART is relative to the
            # substring, so convert it back to an index into string
            field_start = parse_start + RSTART - 1
            field_length = RLENGTH
            seps[nf] = substr(string, sep_start, field_start - sep_start)
            fields[++nf] = substr(string, field_start, field_length)
            parse_start = field_start + field_length
        } else {
            # store the final extra sep after the last field
            seps[nf] = substr(string, sep_start)
            parse_start = length(string) + 1
        }
    }

    return nf
}