Logo Search packages:      
Sourcecode: ffe version File versions  Download package

ffe.c

/*
 *    ffe - Flat File Extractor
 *
 *    Copyright (C) 2006 Timo Savinen
 *    This file is part of ffe.
 * 
 *    ffe is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    ffe is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with ffe; if not, write to the Free Software
 *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

/* $Id: ffe.c,v 1.51 2007-05-30 07:32:48 timo Exp $ */

#include "ffe.h"

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

#include <ctype.h>
#include <stdlib.h>

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#ifdef PACKAGE
static char *program = PACKAGE;
#else
static char *program = "ffe";
#endif

#ifdef PACKAGE_VERSION
static char *version = PACKAGE_VERSION;
#else
static char *version = "0.2.2";
#endif

#ifdef HOST
static char *host = HOST;
#else
static char *host = "";
#endif

#ifdef PACKAGE_BUGREPORT
static char *email_address = PACKAGE_BUGREPORT;
#else
static char *email_address = "tjsa@iki.fi";
#endif

/* Option string handed to getopt()/getopt_long() in main(); a letter followed
   by ':' takes an argument. */
static char short_opts[] = "c:s:o:p:f:e:r:l?Vav";

#ifdef HAVE_GETOPT_LONG
/* Long option table for getopt_long(): {name, has_arg, flag, val}.
   flag is always NULL, so each long option is reported as the short option
   letter given in val and is handled by the same case in main(). */
static struct option long_opts[] = {
    {"configuration",1,NULL,'c'},
    {"structure",1,NULL,'s'},
    {"output",1,NULL,'o'},
    {"print",1,NULL,'p'},
    {"field-list",1,NULL,'f'},
    {"loose",0,NULL,'l'},
    {"expression",1,NULL,'e'},
    {"help",0,NULL,'?'},
    {"version",0,NULL,'V'},
    {"and",0,NULL,'a'},
    {"invert-match",0,NULL,'v'},
    {"replace",1,NULL,'r'},
    {NULL,0,NULL,0}
};
#endif

/* Global rc-data: heads of singly linked lists (chained through ->next).
   In this file, structure/output/lookup are searched and validated by
   find_structure(), search_output() and check_rc(); expression and replace
   are appended to by add_expression() and add_replace().
   NOTE(review): the lists are presumably populated by parserc() - confirm
   in the rc-file parser. const_field is not referenced in this file. */
struct structure *structure = NULL;
struct output *output = NULL;
struct expression *expression = NULL;
struct lookup *lookup = NULL;
struct replace *replace = NULL;
struct field *const_field = NULL;

/* "No output" marker: search_output() returns no_output (the address of
   dummy) for the output name "no", so that name resolves to a non-NULL
   value without being present in the output list. */
struct output dummy;
struct output *no_output = &dummy;

/*
 * Print a fatal error message to stderr and terminate with EXIT_FAILURE.
 *
 * msg      - the main message, always printed after the program name
 * info     - optional detail, appended as ": info" when not NULL
 * syserror - optional system/library error text, appended as "; syserror"
 *            when not NULL
 *
 * This function never returns.
 */
void
panic(char *msg,char *info,char *syserror)
{
    const int has_info = (info != NULL);
    const int has_syserror = (syserror != NULL);

    if(has_info)
    {
        if(has_syserror)
        {
            fprintf(stderr,"%s: %s: %s; %s\n",program,msg,info,syserror);
        } else
        {
            fprintf(stderr,"%s: %s: %s\n",program,msg,info);
        }
    } else
    {
        if(has_syserror)
        {
            fprintf(stderr,"%s: %s; %s\n",program,msg,syserror);
        } else
        {
            fprintf(stderr,"%s: %s\n",program,msg);
        }
    }

    exit(EXIT_FAILURE);
}



/*
 * Build the name of the default configuration file.
 *
 * When the HOME environment variable is set, the result is a newly
 * allocated string "<HOME><PATH_SEPARATOR_STRING><file>"; otherwise the
 * bare file name (a string literal) is returned. The file name is
 * "ffe.rc" when WIN32 is defined and ".fferc" otherwise.
 *
 * Because the result is either heap memory or a literal, callers must not
 * free() or modify it.
 */
char *
get_default_rc_name()
{
#ifdef WIN32
    char *rc_file = "ffe.rc";
#else
    char *rc_file = ".fferc";
#endif
    char *home_dir = getenv("HOME");
    char *path;

    /* no home directory known: fall back to the plain file name */
    if(home_dir == NULL) return rc_file;

    path = xmalloc(strlen(home_dir) + strlen(rc_file) + strlen(PATH_SEPARATOR_STRING) + 2);
    strcpy(path,home_dir);
    strcat(path,PATH_SEPARATOR_STRING);
    strcat(path,rc_file);

    return path;
}
        
/*
 * Write the usage text to stream.
 *
 * With HAVE_GETOPT_LONG both the short and the long spelling of every
 * option are listed, otherwise only the short one. The description of the
 * last option (-V) and the trailing notes are shared by both variants.
 */
void
help(FILE *stream)
{
    fprintf(stream,"Usage: %s [OPTION]...\n\n",program);

#ifdef HAVE_GETOPT_LONG
    fputs("-c, --configuration=FILE\n",stream);
    fprintf(stream,"\t\tRead configuration from FILE, default is \'%s\'.\n",get_default_rc_name());
    fputs("-s, --structure=STRUCTURE\n"
          "\t\tUse structure STRUCTURE for input file, suppresses guessing.\n",stream);
    fputs("-p, --print=FORMAT\n"
          "\t\tUse output format FORMAT for printing.\n",stream);
    fputs("-o, --output=NAME\n"
          "\t\tWrite output to NAME instead of standard output.\n",stream);
    fputs("-f, --field-list=LIST\n"
          "\t\tPrint only fields and constants listed in comma separated list LIST.\n",stream);
    fputs("-e, --expression=EXPRESSION\n"
          "\t\tPrint only those records for which the EXPRESSION evaluates to true.\n",stream);
    fputs("-a, --and\n"
          "\t\tExpressions are combined with logical and, default is logical or.\n",stream);
    fputs("-v, --invert-match\n"
          "\t\tPrint only those records which don't match the expression.\n",stream);
    fputs("-l, --loose\n",stream);
    fprintf(stream,"\t\tAn invalid input line does not cause %s to abort.\n",program);
    fputs("-r, --replace=FIELD=VALUE\n"
          "\t\tReplace FIELDs contents with VALUE in output.\n",stream);
    fputs("-?, --help\n"
          "\t\tDisplay this help and exit.\n",stream);
    fputs("-V, --version\n",stream);
#else
    fputs("-c FILE\n",stream);
    fprintf(stream,"\t\tRead configuration from FILE, default is \'%s\'.\n",get_default_rc_name());
    fputs("-s STRUCTURE\n"
          "\t\tUse structure STRUCTURE for input file, suppresses guessing.\n",stream);
    fputs("-p FORMAT\n"
          "\t\tUse output format FORMAT for printing.\n",stream);
    fputs("-o NAME\n"
          "\t\tWrite output to NAME instead of standard output.\n",stream);
    fputs("-f LIST\n"
          "\t\tPrint only fields and constants listed in comma separated list LIST.\n",stream);
    fputs("-e EXPRESSION\n"
          "\t\tPrint only those records for which the EXPRESSION evaluates to true.\n",stream);
    fputs("-a\n"
          "\t\tExpressions are combined with logical and, default is logical or.\n",stream);
    fputs("-v\n"
          "\t\tPrint only those records which don't match the expression.\n",stream);
    fputs("-l\n",stream);
    fprintf(stream,"\t\tAn invalid input line does not cause %s to abort.\n",program);
    fputs("-r FIELD=VALUE\n"
          "\t\tReplace FIELDs contents with VALUE in output.\n",stream);
    fputs("-?\n"
          "\t\tDisplay this help and exit.\n",stream);
    fputs("-V\n",stream);
#endif

    /* description of -V and the closing notes are common to both variants */
    fputs("\t\tShow version and exit.\n",stream);
    fputs("\nAll remaining arguments are names of input files;\n"
          "if no input files are specified, then the standard input is read.\n",stream);
    fprintf(stream,"\nSend bug reports to %s.\n",email_address);
}

/*
 * Complain about an unknown option on stderr and follow it with the
 * complete help text.
 *
 * opt - the option character as returned by getopt()
 */
void
usage(int opt)
{
    const char unknown = (char) opt;

    fprintf(stderr,"Unknown option '-%c'\n",unknown);
    help(stderr);
}

/*
 * Print the program name, version and host, followed by the copyright
 * and warranty notice, to standard output.
 */
void
print_version()
{
    printf("%s version %s (%s)\n",program,version,host);
    fputs("Copyright (c) 2007 Timo Savinen\n\n"
          "This is free software; see the source for copying conditions.\n"
          "There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n",
          stdout);
}


/*
 * Look up an output definition by name.
 *
 * The special name "no" yields the no_output marker. Any other name is
 * searched from the global output list; when it is not found a message
 * is written to stderr and NULL is returned.
 */
struct output *
search_output(char *name)
{
    struct output *candidate;

    if(strcmp(name,"no") == 0) return no_output;

    for(candidate = output;candidate != NULL;candidate = candidate->next)
    {
        if(strcmp(name,candidate->name) == 0) return candidate;
    }

    fprintf(stderr,"%s: Unknown output \'%s\'\n",program,name);
    return NULL;
}

/* Find a record of structure s by its name; returns NULL when s has no
   record with that name. */
struct record *
find_record(struct structure *s,char *name)
{
    struct record *candidate;

    for(candidate = s->r;candidate != NULL;candidate = candidate->next)
    {
        if(strcmp(candidate->name,name) == 0) break;
    }

    /* NULL here means the whole list was walked without a match */
    return candidate;
}

/* Find a structure by its name from the global structure list; returns
   NULL when no structure has that name. */
struct structure *
find_structure(char *name)
{
    struct structure *candidate;

    for(candidate = structure;candidate != NULL;candidate = candidate->next)
    {
        if(strcmp(candidate->name,name) == 0) break;
    }

    /* NULL here means the whole list was walked without a match */
    return candidate;
}

/*
 * Check structure and output integrity and initialize derived data.
 *
 * use_output - when not NULL, the output name that replaces the output
 *              name of every structure (records that name their own
 *              output keep it)
 *
 * For every structure in the global list this
 *   - resolves the output names of the structure and its records,
 *   - copies the field list for records declared with fields-from,
 *   - computes the position of every field and the length of every record
 *     (byte offsets and byte length for fixed length input, ordinal and
 *     field count otherwise),
 *   - names unnamed fields after their ordinal when no header is used,
 *   - resolves the lookup table names of the fields.
 * For every output an unset lookup is defaulted to the output's data.
 *
 * Every problem found is reported on stderr; after all checks have been
 * made the program is terminated through panic() if there was any error.
 */
void 
check_rc(char *use_output)
{
    struct structure *s;
    struct output *o;
    struct record *r,*fr;
    struct field *f;
    struct lookup *l;
    int errors = 0;
    int ordinal;
    int field_count_first;   /* field count of the first record, header structures only */
    char num[64];

    s = structure;
    o = output;

    if(s == NULL)
    {
        errors++;
        fprintf(stderr,"%s: No structure definitions in rc-file\n",program);
    }

    while(s != NULL)
    {
        /* an output given on the command line overrides the structure's own */
        if(use_output != NULL)
        {
            s->output_name = xstrdup(use_output);
        }

        s->o = search_output(s->output_name);
        if(s->o == NULL) errors++;
        r = s->r;
        if(r == NULL) 
        {
            errors++;
            fprintf(stderr,"%s: No records in structure \'%s\'\n",program,s->name);
        }
        if(s->quote && s->type[0] == SEPARATED)
        {
            /* type[1] is compared as the separator character here */
            if(s->quote == s->type[1])
            {
                errors++;
                fprintf(stderr,"%s: Quotation and separator cannot be the same character, structure \'%s\'\n",program,s->name);
            }
        }
        if(s->header && s->type[0] != SEPARATED)
        {
            errors++;
            fprintf(stderr,"%s: Headers are valid only in separated input, structure \'%s\'\n",program,s->name);
        }

        field_count_first = 0;

        while(r != NULL)
        {
            /* a record without its own output inherits the structure's output */
            if(r->output_name == NULL) 
            {
                r->output_name = s->output_name;
                r->o = s->o;
            } else
            {
                r->o = search_output(r->output_name);
                if(r->o == NULL) errors++;
            }
            if(r->fields_from != NULL)
            {
                if(r->f != NULL)
                {
                    errors++;
                    fprintf(stderr,"%s: field and fields-from are mutually exclusive, structure \'%s\', record \'%s\'\n",program,s->name,r->name);
                }
                /* share the field list of the referenced record */
                fr = find_record(s,r->fields_from);
                if(fr != NULL)
                {
                    r->f = fr->f;
                } else
                {
                    errors++;
                    fprintf(stderr,"%s: No record named as '%s' in structure \'%s\'\n",program,r->fields_from,s->name);
                }
            }
            f = r->f;
            if(f == NULL)
            {
                errors++;
                fprintf(stderr,"%s: No fields in record \'%s\'\n",program,r->name);
            }
            r->length = 0;
            ordinal = 1;
            while(f != NULL) 
            {
                if(s->type[0] == FIXED_LENGTH)
                {
                    /* position is the offset of the field, length the sum of field lengths */
                    f->position = r->length;
                    r->length += f->length;
                } else
                {
                    /* position is the ordinal of the field, length the number of fields */
                    f->position = ordinal;
                    r->length++;
                    if(s->header)
                    {
                        if(r == s->r) 
                        {
                            field_count_first++;
                        } 
                    }
                }

                /* without a header an unnamed field is named after its ordinal */
                if(!s->header && f->name == NULL)
                {
                    sprintf(num,"%d",ordinal);
                    f->name = xstrdup(num);
                }
                if(s->type[0] == FIXED_LENGTH && !f->length)
                {
                    errors++;
                    fprintf(stderr,"%s: A field \'%s\' must have length in fixed length structure \'%s\' record \'%s\'\n",program,f->name,s->name,r->name);
                }

                if(f->lookup_table_name != NULL)
                {
                    l = lookup;

                    /* stops at the first table with a matching name */
                    while(l != NULL && f->lookup == NULL)
                    {
                        if(strcmp(l->name,f->lookup_table_name) == 0)
                        {
                            f->lookup = l;
                        }
                        l = l->next;
                    }

                    if(f->lookup == NULL)
                    {
                        errors++;
                        fprintf(stderr,"%s: No lookup table named as '%s'\n",program,f->lookup_table_name);
                    }
                }

                f = f->next;
                ordinal++;
            }
            /* with a header every record must have as many fields as the first one */
            if(s->header && r->length != field_count_first)
            {
                errors++;
                fprintf(stderr,"%s: All records in separated structure with header must have equal count of fields, structure \'%s\'\n",program,s->name);
            }
            r = r->next;
        }
        s = s->next;
    }
    
    if(o == NULL)
    {
        errors++;
        fprintf(stderr,"%s: No output definitions in rc-file\n",program);
    }

    while(o != NULL)
    {
        if(o->lookup == NULL) o->lookup = o->data;
        o = o->next;
    }

    if(errors)
    {
        panic("Errors in rc-file",NULL,NULL);
    }
}

/*
 * Append a FIELD=VALUE replacement to the end of the global replace list.
 *
 * optarg - the option argument; it is split in place at the first '='
 *          (the '=' is overwritten with a terminating zero). The field
 *          name and the value are stored as copies.
 *
 * Terminates the program through panic() when optarg contains no '='.
 */
void
add_replace(char *optarg)
{
    char *separator = strchr(optarg,'=');
    struct replace **link;
    struct replace *node;

    if(separator == NULL)
    {
        panic("Replace expression must contain =-character",optarg,NULL);
    }

    *separator = 0;

    /* walk to the pointer that terminates the list */
    link = &replace;
    while(*link != NULL) link = &(*link)->next;

    node = xmalloc(sizeof(struct replace));
    node->next = NULL;
    *link = node;

    node->field = xstrdup(optarg);
    node->value = xstrdup(separator + 1);
    node->found = 0;
}


/*
 * Append an expression (FIELD<operator>VALUE) to the end of the global
 * expression list.
 *
 * optarg - the option argument; it is split in place at the operator
 *          character (the operator is overwritten with a terminating
 *          zero). The field name and the value are stored as copies.
 *
 * The operator is the first operator character found in optarg, so the
 * value part may freely contain characters that are operators themselves
 * (for example "name=what?" is an equality test against "what?").
 *
 * Terminates the program through panic() when optarg contains no
 * operator, when a regular expression is requested but HAVE_REGEX is not
 * defined, or when the regular expression does not compile.
 */
void
add_expression(char *optarg)
{
    /* every operator character, zero terminated for strpbrk */
    const char operators[] = {OP_REQEXP,OP_EQUAL,OP_START,OP_CONTAINS,OP_NOT_EQUAL,0};
    char *op_pos;
    char op = 0;
    struct expression *e;
#ifdef HAVE_REGEX
    int rc;
    size_t buflen;
    char *errbuf;
#endif

    op_pos = strpbrk(optarg,operators);

    if(op_pos == NULL)
    {
        panic("Expression must contain an operator: =,^,~,? or !",optarg,NULL);
    }

    op = *op_pos;

#ifndef HAVE_REGEX
    if(op == OP_REQEXP)
    {
        panic("Regular expressions are not supported in this system",optarg,NULL);
    }
#endif

    /* terminate the field name where the operator was */
    *op_pos = 0;

    e = expression;

    if(e == NULL)
    {
        expression = xmalloc(sizeof(struct expression));
        expression->next =  NULL;
        e = expression;
    } else
    {
        while(e->next != NULL) e = e->next;
        e->next = xmalloc(sizeof(struct expression));
        e = e->next;
        e->next = NULL;
    }

    e->field = xstrdup(optarg);
    op_pos++;
    e->value = xstrdup(op_pos);
    e->value_len = strlen(e->value);
    e->found = 0;
    e->op = op;
#ifdef HAVE_REGEX
    if(e->op == OP_REQEXP)
    {
        rc = regcomp(&e->reg,e->value,REG_EXTENDED | REG_NOSUB);
        if(rc)
        {
            /* first call only asks for the size of the message */
            buflen = regerror(rc,&e->reg,NULL,0);
            errbuf = xmalloc(buflen + 1);
            regerror(rc,&e->reg,errbuf,buflen);
            panic("Error in regular expression",e->value,errbuf);
        }
    }
#endif
}
       

/* Store a copy of value in *slot; if *slot has already been set the
   program is terminated through panic() with the message too_many. */
static void
store_once(char **slot,char *value,char *too_many)
{
    if(*slot != NULL)
    {
        panic(too_many,NULL,NULL);
    }
    *slot = xstrdup(value);
}

/*
 * Program entry point: parse the command line, read and check the
 * configuration, select the structure (given with -s or guessed from the
 * input) and run the extraction.
 *
 * Exits with EXIT_SUCCESS after a successful run and after -? or -V.
 *
 * NOTE(review): getopt() also reports an unrecognized option as '?', so
 * such options appear to land in the help case below (help on stdout,
 * successful exit) rather than in the default case - confirm whether
 * that is intended.
 */
int
main(int argc, char **argv)
{
    int opt;
    int strict = 1;
    int expression_and = 0;
    int expression_invert = 0;
    struct structure *s;
    char *structure_to_use = NULL;
    char *output_to_use = NULL;
    char *config_to_use = NULL;
    char *ofile_to_use = NULL;
    char *field_list = NULL;

    for(;;)
    {
#ifdef HAVE_GETOPT_LONG
        opt = getopt_long(argc,argv,short_opts,long_opts,NULL);
#else
        opt = getopt(argc,argv,short_opts);
#endif
        if(opt == -1) break;

        switch(opt)
        {
            case 'c':
                store_once(&config_to_use,optarg,"Only one -c option allowed");
                break;
            case 's':
                store_once(&structure_to_use,optarg,"Only one -s option allowed");
                break;
            case 'p':
                store_once(&output_to_use,optarg,"Only one -p option allowed");
                break;
            case 'f':
                store_once(&field_list,optarg,"Only one -f option allowed");
                break;
            case 'o':
                store_once(&ofile_to_use,optarg,"Only one -o option allowed");
                break;
            case 'e':
                add_expression(optarg);
                break;
            case 'r':
                add_replace(optarg);
                break;
            case 'a':
                expression_and = 1;
                break;
            case 'v':
                /* every -v toggles the inversion */
                expression_invert = !expression_invert;
                break;
            case 'l':
                strict = 0;
                break;
            case '?':
                help(stdout);
                exit(EXIT_SUCCESS);
            case 'V':
                print_version();
                exit(EXIT_SUCCESS);
            default:
                usage(opt);
                exit(EXIT_FAILURE);
        }
    }

    /* remaining arguments are input files; without any, "-" is used */
    if(optind >= argc)
    {
        set_input_file("-");
    } else
    {
        do
        {
            set_input_file(argv[optind++]);
        } while(optind < argc);
    }

    if(config_to_use == NULL)
    {
        config_to_use = get_default_rc_name();
    }

    parserc(config_to_use,field_list);

    check_rc(output_to_use);

    open_input_file();

    if(structure_to_use == NULL)
    {
        structure_to_use = guess_structure();
        if(structure_to_use == NULL)
        {
            panic("A structure cannot be guessed, use -s option",NULL,NULL);
        }
    }

    s = find_structure(structure_to_use);
    if(s == NULL)
    {
        panic("No structure named as",structure_to_use,NULL);
    }

    set_output_file(ofile_to_use);

    execute(s,strict,expression_and,expression_invert);

    close_output_file();

    exit(EXIT_SUCCESS);
}


Generated by  Doxygen 1.6.0   Back to index