/* Flex scanner for Nix expressions.

   The scanner is reentrant and cooperates with a Bison parser through
   the bison-bridge / bison-locations interface: semantic values are
   stored through `yylval' and source positions through `yylloc'.

   Two exclusive start conditions are used:
     STRING      - inside a "..." string literal
     IND_STRING  - inside a ''...'' indented string literal
   Both are left (back to INITIAL) when an antiquotation `${' is seen;
   the parser re-enters them via backToString() / backToIndString(),
   defined at the end of this file. */

%option reentrant bison-bridge bison-locations
%option noyywrap
%option never-interactive


%x STRING
%x IND_STRING


%{
#include "aterm.hh"
#include "nixexpr.hh"
#include "nixexpr-ast.hh"
#define BISON_HEADER_HACK
#include "parser-tab.hh"

using namespace nix;

namespace nix {


/* Reset a location to line 1, column 1.  Installed as YY_USER_INIT
   below. */
static void initLoc(YYLTYPE * loc)
{
    loc->first_line = 1;
    loc->first_column = 1;
}


/* Advance `loc' over the `len' characters starting at `s'.  CR, LF and
   CR/LF each count as a single line break; any other character
   advances the column by one.  Installed as YY_USER_ACTION below, so
   it is applied to the text of every matched token. */
static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
{
    while (len--) {
        switch (*s++) {
        case '\r':
            /* Treat CR/LF as one line break.  The LF is part of the
               token, so it must be removed from the remaining length
               as well; otherwise the loop would run past the end of
               the token.  The `len' guard avoids peeking beyond the
               token when CR is its last character. */
            if (len && *s == '\n') {
                s++;
                len--;
            }
            /* fall through */
        case '\n':
            ++loc->first_line;
            loc->first_column = 1;
            break;
        default:
            ++loc->first_column;
        }
    }
}


/* Build a string expression from the raw text `s' of a string
   fragment.  The escapes \n, \r and \t are translated to the
   corresponding control characters; a backslash followed by any other
   character yields that character.  Unescaped CR and CR/LF are
   normalised to LF.  The result is makeStr() applied to the processed
   text and an empty ATerm list. */
static Expr unescapeStr(const char * s)
{
    string t;
    char c;
    while ((c = *s++)) {
        if (c == '\\') {
            /* The STR pattern only admits a backslash when another
               character follows it. */
            assert(*s);
            c = *s++;
            if (c == 'n') t += '\n';
            else if (c == 'r') t += '\r';
            else if (c == 't') t += '\t';
            else t += c;
        }
        else if (c == '\r') {
            /* Normalise CR and CR/LF into LF. */
            t += '\n';
            if (*s == '\n') s++; /* cr/lf */
        }
        else t += c;
    }
    return makeStr(toATerm(t), ATempty);
}


}

#define YY_USER_INIT initLoc(yylloc)
#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);

%}


ID          [a-zA-Z\_][a-zA-Z0-9\_\']*
INT         [0-9]+
PATH        [a-zA-Z0-9\.\_\-\+]*(\/[a-zA-Z0-9\.\_\-\+]+)+
URI         [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+


%%


if          { return IF; }
then        { return THEN; }
else        { return ELSE; }
assert      { return ASSERT; }
with        { return WITH; }
let         { return LET; }
in          { return IN; }
rec         { return REC; }
inherit     { return INHERIT; }

\=\=        { return EQ; }
\!\=        { return NEQ; }
\&\&        { return AND; }
\|\|        { return OR; }
\-\>        { return IMPL; }
\/\/        { return UPDATE; }
\+\+        { return CONCAT; }

{ID}        { yylval->t = toATerm(yytext); return ID; /* !!! alloc */ }
{INT}       { /* !!! overflow: atoi() offers no way to detect a literal
                 that does not fit in an int. */
              int n = atoi(yytext);
              /* "<int>" is the ATerm pattern placeholder that consumes
                 the int argument `n'. */
              yylval->t = ATmake("<int>", n);
              return INT;
            }

\"          { BEGIN(STRING); return '"'; }
<STRING>([^\$\"\\]|\$[^\{\"]|\\.)+ {
              /* !!! Not quite right: we want a follow restriction on
                 "$", it shouldn't be followed by a "{".  Right now
                 "$\"" will be consumed as part of a string, rather
                 than a "$" followed by the string terminator.
                 Disallow "$\"" for now. */
              yylval->t = unescapeStr(yytext); /* !!! alloc */
              return STR;
            }
<STRING>\$\{  { BEGIN(INITIAL); return DOLLAR_CURLY; }
<STRING>\"  { BEGIN(INITIAL); return '"'; }
<STRING>.   return yytext[0]; /* just in case: shouldn't be reached */

\'\'(\ *\n)?     { BEGIN(IND_STRING); return IND_STRING_OPEN; }
<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'])+ {
                   yylval->t = makeIndStr(toATerm(yytext));
                   return IND_STR;
                 }
<IND_STRING>\$\{ { BEGIN(INITIAL); return DOLLAR_CURLY; }
<IND_STRING>\'\' { BEGIN(INITIAL); return IND_STRING_CLOSE; }
<IND_STRING>.    return yytext[0]; /* just in case: shouldn't be reached */

{PATH}      { yylval->t = toATerm(yytext); return PATH; /* !!! alloc */ }
{URI}       { yylval->t = toATerm(yytext); return URI; /* !!! alloc */ }

[ \t\r\n]+    /* eat up whitespace */
\#[^\r\n]*    /* single-line comments */
\/\*([^*]|\*[^\/])*\*\/  /* long comments */

.           return yytext[0];


%%


namespace nix {

/* Horrible, disgusting hack: allow the parser to set the scanner
   start condition back to STRING.  Necessary in interpolations like
   "foo${expr}bar"; after the close brace we have to go back to the
   STRING state.

   In a reentrant scanner the BEGIN macro refers to a local variable
   named `yyg', which is why it is declared here even though it is not
   otherwise used. */
void backToString(yyscan_t scanner)
{
    struct yyguts_t * yyg = (struct yyguts_t *) scanner;
    BEGIN(STRING);
}

/* Same as backToString(), but returns the scanner to the IND_STRING
   start condition, for interpolations inside ''...'' strings. */
void backToIndString(yyscan_t scanner)
{
    struct yyguts_t * yyg = (struct yyguts_t *) scanner;
    BEGIN(IND_STRING);
}

}