A parser for lexical grammars used by jison
npm install @gerhobbelt/lex-parser




![npm]()
A parser for lexical grammars used by jison and jison-lex.
npm install @gerhobbelt/lex-parser
or the entire bundle via
npm install jison-gho
Then the lex-parser library is located in the subdirectory packages/lex-parser/ of the jison-gho monorepo, i.e. .../node_modules/jison-gho/packages/lex-parser/.
Alternatively, the entire lex-parser API is also available via the jison API itself as can be seen from this internal jison code snippet:
```
import Lexer from '../packages/jison-lex';
import ebnfParser from '../packages/ebnf-parser';
import lexParser from '../packages/lex-parser';
import grammarPrinter from './util/grammar-printer.js';
import helpers from '../packages/helpers-lib';
var rmCommonWS = helpers.rmCommonWS;
var camelCase = helpers.camelCase;
var code_exec = helpers.exec;
import XRegExp from '@gerhobbelt/xregexp';
import recast from '@gerhobbelt/recast';
import astUtils from '@gerhobbelt/ast-util';
import json5 from '@gerhobbelt/json5';
// Also export other APIs: the JISON module should act as a 'facade' for the others,
// so applications using the JISON compiler itself can rely on it providing everything
// in a guaranteed compatible version as it allows userland code to use the precise
// same APIs as JISON will be using itself:
Jison.Lexer = Lexer;
Jison.ebnfParser = ebnfParser;
Jison.lexParser = lexParser;
Jison.codeExec = code_exec;
Jison.XRegExp = XRegExp;
Jison.recast = recast;
Jison.astUtils = astUtils;
Jison.JSON5 = json5;
Jison.prettyPrint = grammarPrinter;
Jison.rmCommonWS = rmCommonWS;
Jison.mkStdOptions = mkStdOptions;
Jison.camelCase = camelCase;
Jison.autodetectAndConvertToJSONformat = autodetectAndConvertToJSONformat;
...
Jison.Parser = Parser;
export default Jison;
```
hence you can get at it this way, for example:
```
import jisonAPI from 'jison-gho';
// get a reference to the full `lex-parser` API:
const lexParserAPI = jisonAPI.lexParser;
```
Follow the install & build directions of the monorepo.
You can also only build this particular subpackage by cd-ing into this directory
and then invoking the local make:
cd packages/lex-parser
make
This will generate lex-parser.js and the rollup/babel-postprocessed ES6 and ES5
compatible libraries in the local dist/ directory.
>
> ### Note about ES6/rollup usage vs. ES5
>
> All `dist/` library files are 'self-contained': they include all 'local imports'
> from within this jison monorepo in order to deliver a choice of source files
> for your perusal where you only need to worry about importing external dependencies
> (such as `recast`).
>
> As such, these `dist/` files should be easier to minify and/or use in older
> (ES5) environments.
>
> #### rollup
>
> Iff you use `rollup` or similar tools in an ES6/ES2015/ES2017 setting, then the
> `package.json::module` entry has
> already been set up for you to use the original sources instead!
>
var lexParser = require("@gerhobbelt/lex-parser");
// parse a lexical grammar and return JSON
lexParser.parse("%% ... ");
The parser can parse its own lexical grammar, shown below:
`
%code imports %{
import helpers from 'jison-helpers-lib';
%}
// Plain ASCII letters only. Note the upper bound is `Z`: a range ending in
// lowercase `z` would also match the punctuation between `Z` and `a`
// (`[`, `\`, `]`, `^`, `_` and the backtick).
ASCII_LETTER [a-zA-Z]
// \p{Alphabetic} already includes [a-zA-Z], hence we don't need to merge
// with {UNICODE_LETTER} (though jison has code to optimize if you did
// include the [a-zA-Z] anyway):
UNICODE_LETTER [\p{Alphabetic}]
ALPHA [{UNICODE_LETTER}_]
DIGIT [\p{Number}]
WHITESPACE [\s\r\n\p{Separator}]
ALNUM [{ALPHA}{DIGIT}]
NAME {ALPHA}?
ID [{ALPHA}]{ALNUM}*
DECIMAL_NUMBER [1-9][0-9]*
HEX_NUMBER "0"[xX][0-9a-fA-F]+
BR \r\n|\n|\r
// WhiteSpace MUST NOT match CR/LF and the regex \s DOES, so we cannot use
// that one directly. Instead we define the {WS} macro here:
WS [^\S\r\n]
// Quoted string content: support escaped quotes inside strings:
QUOTED_STRING_CONTENT (?:\\\'|\\[^\']|[^\\\'\r\n])*
DOUBLEQUOTED_STRING_CONTENT (?:\\\"|\\[^\"]|[^\\\"\r\n])*
// backquoted ES6/ES2017 string templates MAY span multiple lines:
ES2017_STRING_CONTENT (?:\\\|\\[^\]|[^\\\])*
// Accept any non-regex-special character as a direct literal without
// the need to put quotes around it:
ANY_LITERAL_CHAR [^\s\r\n<>\[\](){}.*+?:!=|%\/\\^$,\'\";]
%s rules macro named_chunk
%x code start_condition options conditions action path set
// Off Topic
// ---------
//
// Do NOT specify the xregexp option as we want the XRegExp \p{...} regex macros converted to
// native regexes and used as such:
//
// %options xregexp
%options easy_keyword_rules
%options ranges
%options xregexp
%%
// Opening `%{` of an action block: reset the curly-brace nesting counter
// (`yy.depth`, which the closing-brace and end-of-action checks read), disallow
// `%include` for now, and push the marker back so the `action` state sees it.
"%{" yy.depth = 0;
yy.include_command_allowed = false;
this.pushState('action');
this.unput(yytext);
yytext = '';
return 'ACTION_START';
yy.include_command_allowed = true;
return 'ACTION';
if (yy.include_command_allowed) {
// This is an include instruction in place of an action:
//
// - one %include per action chunk
// - one %include replaces an entire action chunk
this.pushState('path');
return 'INCLUDE';
} else {
// TODO
yyerror('oops!');
return 'INCLUDE_PLACEMENT_ERROR';
}
%}
return 'ACTION_BODY_C_COMMENT';
return 'ACTION_BODY_CPP_COMMENT';
// make sure to terminate on linefeed before the next rule alternative,
// which is announced by |:
this.popState();
this.unput(yytext);
yytext = '';
return 'ACTION_END';
} else {
return 'ACTION_BODY';
}
// make sure to terminate on linefeed before the rule section ends,
// which is announced by %%:
this.popState();
this.unput(yytext);
yytext = '';
return 'ACTION_END';
} else {
return 'ACTION_BODY';
}
// regexp with braces or quotes (and no spaces, so we don't mistake
// a division operator / for a regex delimiter here in most circumstances):{}][^\s/]?)*"/"
yy.include_command_allowed = false;
return 'ACTION_BODY';
// hack to cope with slashes which MAY be divide operators OR are regex starters:
// we simply gobble the entire line until the end or until we hit a closing brace,
// as we MUST keep track of the curly brace pairs inside an action body.
yy.include_command_allowed = false;
return 'ACTION_BODY';
yy.include_command_allowed = false;
return 'ACTION_BODY';
return 'ACTION_BODY'; yy.include_command_allowed = false; |%\{\}{BR}{WS}]+ yy.include_command_allowed = false;
return 'ACTION_BODY';
return 'ACTION_BODY';
yy.include_command_allowed = false;
return 'ACTION_BODY';
yy.include_command_allowed = false;
if (yy.depth <= 0) {
yyerror(rmCommonWS
too many closing curly braces in lexer rule action block.
Note: the action code chunk may be too complex for jison to parse
easily; we suggest you wrap the action code chunk in '%{...%\}'
to help jison grok more or less complex action code chunks.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
return 'BRACKETS_SURPLUS';
} else {
yy.depth--;
}
return 'ACTION_BODY';
%}
// make sure to terminate on linefeed before the next rule alternative,
// which is announced by |.
// Note that lexer options & commands should be at the start-of-line, i.e.
// without leading whitespace. The only lexer command which we do accept
// here after the last indent is %include, which is considered (part
// of) the rule's action code block.
return 'ACTION_BODY_WHITESPACE'; // keep empty lines as-is inside action code blocks.
yy.include_command_allowed = true;
return 'ACTION_BODY_WHITESPACE'; // keep empty lines as-is inside action code blocks.
} else {
// end of action code chunk
this.popState();
this.unput(yytext);
yytext = '';
return 'ACTION_END';
}
yy.include_command_allowed = false;
if (yy.depth !== 0) {
yyerror(rmCommonWS
missing ${yy.depth} closing curly braces in lexer rule action block.
Note: the action code chunk may be too complex for jison to parse
easily; we suggest you wrap the action code chunk in '%{...%\}'
to help jison grok more or less complex action code chunks.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
yytext = '';
return 'BRACKETS_MISSING';
}
this.popState();
yytext = '';
return 'ACTION_END';
%}
// Comments should be gobbled and discarded anywhere
// except the code/action blocks:
/* skip single-line comment */
/* skip multi-line comment */
/* skip single-line comment */
/* skip multi-line comment */
// ACTION code chunks follow rules and are generally indented, but
// never start with characters special to the lex language itself:
// - % can start options, commands, etc., e.g. %include or %options
// - | starts a rule alternative, never a chunk of action code.
// -
yy.include_command_allowed = true;
this.pushState('action');
return 'ACTION_START';
this.pushState('code');
return '%%';
// Accept any non-regex-special character as a direct literal without
// the need to put quotes around it:
%{
// accept any non-regex, non-lex, non-string-delim,
// non-escape-starter, non-space character as-is
return 'CHARACTER_LIT';
%}
yytext = unescQuote(this.matches[1], /\\"/g); return 'OPTION_STRING_VALUE'; // value is always a string type
yytext = unescQuote(this.matches[1], /\\'/g); return 'OPTION_STRING_VALUE'; // value is always a string type{ES2017_STRING_CONTENT}\
yytext = unescQuote(this.matches[1], /\\/g); return 'OPTION_STRING_VALUE'; // value is always a string type
// Accept any non-regex-special character as a direct literal without
// the need to put quotes around it:
// accept any non-regex, non-lex, non-string-delim,
// non-escape-starter, non-space character as-is
return 'CHARACTER_LIT';
%}
{BR}+ /* empty */
\s+ /* empty */
\"{DOUBLEQUOTED_STRING_CONTENT}\" %{
yytext = unescQuote(this.matches[1], /\\"/g);
return 'STRING_LIT';
%}
\'{QUOTED_STRING_CONTENT}\' %{
yytext = unescQuote(this.matches[1], /\\'/g);
return 'STRING_LIT';
%}
"[" this.pushState('set'); return 'REGEX_SET_START';
"|" return '|';
"(?:" return 'SPECIAL_GROUP';
"(?=" return 'SPECIAL_GROUP';
"(?!" return 'SPECIAL_GROUP';
"(" return '(';
")" return ')';
"+" return '+';
"*" return '*';
"?" return '?';
"^" return '^';
"," return ',';
"<
"<" this.pushState('conditions'); return '<';
"/!" return '/!'; // treated as `(?!atom)`
"/" return '/'; // treated as `(?=atom)`
"\\"([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|"c"[A-Z]|"x"[0-9A-F]{2}|"u"[a-fA-F0-9]{4})
return 'ESCAPE_CHAR';
"\\". yytext = yytext.replace(/^\\/g, ''); return 'ESCAPE_CHAR';
"$" return '$';
"." return '.';
"%options" this.pushState('options'); return 'OPTIONS';
"%s" this.pushState('start_condition'); return 'START_INC';
"%x" this.pushState('start_condition'); return 'START_EXC';
"%code" this.pushState('named_chunk'); return 'INIT_CODE';
"%import" this.pushState('named_chunk'); return 'IMPORT';
"%include" yy.depth = 0;
yy.include_command_allowed = true;
this.pushState('action');
this.unput(yytext);
yytext = '';
return 'ACTION_START';
"%include" this.pushState('path');
return 'INCLUDE';
%{
/* ignore unrecognized decl */
this.warn(rmCommonWS
LEX: ignoring unsupported lexer option ${dquote(yytext)}
while lexing in ${dquote(this.topState())} state.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
yytext = [
this.matches[1], // {NAME}
this.matches[2].trim() // optional value/parameters
];
return 'UNKNOWN_DECL';
%}
"%%" this.pushState('rules');
return '%%';
"{"\d+(","\s*\d+|",")?"}" return 'RANGE_REGEX';
"{"{ID}"}" return 'NAME_BRACE';
"{" return '{';
"}" return '}';
return 'REGEX_SET_END';
// in the trailing CODE block, only accept these %include macros when
// they appear at the start of a line and make sure the rest of lexer
// regexes account for this one so it'll match that way only:[^\r\n]*(\r|\n)+ return 'CODE';[^\r\n]+ return 'CODE'; // the bit of CODE just before EOF...
yytext = unescQuote(this.matches[1]);
this.popState();
return 'PATH';
yytext = unescQuote(this.matches[1]);
this.popState();
return 'PATH';
return 'PATH';
// detect and report unterminated string constants ASAP
// for 'action', 'options', but also for other lexer conditions:
//
// these error catching rules fix https://github.com/GerHobbelt/jison/issues/13
unterminated string constant in lexer rule action block.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
return 'error';
unterminated string constant in lexer rule action block.
Erroneous area:
+ this.prettyPrintRange(this, yylloc)); yyerror(rmCommonWS
return 'error';
unterminated string constant in lexer rule action block.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
return 'error';
unterminated string constant in %options entry.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
return 'error';
unterminated string constant in %options entry.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
return 'error'; yyerror(rmCommonWS
unterminated string constant in %options entry.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
return 'error';
<*>\" var rules = (this.topState() === 'macro' ? 'macro\'s' : this.topState());
yyerror(rmCommonWS
unterminated string constant encountered while lexing
${rules}.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
return 'error';
<*>\' var rules = (this.topState() === 'macro' ? 'macro\'s' : this.topState());
yyerror(rmCommonWS
unterminated string constant encountered while lexing
${rules}.
Erroneous area:
+ this.prettyPrintRange(this, yylloc)); var rules = (this.topState() === 'macro' ? 'macro\'s' : this.topState());
return 'error';
<*>\
yyerror(rmCommonWS
unterminated string constant encountered while lexing
${rules}.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
return 'error';
/* b0rk on bad characters */
var rules = (this.topState() === 'macro' ? 'macro\'s' : this.topState());
yyerror(rmCommonWS
unsupported lexer input encountered while lexing
${rules} (i.e. jison lex regexes).
NOTE: When you want this input to be interpreted as a LITERAL part
of a lex rule regex, you MUST enclose it in double or
single quotes.
If not, then know that this input is not accepted as a valid
regex expression here in jison-lex ${rules}.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
%}
<*>. %{
yyerror(rmCommonWS
unsupported lexer input: ${dquote(yytext)}
while lexing in ${dquote(this.topState())} state.
Erroneous area:
+ this.prettyPrintRange(this, yylloc));
%}
<*><
%%
var rmCommonWS = helpers.rmCommonWS;
var dquote = helpers.dquote;
// Prefix every line of `s` (lines are separated by '\n') with `i` spaces
// and return the re-joined text.
function indent(s, i) {
    var lines = s.split('\n');
    // Joining i + 1 empty slots with a space yields exactly i spaces.
    var padding = new Array(i + 1).join(' ');
    return lines.map((line) => padding + line).join('\n');
}
// Unescape a string value which was wrapped in quotes/doublequotes:
// `\'` becomes `'` and `\"` becomes `"`, while escaped backslashes (`\\`)
// are left untouched so they cannot be misread as the start of a quote escape.
function unescQuote(str) {
    var ESCAPED_BACKSLASH = '\\\\';
    // Splitting on the escaped backslash keeps those pairs out of reach of
    // the quote replacements; they are re-inserted verbatim by the join.
    return ('' + str)
        .split(ESCAPED_BACKSLASH)
        .map((chunk) => chunk.replace(/\\'/g, "'").replace(/\\"/g, '"'))
        .join(ESCAPED_BACKSLASH);
}
```
MIT
- jison / jison-gho @ NPM
- jison-lex @ NPM
- lex-parser @ NPM
- ebnf-parser @ NPM
- jison2json @ NPM
- json2jison @ NPM
- jison-helpers-lib @ NPM
- ### secondary source repositories
+ jison-lex
+ lex-parser
+ ebnf-parser
+ jison2json
+ json2jison
+ jison-helpers-lib