View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2013-2016, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(javascript_grammar,
   36          [ js_token//1
   37          ]).   38:- use_module(library(dcg/basics)).   39:- use_module(library(pure_input)).     % syntax_error//1
   40:- set_prolog_flag(double_quotes, codes).

JavaScript grammar

This file provides a tokenizer for JavaScript (EcmaScript). This code supports the quasi quotation syntax javascript, defined in library(http/js_write).

See also
- http://tomcopeland.blogs.com/EcmaScript.html is used for the high-level syntax.
- http://www.ecma-international.org/ecma-262/5.1/ is used for implementing the tokenization code. */
 js_token(-TokenType)//
Matches and classifies the next JavaScript token.
   58js_token(Type) -->
   59    token(Type).
 token(-Type) is semidet
Get the next token from the input. Fails when encountering the end of the input.
Errors
- syntax_error(Culprit)
   68token(comment)        --> comment, !.
   69token(string)         --> string_literal, !.
   70token(number)         --> numeric_literal, !.
   71token(identifier(Id)) --> identifier_name(Id), !.
   72token(regex)          --> regex_literal, !.
   73token(ws)             --> blank, !, blanks.
   74token(punct(Char))    --> [Code], { char_code(Char, Code) }.
 comment// is semidet
   78comment -->
   79    "/*",
   80    !,
   81    (   string(_), "*/"
   82    ->  []
   83    ;   syntax_error(eof_in_comment)
   84    ).
   85comment -->
   86    "//",
   87    !,
   88    (   string(_), eol
   89    ->  []
   90    ;   string(_), eof
   91    ->  []
   92    ).
 string_literal// is semidet
Matches a string literal
   99string_literal -->
  100    "\"",
  101    !,
  102    (   q_codes, "\""
  103    ->  []
  104    ;   syntax_error(eof_in_string)
  105    ).
  106string_literal -->
  107    "\'",
  108    !,
  109    (   q_codes, "\'"
  110    ->  []
  111    ;   syntax_error(eof_in_string)
  112    ).
 numeric_literal//
Matches JavaScript notion of a numeric constant
  119numeric_literal -->
  120    (   decimal_literal
  121    ->  []
  122    ;   hex_integer
  123    ),
  124    (   (   decimal_digit
  125        ;   js_id_start(_)
  126        )
  127    ->  syntax_error(js(illegal_number))
  128    ;   []
  129    ).
  130
  131decimal_literal -->
  132    decimal_integer, ".", opt_decimal_digits, opt_exponent.
  133decimal_literal -->
  134    ".", decimal_digits, opt_exponent.
  135decimal_literal -->
  136    decimal_integer,
  137    opt_exponent.
  138
  139decimal_integer -->
  140    "0",
  141    !.
  142decimal_integer -->
  143    non_zero_digit, opt_decimal_digits.
  144
  145decimal_digits -->
  146    decimal_digit,
  147    !,
  148    opt_decimal_digits.
  149
  150opt_decimal_digits -->
  151    decimal_digit,
  152    !,
  153    opt_decimal_digits.
  154opt_decimal_digits -->
  155    [].
  156
  157decimal_digit --> [C], { code_type(C, digit) }.
  158non_zero_digit --> [C], { code_type(C, digit), C \== 0'0 }.
  159
  160opt_exponent -->
  161    exponent,
  162    !.
  163opt_exponent -->
  164    [].
  165
  166exponent -->
  167    exponent_indictor,
  168    signed_integer.
  169
  170exponent_indictor --> "e", !.
  171exponent_indictor --> "E".
  172
  173signed_integer --> "+", !, decimal_digits.
  174signed_integer --> "-", !, decimal_digits.
  175signed_integer -->         decimal_digits.
  176
  177hex_integer --> "0", x, hex_digit, hex_digits.
  178
  179x --> "x".
  180x --> "X".
 regex_literal// is semidet
Matches regex expression /.../flags
  187regex_literal -->
  188    "/", regex_body, "/", !, regex_flags.
  189
  190regex_body -->
  191    regex_first_char,
  192    regex_chars.
  193
  194regex_chars --> regex_char, !, regex_chars.
  195regex_chars --> [].
  196
  197regex_first_char -->
  198    regex_non_terminator(C),
  199    !,
  200    { \+ memberchk(C, "*\\/[") }.
  201regex_first_char -->
  202    regex_backslash_sequence.
  203regex_first_char -->
  204    regex_class.
  205
  206regex_char -->
  207    regex_non_terminator(C),
  208    !,
  209    { \+ memberchk(C, "\\/[") }.
  210regex_char -->
  211    regex_backslash_sequence.
  212regex_char -->
  213    regex_class.
  214
  215regex_backslash_sequence -->
  216    "\\", !, regex_non_terminator(_).
  217
  218regex_class -->
  219    "[", regex_class_chars, "]".
  220
  221regex_class_chars --> regex_class_char, !, regex_class_chars.
  222regex_class_chars --> "".
  223
  224regex_class_char -->
  225    regex_non_terminator(C),
  226    !,
  227    { \+ memberchk(C, "]\\") }.
  228
  229regex_non_terminator(_) -->
  230    eol, !, {fail}.
  231regex_non_terminator(C) -->
  232    source_char(C).
  233
  234regex_flags -->
  235    js_id_conts(_).
  236
  237source_char(C) -->
  238    [C].
 q_codes//
Shortest list of quoted characters.
  245q_codes --> [] ; q_code, q_codes.
  246
  247q_code --> "\\", !, char_esc.
  248q_code --> eol, !, {fail}.
  249q_code --> [_].
  250
  251char_esc --> single_escape_char, !.
  252char_esc --> "x", !, hex_digit, hex_digit.
  253char_esc --> "u", !, hex_digit, hex_digit, hex_digit, hex_digit.
  254char_esc --> eol, !.
  255
  256hex_digits --> hex_digit, !, hex_digits.
  257hex_digits --> [].
  258
  259hex_digit --> [C], {code_type(C, xdigit(_))}.
  260
  261single_escape_char --> "'".
  262single_escape_char --> "\"".
  263single_escape_char --> "\\".
  264single_escape_char --> "b".
  265single_escape_char --> "f".
  266single_escape_char --> "n".
  267single_escape_char --> "r".
  268single_escape_char --> "t".
  269single_escape_char --> "v".
  270
  271eol --> "\r\n", !.
  272eol --> "\n", !.
  273eol --> "\r".
  274
  275eof -->
  276    \+ [_].
  277
  278
  279%       js_identifier classification. Now  based  on   Prolog.  This  is
  280%       pretty close, but I'm afraid there are corner cases.
  281
  282identifier_name(Id) -->
  283    js_id_start(C0),
  284    !,
  285    js_id_conts(Rest),
  286    { atom_codes(Id, [C0|Rest]),
  287      (   keyword(Id)
  288      ->  fail, syntax_error(reserved(Id))
  289      ;   true
  290      )
  291    }.
  292
  293
  294js_id_start(C) --> [C], {js_id_start(C)}.
  295
  296js_id_start(C) :- code_type(C, prolog_var_start), !.
  297js_id_start(C) :- code_type(C, prolog_atom_start), !.
  298js_id_start(0'$).
  299
  300js_id_conts([H|T]) --> js_id_cont(H), !, js_id_conts(T).
  301js_id_conts([]) --> [].
  302
  303js_id_cont(C) --> [C], {js_id_cont(C)}.
  304
  305js_id_cont(C) :- code_type(C, prolog_identifier_continue), !.
  306js_id_cont(0'$) :- !.
  307
  308
  309keyword(break).                         % standard keywords
  310keyword(do).
  311keyword(instanceof).
  312keyword(typeof).
  313keyword(case).
  314keyword(else).
  315keyword(new).
  316keyword(var).
  317keyword(catch).
  318keyword(finally).
  319keyword(return).
  320keyword(void).
  321keyword(continue).
  322keyword(for).
  323keyword(switch).
  324keyword(while).
  325keyword(debugger).
  326keyword(function).
  327keyword(this).
  328keyword(with).
  329keyword(default).
  330keyword(if).
  331keyword(throw).
  332keyword(delete).
  333keyword(in).
  334keyword(try).
  335
  336keyword(class).                         % reserved keywords
  337keyword(enum).
  338keyword(extends).
  339keyword(super).
  340keyword(const).
  341keyword(export).
  342keyword(import).
  343
  344keyword(implements).                    % future reserved keywords
  345keyword(let).
  346keyword(private).
  347keyword(public).
  348keyword(yield).
  349keyword(interface).
  350keyword(package).
  351keyword(protected).
  352keyword(static)