View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2020, University of Amsterdam
    7                              CWI, Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf,
   37          [ load_rdf/2,                 % +File, -Triples
   38            load_rdf/3,                 % +File, -Triples, :Options
   39            xml_to_rdf/3,               % +XML, -Triples, +Options
   40            process_rdf/3               % +File, :OnTriples, :Options
   41          ]).   42
   43:- meta_predicate
   44    load_rdf(+, -, :),
   45    process_rdf(+, :, :).   46
   47:- autoload(library(lists),[select/3,append/3]).   48:- autoload(library(option),[meta_options/3,option/3]).   49:- autoload(library(rdf_parser),
   50	    [ make_rdf_state/3, xml_to_plrdf/3, rdf_name_space/1,
   51              rdf_modify_state/3, element_to_plrdf/3
   52	    ]).   53:- autoload(library(rdf_triple),
   54	    [rdf_start_file/2,rdf_end_file/1,rdf_triples/2]).   55:- autoload(library(sgml),
   56	    [ load_structure/3, new_sgml_parser/2, set_sgml_parser/2,
   57	      open_dtd/3, xml_quote_attribute/2, sgml_parse/2,
   58	      free_sgml_parser/1, get_sgml_parser/2
   59	    ]).

RDF/XML parser

This module parses RDF/XML documents. It defines two processing modes: load_rdf/2 and load_rdf/3 which process a document into a list of rdf(S,P,O) terms and process_rdf/3 which processes the input description-by-description and uses a callback to handle the triples.

To be done
- The predicate load_rdf/2,3 is too easily confused with the semweb library predicate rdf_load/2,3 which uses process_rdf/3 to store the triples in the triple DB.
- It would be better to use library(intercept) do distinguish to have one predicate that can operate in both collecting and streaming modes. */
 load_rdf(+File, -Triples) is det
 load_rdf(+File, -Triples, :Options) is det
Parse an XML file holding an RDF term into a list of RDF triples. see rdf_triple.pl for a definition of the output format. Options:
base_uri(+URI)
URI to use as base
expand_foreach(+Bool)
Apply each(Container, Pred, Object) on the members of Container
namespaces(-Namespaces:list(NS=URL))
Return list of namespaces declared using xmlns:NS=URL in the document. This can be used to update the namespace list with rdf_register_ns/2.
See also
- Use process_rdf/3 for processing large documents in call-back style.
   97load_rdf(File, Triples) :-
   98    load_rdf(File, Triples, []).
   99
  100load_rdf(File, Triples, M:Options0) :-
  101    entity_options(Options0, EntOptions, Options1),
  102    meta_options(load_meta_option, M:Options1, Options),
  103    init_ns_collect(Options, NSList),
  104    load_structure(File,
  105                   [ RDFElement
  106                   ],
  107                   [ dialect(xmlns),
  108                     space(sgml),
  109                     call(xmlns, rdf:on_xmlns)
  110                   | EntOptions
  111                   ]),
  112    rdf_start_file(Options, Cleanup),
  113    call_cleanup(xml_to_rdf(RDFElement, Triples0, Options),
  114                 rdf_end_file(Cleanup)),
  115    exit_ns_collect(NSList),
  116    post_process(Options, Triples0, Triples).
  117
  118entity_options([], [], []).
  119entity_options([H|T0], Entities, Rest) :-
  120    (   H = entity(_,_)
  121    ->  Entities = [H|ET],
  122        entity_options(T0, ET, Rest)
  123    ;   Rest = [H|RT],
  124        entity_options(T0, Entities, RT)
  125    ).
  126
  127load_meta_option(convert_typed_literal).
 xml_to_rdf(+XML, -Triples, +Options)
  131xml_to_rdf(XML, Triples, Options) :-
  132    is_list(Options),
  133    !,
  134    make_rdf_state(Options, State, _),
  135    xml_to_plrdf(XML, RDF, State),
  136    rdf_triples(RDF, Triples).
  137xml_to_rdf(XML, BaseURI, Triples) :-
  138    atom(BaseURI),
  139    !,
  140    xml_to_rdf(XML, Triples, [base_uri(BaseURI)]).
  141
  142
  143                 /*******************************
  144                 *       POST-PROCESSING        *
  145                 *******************************/
  146
  147post_process([], Triples, Triples).
  148post_process([expand_foreach(true)|T], Triples0, Triples) :-
  149    !,
  150    expand_each(Triples0, Triples1),
  151    post_process(T, Triples1, Triples).
  152post_process([_|T], Triples0, Triples) :-
  153    !,
  154    post_process(T, Triples0, Triples).
  155
  156
  157                 /*******************************
  158                 *            EXPAND            *
  159                 *******************************/
  160
  161expand_each(Triples0, Triples) :-
  162    select(rdf(each(Container), Pred, Object),
  163           Triples0, Triples1),
  164    !,
  165    each_triples(Triples1, Container, Pred, Object, Triples2),
  166    expand_each(Triples2, Triples).
  167expand_each(Triples, Triples).
  168
  169each_triples([], _, _, _, []).
  170each_triples([H0|T0], Container, P, O,
  171             [H0, rdf(S,P,O)|T]) :-
  172    H0 = rdf(Container, rdf:A, S),
  173    member_attribute(A),
  174    !,
  175    each_triples(T0, Container, P, O, T).
  176each_triples([H|T0], Container, P, O, [H|T]) :-
  177    each_triples(T0, Container, P, O, T).
  178
  179member_attribute(A) :-
  180    sub_atom(A, 0, _, _, '_').      % must check number?
  181
  182
  183                 /*******************************
  184                 *           BIG FILES          *
  185                 *******************************/
 process_rdf(+Input, :OnObject, :Options)
Process RDF from Input. Input is either an atom or a term of the format stream(Handle). For each encountered description, call OnObject(+Triples) to handle the triples resulting from the description. Defined Options are:
base_uri(+URI)
Determines the reference URI.
db(DB)
When loading from a stream, the source is taken from this option or -if non-existent- from base_uri.
lang(LanguageID)
Set initial language (as xml:lang)
convert_typed_literal(:Convertor)
Call Convertor(+Type, +Content, -RDFObject) to create a triple rdf(S, P, RDFObject) instead of rdf(S, P, literal(type(Type, Content)).
namespaces(-Namespaces:list(NS=URL))
Return list of namespaces declared using xmlns:NS=URL in the document. This can be used to update the namespace list with rdf_register_ns/2.
entity(Name, Value)
Overrule entity values found in the file
embedded(Boolean)
If true, do not give warnings if rdf:RDF is embedded in other XML data.
  221process_rdf(File, OnObject, M:Options0) :-
  222    is_list(Options0),
  223    !,
  224    entity_options(Options0, EntOptions, Options1),
  225    meta_options(load_meta_option, M:Options1, Options2),
  226    option(base_uri(BaseURI), Options2, ''),
  227    rdf_start_file(Options2, Cleanup),
  228    strip_module(OnObject, Module, Pred),
  229    b_setval(rdf_object_handler, Module:Pred),
  230    nb_setval(rdf_options, Options2),
  231    nb_setval(rdf_state, -),
  232    init_ns_collect(Options2, NSList),
  233    (   File = stream(In)
  234    ->  Source = BaseURI
  235    ;   is_stream(File)
  236    ->  In = File,
  237        option(graph(Source), Options2, BaseURI)
  238    ;   open(File, read, In, [type(binary)]),
  239        Close = In,
  240        Source = File
  241    ),
  242    new_sgml_parser(Parser, [dtd(DTD)]),
  243    def_entities(EntOptions, DTD),
  244    (   Source \== []
  245    ->  set_sgml_parser(Parser, file(Source))
  246    ;   true
  247    ),
  248    set_sgml_parser(Parser, dialect(xmlns)),
  249    set_sgml_parser(Parser, space(sgml)),
  250    do_process_rdf(Parser, In, NSList, Close, Cleanup, Options2).
  251process_rdf(File, BaseURI, OnObject) :-
  252    process_rdf(File, OnObject, [base_uri(BaseURI)]).
  253
  254def_entities([], _).
  255def_entities([entity(Name, Value)|T], DTD) :-
  256    !,
  257    def_entity(DTD, Name, Value),
  258    def_entities(T, DTD).
  259def_entities([_|T0], DTD) :-
  260    def_entities(T0, DTD).
  261
  262def_entity(DTD, Name, Value) :-
  263    open_dtd(DTD, [], Stream),
  264    xml_quote_attribute(Value, QValue),
  265    format(Stream, '<!ENTITY ~w "~w">~n', [Name, QValue]),
  266    close(Stream).
  267
  268
  269do_process_rdf(Parser, In, NSList, Close, Cleanup, Options) :-
  270    call_cleanup((   sgml_parse(Parser,
  271                                [ source(In),
  272                                  call(begin, on_begin),
  273                                  call(xmlns, on_xmlns)
  274                                | Options
  275                                ]),
  276                     exit_ns_collect(NSList)
  277                 ),
  278                 cleanup_process(Close, Cleanup, Parser)).
  279
  280cleanup_process(In, Cleanup, Parser) :-
  281    (   var(In)
  282    ->  true
  283    ;   close(In)
  284    ),
  285    free_sgml_parser(Parser),
  286    nb_delete(rdf_options),
  287    nb_delete(rdf_object_handler),
  288    nb_delete(rdf_state),
  289    nb_delete(rdf_nslist),
  290    rdf_end_file(Cleanup).
  291
  292on_begin(NS:'RDF', Attr, _) :-
  293    rdf_name_space(NS),
  294    !,
  295    nb_getval(rdf_options, Options),
  296    make_rdf_state(Options, State0, _),
  297    rdf_modify_state(Attr, State0, State),
  298    nb_setval(rdf_state, State).
  299on_begin(Tag, Attr, Parser) :-
  300    nb_getval(rdf_state, State),
  301    (   State == (-)
  302    ->  nb_getval(rdf_options, RdfOptions),
  303        (   memberchk(embedded(true), RdfOptions)
  304        ->  true
  305        ;   print_message(warning, rdf(unexpected(Tag, Parser)))
  306        )
  307    ;   get_sgml_parser(Parser, line(Start)),
  308        get_sgml_parser(Parser, file(File)),
  309        sgml_parse(Parser,
  310                   [ document(Content),
  311                     parse(content)
  312                   ]),
  313        b_getval(rdf_object_handler, OnTriples),
  314        element_to_plrdf(element(Tag, Attr, Content), Objects, State),
  315        rdf_triples(Objects, Triples),
  316        call(OnTriples, Triples, File:Start)
  317    ).
 on_xmlns(+NS, +URL, +Parser)
Build up the list of encountered xmlns:NS=URL declarations. We use destructive assignment here as an alternative to assert/retract, ensuring thread-safety and better performance.
  325on_xmlns(NS, URL, _Parser) :-
  326    (   nb_getval(rdf_nslist, List),
  327        List = list(L0)
  328    ->  nb_linkarg(1, List, [NS=URL|L0])
  329    ;   true
  330    ).
  331
  332init_ns_collect(Options, NSList) :-
  333    (   option(namespaces(NSList), Options, -),
  334        NSList \== (-)
  335    ->  nb_setval(rdf_nslist, list([]))
  336    ;   nb_setval(rdf_nslist, -),
  337        NSList = (-)
  338    ).
  339
  340exit_ns_collect(NSList) :-
  341    (   NSList == (-)
  342    ->  true
  343    ;   nb_getval(rdf_nslist, list(NSList))
  344    ).
  345
  346
  347
  348                 /*******************************
  349                 *            MESSAGES          *
  350                 *******************************/
  351
  352:- multifile
  353    prolog:message/3.  354
  355%       Catch messages.  sgml/4 is generated by the SGML2PL binding.
  356
  357prolog:message(rdf(unparsed(Data))) -->
  358    { phrase(unparse_xml(Data), XML)
  359    },
  360    [ 'RDF: Failed to interpret "~s"'-[XML] ].
  361prolog:message(rdf(shared_blank_nodes(N))) -->
  362    [ 'RDF: Shared ~D blank nodes'-[N] ].
  363prolog:message(rdf(not_a_name(Name))) -->
  364    [ 'RDF: argument to rdf:ID is not an XML name: ~p'-[Name] ].
  365prolog:message(rdf(redefined_id(Id))) -->
  366    [ 'RDF: rdf:ID ~p: multiple definitions'-[Id] ].
  367prolog:message(rdf(unexpected(Tag, Parser))) -->
  368    { get_sgml_parser(Parser, file(File)),
  369      get_sgml_parser(Parser, line(Line))
  370    },
  371    [ 'RDF: ~w:~d: Unexpected element ~w'-[File, Line, Tag] ].
  372
  373
  374                 /*******************************
  375                 *          XML-TO-TEXT         *
  376                 *******************************/
  377
  378unparse_xml([]) -->
  379    !,
  380    [].
  381unparse_xml([H|T]) -->
  382    !,
  383    unparse_xml(H),
  384    unparse_xml(T).
  385unparse_xml(Atom) -->
  386    { atom(Atom)
  387    },
  388    !,
  389    atom(Atom).
  390unparse_xml(element(Name, Attr, Content)) -->
  391    "<",
  392    identifier(Name),
  393    attributes(Attr),
  394    (   { Content == []
  395        }
  396    ->  "/>"
  397    ;   ">",
  398        unparse_xml(Content)
  399    ).
  400
  401attributes([]) -->
  402    [].
  403attributes([H|T]) -->
  404    attribute(H),
  405    attributes(T).
  406
  407attribute(Name=Value) -->
  408    " ",
  409    identifier(Name),
  410    "=",
  411    value(Value).
  412
  413identifier(NS:Local) -->
  414    !,
  415    "{", atom(NS), "}",
  416    atom(Local).
  417identifier(Local) -->
  418    atom(Local).
  419
  420atom(Atom, Text, Rest) :-
  421    atom_codes(Atom, Chars),
  422    append(Chars, Rest, Text).
  423
  424value(Value) -->
  425    { atom_codes(Value, Chars)
  426    },
  427    "\"",
  428    quoted(Chars),
  429    "\"".
  430
  431quoted([]) -->
  432    [].
  433quoted([H|T]) -->
  434    quote(H),
  435    !,
  436    quoted(T).
  437
  438quote(0'<) --> "&lt;".
  439quote(0'>) --> "&gt;".
  440quote(0'") --> "&quot;".
  441quote(0'&) --> "&amp;".
  442quote(X)   --> [X].
  443
  444
  445                 /*******************************
  446                 *             XREF             *
  447                 *******************************/
  448
  449:- multifile prolog:meta_goal/2.  450prolog:meta_goal(process_rdf(_,G,_), [G+2])