1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2000-2015, University of Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(url, 36 [ parse_url/2, % +URL, -Parts | -URL +Parts 37 parse_url/3, % +URL|URI, +BaseURL, -Parts 38 % -URL, +BaseURL, +Parts 39 is_absolute_url/1, % +URL 40 global_url/3, % +Local, +Base, -Global 41 http_location/2, % ?Parts, ?Location 42 www_form_encode/2, % Value <-> Encoded 43 parse_url_search/2, % Form-data <-> Form fields 44 45 url_iri/2, % ?URL, ?IRI 46 47 file_name_to_url/2, % ?FileName, ?URL 48 49 set_url_encoding/2 % ?Old, +New 50 ]). 51:- autoload(library(error),[must_be/2,representation_error/1]). 52:- autoload(library(lists),[append/3,select/3,delete/3]). 53:- autoload(library(utf8),[utf8_codes/3]).
75 /******************************* 76 * GLOBALISE * 77 *******************************/
85global_url(URL, BaseURL, Global) :-
86 ( is_absolute_url(URL),
87 \+ sub_atom(URL, _, _, _, '%') % may have escape, use general
88 -> Global = URL
89 ; sub_atom(URL, 0, _, _, '//')
90 -> parse_url(BaseURL, [], Attributes),
91 memberchk(protocol(Proto), Attributes),
92 atomic_list_concat([Proto, :, URL], Global)
93 ; sub_atom(URL, 0, _, _, #)
94 -> ( sub_atom(BaseURL, _, _, 0, #)
95 -> sub_atom(URL, 1, _, 0, NoHash),
96 atom_concat(BaseURL, NoHash, Global)
97 ; atom_concat(BaseURL, URL, Global)
98 )
99 ; parse_url(URL, BaseURL, Attributes)
100 -> phrase(curl(Attributes), Chars),
101 atom_codes(Global, Chars)
102 ; throw(error(syntax_error(illegal_url), URL))
103 ).
110is_absolute_url(URL) :- 111 sub_atom(URL, 0, _, _, 'http://'), 112 !. 113is_absolute_url(URL) :- 114 sub_atom(URL, 0, _, _, 'https://'), 115 !. 116is_absolute_url(URL) :- 117 sub_atom(URL, 0, _, _, 'ftp://'), 118 !. 119is_absolute_url(URL) :- 120 sub_atom(URL, 0, _, _, 'file://'), 121 !. 122is_absolute_url(URL) :- 123 atom_codes(URL, Codes), 124 phrase(absolute_url, Codes, _), 125 !. 126 127 128 /******************************* 129 * CREATE URL/URI * 130 *******************************/
<Action> <Location> HTTP/<version>
145http_location(Parts, Location) :- % Parts --> Location 146 nonvar(Parts), 147 !, 148 phrase(curi(Parts), String), 149 !, 150 atom_codes(Location, String). 151http_location(Parts, Location) :- % Location --> Parts 152 atom(Location), 153 !, 154 atom_codes(Location, Codes), 155 phrase(http_location(Parts), Codes). 156http_location(Parts, Codes) :- % LocationCodes --> Parts 157 is_list(Codes), 158 phrase(http_location(Parts), Codes). 159 160 161curl(A) --> 162 { memberchk(protocol(Protocol), A) 163 }, 164 !, 165 catomic(Protocol), 166 ":", 167 curl(Protocol, A). 168curl(A) --> 169 curl(http, A). 170 171curl(file, A) --> 172 !, 173 ( "//" 174 -> cpath(A) 175 ; cpath(A) 176 ). 177curl(_, A) --> 178 "//", 179 cuser(A), 180 chost(A), 181 cport(A), 182 cpath(A), 183 csearch(A), 184 cfragment(A). 185 186curi(A) --> 187 cpath(A), 188 csearch(A). 189 190cpath(A) --> 191 ( { memberchk(path(Path), A) } 192 -> { atom_codes(Path, Codes) }, 193 www_encode(Codes, [0'/, 0'+, 0':, 0',]) 194 ; "" 195 ). 196 197cuser(A) --> 198 ( { memberchk(user(User), A) } 199 -> { atom_codes(User, Codes) }, 200 www_encode(Codes, [0':]), 201 "@" 202 ; "" 203 ). 204 205chost(A) --> 206 ( { memberchk(host(Host), A) } 207 -> { atom_codes(Host, Codes) }, 208 www_encode(Codes, []) 209 ; "" 210 ). 211 212cport(A) --> 213 ( { memberchk(port(Port), A), Port \== 80 } 214 -> { number_codes(Port, Codes) }, 215 ":", 216 www_encode(Codes, []) 217 ; "" 218 ). 219 220 221catomic(A, In, Out) :- 222 atom_codes(A, Codes), 223 append(Codes, Out, In).
227csearch(A)--> 228 ( { memberchk(search(Parameters), A) } 229 -> csearch(Parameters, [0'?]) 230 ; [] 231 ). 232 233csearch([], _) --> 234 []. 235csearch([Parameter|Parameters], Sep) --> 236 !, 237 codes(Sep), 238 cparam(Parameter), 239 csearch(Parameters, [0'&]). 240 241cparam(Name=Value) --> 242 !, 243 cname(Name), 244 "=", 245 cvalue(Value). 246cparam(NameValue) --> % allow to feed Name(Value) 247 { compound(NameValue), 248 !, 249 NameValue =.. [Name,Value] 250 }, 251 cname(Name), 252 "=", 253 cvalue(Value). 254cparam(Name)--> 255 cname(Name). 256 257codes([]) --> []. 258codes([H|T]) --> [H], codes(T). 259 260cname(Atom) --> 261 { atom_codes(Atom, Codes) }, 262 www_encode(Codes, []).
269cvalue(Value) --> 270 { atomic(Value), 271 !, 272 atom_codes(Value, Codes) 273 }, 274 www_encode(Codes, []). 275cvalue(Codes) --> 276 { must_be(codes, Codes) 277 }, 278 www_encode(Codes, []).
283cfragment(A) --> 284 { memberchk(fragment(Frag), A), 285 !, 286 atom_codes(Frag, Codes) 287 }, 288 "#", 289 www_encode(Codes, []). 290cfragment(_) --> 291 "". 292 293 294 /******************************* 295 * PARSING * 296 *******************************/
url:
, an
identifier separated from the remainder of the URL using :.
parse_url/2 assumes the http
protocol if no protocol is
specified and the URL can be parsed as a valid HTTP url. In
addition to the RFC-1738 specified protocols, the file
protocol is supported as well.ftp
, http
and file
protocols. If no path appears, the
library generates the path /
.?
, normally used to transfer data from HTML forms that
use the HTTP GET method. In the URL it consists of a
www-form-encoded list of Name=Value pairs. This is mapped to
a list of Prolog Name=Value terms with decoded names and
values.#
character.The example below illustrates all of this for an HTTP URL.
?- parse_url('http://www.xyz.org/hello?msg=Hello+World%21#x', P). P = [ protocol(http), host('www.xyz.org'), fragment(x), search([ msg = 'Hello World!' ]), path('/hello') ]
By instantiating the parts-list this predicate can be used to create a URL.
357parse_url(URL, Attributes) :- 358 nonvar(URL), 359 !, 360 atom_codes(URL, Codes), 361 phrase(url(Attributes), Codes). 362parse_url(URL, Attributes) :- 363 phrase(curl(Attributes), Codes), 364 !, 365 atom_codes(URL, Codes).
372parse_url(URL, BaseURL, Attributes) :- 373 nonvar(URL), 374 !, 375 atom_codes(URL, Codes), 376 ( phrase(absolute_url, Codes, _) 377 -> phrase(url(Attributes), Codes) 378 ; ( atomic(BaseURL) 379 -> parse_url(BaseURL, BaseA0) 380 ; BaseA0 = BaseURL 381 ), 382 select(path(BasePath), BaseA0, BaseA1), 383 delete(BaseA1, search(_), BaseA2), 384 delete(BaseA2, fragment(_), BaseA3), 385 phrase(relative_uri(URIA0), Codes), 386 select(path(LocalPath), URIA0, URIA1), 387 !, 388 globalise_path(LocalPath, BasePath, Path), 389 append(BaseA3, [path(Path)|URIA1], Attributes) 390 ). 391parse_url(URL, BaseURL, Attributes) :- 392 parse_url(BaseURL, BaseAttributes), 393 memberchk(path(BasePath), BaseAttributes), 394 ( memberchk(path(LocalPath), Attributes) 395 -> globalise_path(LocalPath, BasePath, Path) 396 ; Path = BasePath 397 ), 398 append([path(Path)|Attributes], BaseAttributes, GlobalAttributes), 399 phrase(curl(GlobalAttributes), Chars), 400 atom_codes(URL, Chars).
410globalise_path(LocalPath, _, LocalPath) :- 411 sub_atom(LocalPath, 0, _, _, /), 412 !. 413globalise_path(LocalPath, _, LocalPath) :- 414 is_absolute_file_name(LocalPath), 415 !. 416globalise_path(Local, Base, Path) :- 417 base_dir(Base, BaseDir), 418 make_path(BaseDir, Local, Path). 419 420base_dir(BasePath, BaseDir) :- 421 ( atom_concat(BaseDir, /, BasePath) 422 -> true 423 ; file_directory_name(BasePath, BaseDir) 424 ). 425 426make_path(Dir, Local, Path) :- 427 atom_concat('../', L2, Local), 428 file_directory_name(Dir, Parent), 429 Parent \== Dir, 430 !, 431 make_path(Parent, L2, Path). 432make_path(/, Local, Path) :- 433 !, 434 atom_concat(/, Local, Path). 435make_path(Dir, Local, Path) :- 436 atomic_list_concat([Dir, /, Local], Path).
445absolute_url --> 446 lwalpha(_First), 447 schema_chars(Rest), 448 { Rest \== [] }, 449 ":", 450 !. 451 452 453 /******************************* 454 * SEQUENCES * 455 *******************************/ 456 457digits(L) --> 458 digits(L, []). 459 460digits([C|T0], T) --> 461 digit(C), 462 !, 463 digits(T0, T). 464digits(T, T) --> 465 []. 466 467 468digit(C, [C|T], T) :- code_type(C, digit). 469 470 /******************************* 471 * RFC-3986 * 472 *******************************/
476url([protocol(Schema)|Parts]) --> 477 schema(Schema), 478 ":", 479 !, 480 hier_part(Schema, Parts, P2), 481 query(P2, P3), 482 fragment(P3, []). 483url([protocol(http)|Parts]) --> % implicit HTTP 484 authority(Parts, [path(Path)]), 485 path_abempty(Path). 486 487relative_uri(Parts) --> 488 relative_part(Parts, P2), 489 query(P2, P3), 490 fragment(P3, []). 491 492relative_part(Parts, Tail) --> 493 "//", 494 !, 495 authority(Parts, [path(Path)|Tail]), 496 path_abempty(Path). 497relative_part([path(Path)|T], T) --> 498 ( path_absolute(Path) 499 ; path_noschema(Path) 500 ; path_empty(Path) 501 ), 502 !. 503 504http_location([path(Path)|P2]) --> 505 path_abempty(Path), 506 query(P2, P3), 507 fragment(P3, []).
Schema ::= ALPHA *(ALPHA|DIGIT|"+"|"-"|".")
518schema(Schema) --> 519 lwalpha(C0), 520 schema_chars(Codes), 521 { atom_codes(Schema, [C0|Codes]) }. 522 523schema_chars([H|T]) --> 524 schema_char(H), 525 !, 526 schema_chars(T). 527schema_chars([]) --> 528 []. 529 530schema_char(H) --> 531 [C], 532 { C < 128, 533 ( code_type(C, alpha) 534 -> code_type(H, to_lower(C)) 535 ; code_type(C, digit) 536 -> H = C 537 ; schema_extra(C) 538 -> H = C 539 ) 540 }. 541 542schema_extra(0'+). 543schema_extra(0'-). 544schema_extra(0'.). % 0'
549hier_part(file, [path(Path)|Tail], Tail) --> 550 !, 551 "//", 552 ( win_drive_path(Path) 553 ; path_absolute(Path) 554 ; path_rootless(Path) 555 ; path_empty(Path) 556 ), 557 !. 558hier_part(_, Parts, Tail) --> 559 "//", 560 !, 561 authority(Parts, [path(Path)|Tail]), 562 path_abempty(Path). 563hier_part(_, [path(Path)|T], T) --> 564 ( path_absolute(Path) 565 ; path_rootless(Path) 566 ; path_empty(Path) 567 ), 568 !. 569 Parts, Tail) (--> 571 user_info_chars(UserChars), 572 "@", 573 !, 574 { atom_codes(User, UserChars), 575 Parts = [user(User),host(Host)|T0] 576 }, 577 host(Host), 578 port(T0,Tail). 579authority([host(Host)|T0], Tail) --> 580 host(Host), 581 port(T0, Tail). 582 583user_info_chars([H|T]) --> 584 user_info_char(H), 585 !, 586 user_info_chars(T). 587user_info_chars([]) --> 588 []. 589 590user_info_char(_) --> "@", !, {fail}. 591user_info_char(C) --> pchar(C). 592 593%host(Host) --> ip_literal(Host), !. % TBD: IP6 addresses 594host(Host) --> ip4_address(Host), !. 595host(Host) --> reg_name(Host). 596 597ip4_address(Atom) --> 598 i256_chars(Chars, [0'.|T0]), 599 i256_chars(T0, [0'.|T1]), 600 i256_chars(T1, [0'.|T2]), 601 i256_chars(T2, []), 602 { atom_codes(Atom, Chars) }. 603 604i256_chars(Chars, T) --> 605 digits(Chars, T), 606 { \+ \+ ( T = [], 607 Chars \== [], 608 number_codes(I, Chars), 609 I < 256 610 ) 611 }. 612 613reg_name(Host) --> 614 reg_name_chars(Chars), 615 { atom_codes(Host, Chars) }. 616 617reg_name_chars([H|T]) --> 618 reg_name_char(H), 619 !, 620 reg_name_chars(T). 621reg_name_chars([]) --> 622 []. 623 624reg_name_char(C) --> 625 pchar(C), 626 { C \== 0':, 627 C \== 0'@ 628 }. 629 630port([port(Port)|T], T) --> 631 ":", 632 !, 633 digit(D0), 634 digits(Ds), 635 { number_codes(Port, [D0|Ds]) }. 636port(T, T) --> 637 []. 638 639path_abempty(Path) --> 640 segments_chars(Chars, []), 641 { Chars == [] 642 -> Path = '/' 643 ; atom_codes(Path, Chars) 644 }. 645 646 647win_drive_path(Path) --> 648 drive_letter(C0), 649 ":", 650 ( "/" 651 -> {Codes = [C0, 0':, 0'/|Chars]} 652 ; {Codes = [C0, 0':|Chars]} 653 ), 654 segment_nz_chars(Chars, T0), 655 segments_chars(T0, []), 656 { atom_codes(Path, Codes) }. 657 658 659path_absolute(Path) --> 660 "/", 661 segment_nz_chars(Chars, T0), 662 segments_chars(T0, []), 663 { atom_codes(Path, [0'/| Chars]) }. 664 665path_noschema(Path) --> 666 segment_nz_nc_chars(Chars, T0), 667 segments_chars(T0, []), 668 { atom_codes(Path, Chars) }. 669 670path_rootless(Path) --> 671 segment_nz_chars(Chars, T0), 672 segments_chars(T0, []), 673 { atom_codes(Path, Chars) }. 674 675path_empty('/') --> 676 "". 677 678segments_chars([0'/|Chars], T) --> % 0' 679 "/", 680 !, 681 segment_chars(Chars, T0), 682 segments_chars(T0, T). 683segments_chars(T, T) --> 684 []. 685 686segment_chars([H|T0], T) --> 687 pchar(H), 688 !, 689 segment_chars(T0, T). 690segment_chars(T, T) --> 691 []. 692 693segment_nz_chars([H|T0], T) --> 694 pchar(H), 695 segment_chars(T0, T). 696 697segment_nz_nc_chars([H|T0], T) --> 698 segment_nz_nc_char(H), 699 !, 700 segment_nz_nc_chars(T0, T). 701segment_nz_nc_chars(T, T) --> 702 []. 703 704segment_nz_nc_char(_) --> ":", !, {fail}. 705segment_nz_nc_char(C) --> pchar(C).
712query([search(Params)|T], T) --> 713 "?", 714 !, 715 search(Params). 716query(T,T) --> 717 []. 718 719search([Parameter|Parameters])--> 720 parameter(Parameter), 721 !, 722 ( search_sep 723 -> search(Parameters) 724 ; { Parameters = [] } 725 ). 726search([]) --> 727 []. 728 729parameter(Param)--> 730 !, 731 search_chars(NameS), 732 { atom_codes(Name, NameS) 733 }, 734 ( "=" 735 -> search_value_chars(ValueS), 736 { atom_codes(Value, ValueS), 737 Param = (Name = Value) 738 } 739 ; { Param = Name 740 } 741 ). 742 743search_chars([C|T]) --> 744 search_char(C), 745 !, 746 search_chars(T). 747search_chars([]) --> 748 []. 749 750search_char(_) --> search_sep, !, { fail }. 751search_char(_) --> "=", !, { fail }. 752search_char(C) --> fragment_char(C). 753 754search_value_chars([C|T]) --> 755 search_value_char(C), 756 !, 757 search_value_chars(T). 758search_value_chars([]) --> 759 []. 760 761search_value_char(_) --> search_sep, !, { fail }. 762search_value_char(C) --> fragment_char(C).
772search_sep --> "&", !. 773search_sep --> ";".
780fragment([fragment(Fragment)|T], T) --> 781 "#", 782 !, 783 fragment_chars(Codes), 784 { atom_codes(Fragment, Codes) }. 785fragment(T, T) --> 786 []. 787 788fragment_chars([H|T]) --> 789 fragment_char(H), 790 !, 791 fragment_chars(T). 792fragment_chars([]) --> 793 [].
800fragment_char(C) --> pchar(C), !. 801fragment_char(0'/) --> "/", !. 802fragment_char(0'?) --> "?", !. 803fragment_char(0'[) --> "[", !. % Not according RDF3986! 804fragment_char(0']) --> "]", !. 805 806 807 /******************************* 808 * CHARACTER CLASSES * 809 *******************************/
Performs UTF-8 decoding of percent encoded strings.
817pchar(0'\s) --> "+", !. 818pchar(C) --> 819 [C], 820 { unreserved(C) 821 ; sub_delim(C) 822 ; C == 0': 823 ; C == 0'@ 824 }, 825 !. 826pchar(C) --> 827 percent_coded(C).
833lwalpha(H) --> 834 [C], 835 { C < 128, 836 code_type(C, alpha), 837 code_type(H, to_lower(C)) 838 }. 839 840drive_letter(C) --> 841 [C], 842 { C < 128, 843 code_type(C, alpha) 844 }. 845 846 847 /******************************* 848 * RESERVED CHARACTERS * 849 *******************************/
855sub_delim(0'!). 856sub_delim(0'$). 857sub_delim(0'&). 858sub_delim(0''). 859sub_delim(0'(). 860sub_delim(0')). 861sub_delim(0'*). 862sub_delim(0'+). 863sub_delim(0',). 864sub_delim(0';). 865sub_delim(0'=).
873term_expansion(unreserved(map), Clauses) :- 874 findall(unreserved(C), unreserved_(C), Clauses). 875 876unreserved_(C) :- 877 between(1, 128, C), 878 code_type(C, alnum). 879unreserved_(0'-). 880unreserved_(0'.). 881unreserved_(0'_). 882unreserved_(0'~). % 0' 883 884unreserved(map). % Expanded 885 886 887 /******************************* 888 * FORMS * 889 *******************************/ 890 891/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 892Encoding/decoding of form-fields using the popular www-form-encoded 893encoding used with the HTTP GET. 894- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
alnum
(see code_type/2)), and one of "-._~" using percent
encoding. Newline is mapped to %OD%OA
. When decoding,
newlines appear as a single newline (10) character.
Note that a space is encoded as %20
instead of +
.
Decoding decodes both to a space.
910www_form_encode(Value, Encoded) :- 911 atomic(Value), 912 !, 913 atom_codes(Value, Codes), 914 phrase(www_encode(Codes, []), EncCodes), 915 atom_codes(Encoded, EncCodes). 916www_form_encode(Value, Encoded) :- 917 atom_codes(Encoded, EncCodes), 918 phrase(www_decode(Codes), EncCodes), 919 atom_codes(Value, Codes).
923www_encode([0'\r, 0'\n|T], Extra) --> 924 !, 925 "%0D%0A", 926 www_encode(T, Extra). 927www_encode([0'\n|T], Extra) --> 928 !, 929 "%0D%0A", 930 www_encode(T, Extra). 931www_encode([H|T], Extra) --> 932 percent_encode(H, Extra), 933 www_encode(T, Extra). 934www_encode([], _) --> 935 "". 936 937percent_encode(C, _Extra) --> 938 { unreserved(C) }, 939 !, 940 [C]. 941percent_encode(C, Extra) --> 942 { memberchk(C, Extra) }, 943 !, 944 [C]. 945%percent_encode(0' , _) --> !, "+". % Deprecated: use %20 946percent_encode(C, _) --> 947 { C =< 127 }, 948 !, 949 percent_byte(C). 950percent_encode(C, _) --> % Unicode characters 951 { current_prolog_flag(url_encoding, utf8), 952 !, 953 phrase(utf8_codes([C]), Bytes) 954 }, 955 percent_bytes(Bytes). 956percent_encode(C, _) --> 957 { C =< 255 }, 958 !, 959 percent_byte(C). 960percent_encode(_C, _) --> 961 { representation_error(url_character) 962 }. 963 964percent_bytes([]) --> 965 "". 966percent_bytes([H|T]) --> 967 percent_byte(H), 968 percent_bytes(T). 969 970percent_byte(C) --> 971 [0'%, D1, D2], 972 { nonvar(C) 973 -> Dv1 is (C>>4 /\ 0xf), 974 Dv2 is (C /\ 0xf), 975 code_type(D1, xdigit(Dv1)), 976 code_type(D2, xdigit(Dv2)) 977 ; code_type(D1, xdigit(Dv1)), 978 code_type(D2, xdigit(Dv2)), 979 C is ((Dv1)<<4) + Dv2 980 }. 981 982percent_coded(C) --> 983 percent_byte(C0), 984 !, 985 ( { C0 == 13 % %0D%0A --> \n 986 }, 987 "%0", 988 ( "A" ; "a" ) 989 -> { C = 10 990 } 991 ; { C0 >= 0xc0 }, % UTF-8 lead-in 992 utf8_cont(Cs), 993 { phrase(utf8_codes([C]), [C0|Cs]) } 994 -> [] 995 ; { C = C0 996 } 997 ).
1001www_decode([0' |T]) --> 1002 "+", 1003 !, 1004 www_decode(T). 1005www_decode([C|T]) --> 1006 percent_coded(C), 1007 !, 1008 www_decode(T). 1009www_decode([C|T]) --> 1010 [C], 1011 !, 1012 www_decode(T). 1013www_decode([]) --> 1014 []. 1015 1016utf8_cont([H|T]) --> 1017 percent_byte(H), 1018 { between(0x80, 0xbf, H) }, 1019 !, 1020 utf8_cont(T). 1021utf8_cont([]) --> 1022 [].
utf8
.
The only other defined value is iso_latin_1
.
1033:- create_prolog_flag(url_encoding, utf8, [type(atom)]). 1034 1035set_url_encoding(Old, New) :- 1036 current_prolog_flag(url_encoding, Old), 1037 ( Old == New 1038 -> true 1039 ; must_be(oneof([utf8, iso_latin_1]), New), 1040 set_prolog_flag(url_encoding, New) 1041 ). 1042 1043 1044 /******************************* 1045 * IRI PROCESSING * 1046 *******************************/
1055url_iri(Encoded, Decoded) :- 1056 nonvar(Encoded), 1057 !, 1058 ( sub_atom(Encoded, _, _, _, '%') 1059 -> atom_codes(Encoded, Codes), 1060 unescape_precent(Codes, UTF8), 1061 phrase(utf8_codes(Unicodes), UTF8), 1062 atom_codes(Decoded, Unicodes) 1063 ; Decoded = Encoded 1064 ). 1065url_iri(URL, IRI) :- 1066 atom_codes(IRI, IRICodes), 1067 atom_codes('/:?#&=', ExtraEscapes), 1068 phrase(www_encode(IRICodes, ExtraEscapes), UrlCodes), 1069 atom_codes(URL, UrlCodes). 1070 1071 1072unescape_precent([], []). 1073unescape_precent([0'%,C1,C2|T0], [H|T]) :- %' 1074 !, 1075 code_type(C1, xdigit(D1)), 1076 code_type(C2, xdigit(D2)), 1077 H is D1*16 + D2, 1078 unescape_precent(T0, T). 1079unescape_precent([H|T0], [H|T]) :- 1080 unescape_precent(T0, T). 1081 1082 1083 /******************************* 1084 * FORM DATA * 1085 *******************************/
application/x-www-form-urlencoded
as used in HTTP GET
requests.1094parse_url_search(Spec, Fields) :- 1095 atomic(Spec), 1096 !, 1097 atom_codes(Spec, Codes), 1098 phrase(search(Fields), Codes). 1099parse_url_search(Codes, Fields) :- 1100 is_list(Codes), 1101 !, 1102 phrase(search(Fields), Codes). 1103parse_url_search(Codes, Fields) :- 1104 must_be(list, Fields), 1105 phrase(csearch(Fields, []), Codes). 1106 1107 1108 /******************************* 1109 * FILE URLs * 1110 *******************************/
1120file_name_to_url(File, FileURL) :- 1121 nonvar(File), 1122 !, 1123 absolute_file_name(File, Path), 1124 atom_concat('file://', Path, FileURL), 1125 !. 1126file_name_to_url(File, FileURL) :- 1127 atom_concat('file://', File, FileURL), 1128 !
Analysing and constructing URL
This library deals with the analysis and construction of a URL, Universal Resource Locator. URL is the basis for communicating locations of resources (data) on the web. A URL consists of a protocol identifier (e.g. HTTP, FTP, and a protocol-specific syntax further defining the location. URLs are standardized in RFC-1738.
The implementation in this library covers only a small portion of the defined protocols. Though the initial implementation followed RFC-1738 strictly, the current is more relaxed to deal with frequent violations of the standard encountered in practical use.
clib
package. */