34
35:- module(url,
36 [ parse_url/2, 37 parse_url/3, 38 39 is_absolute_url/1, 40 global_url/3, 41 http_location/2, 42 www_form_encode/2, 43 parse_url_search/2, 44
45 url_iri/2, 46
47 file_name_to_url/2, 48
49 set_url_encoding/2 50 ]). 51:- autoload(library(error),[must_be/2,representation_error/1]). 52:- autoload(library(lists),[append/3,select/3,delete/3]). 53:- autoload(library(utf8),[utf8_codes/3]). 54
55
74
75 78
84
85global_url(URL, BaseURL, Global) :-
86 ( is_absolute_url(URL),
87 \+ sub_atom(URL, _, _, _, '%') 88 -> Global = URL
89 ; sub_atom(URL, 0, _, _, '//')
90 -> parse_url(BaseURL, [], Attributes),
91 memberchk(protocol(Proto), Attributes),
92 atomic_list_concat([Proto, :, URL], Global)
93 ; sub_atom(URL, 0, _, _, #)
94 -> ( sub_atom(BaseURL, _, _, 0, #)
95 -> sub_atom(URL, 1, _, 0, NoHash),
96 atom_concat(BaseURL, NoHash, Global)
97 ; atom_concat(BaseURL, URL, Global)
98 )
99 ; parse_url(URL, BaseURL, Attributes)
100 -> phrase(curl(Attributes), Chars),
101 atom_codes(Global, Chars)
102 ; throw(error(syntax_error(illegal_url), URL))
103 ).
104
109
110is_absolute_url(URL) :-
111 sub_atom(URL, 0, _, _, 'http://'),
112 !.
113is_absolute_url(URL) :-
114 sub_atom(URL, 0, _, _, 'https://'),
115 !.
116is_absolute_url(URL) :-
117 sub_atom(URL, 0, _, _, 'ftp://'),
118 !.
119is_absolute_url(URL) :-
120 sub_atom(URL, 0, _, _, 'file://'),
121 !.
122is_absolute_url(URL) :-
123 atom_codes(URL, Codes),
124 phrase(absolute_url, Codes, _),
125 !.
126
127
128 131
144
145http_location(Parts, Location) :- 146 nonvar(Parts),
147 !,
148 phrase(curi(Parts), String),
149 !,
150 atom_codes(Location, String).
151http_location(Parts, Location) :- 152 atom(Location),
153 !,
154 atom_codes(Location, Codes),
155 phrase(http_location(Parts), Codes).
156http_location(Parts, Codes) :- 157 is_list(Codes),
158 phrase(http_location(Parts), Codes).
159
160
161curl(A) -->
162 { memberchk(protocol(Protocol), A)
163 },
164 !,
165 catomic(Protocol),
166 ":",
167 curl(Protocol, A).
168curl(A) -->
169 curl(http, A).
170
171curl(file, A) -->
172 !,
173 ( "//"
174 -> cpath(A)
175 ; cpath(A)
176 ).
177curl(_, A) -->
178 "//",
179 cuser(A),
180 chost(A),
181 cport(A),
182 cpath(A),
183 csearch(A),
184 cfragment(A).
185
186curi(A) -->
187 cpath(A),
188 csearch(A).
189
190cpath(A) -->
191 ( { memberchk(path(Path), A) }
192 -> { atom_codes(Path, Codes) },
193 www_encode(Codes, [0'/, 0'+, 0':, 0',])
194 ; ""
195 ).
196
197cuser(A) -->
198 ( { memberchk(user(User), A) }
199 -> { atom_codes(User, Codes) },
200 www_encode(Codes, [0':]),
201 "@"
202 ; ""
203 ).
204
205chost(A) -->
206 ( { memberchk(host(Host), A) }
207 -> { atom_codes(Host, Codes) },
208 www_encode(Codes, [])
209 ; ""
210 ).
211
212cport(A) -->
213 ( { memberchk(port(Port), A), Port \== 80 }
214 -> { number_codes(Port, Codes) },
215 ":",
216 www_encode(Codes, [])
217 ; ""
218 ).
219
220
221catomic(A, In, Out) :-
222 atom_codes(A, Codes),
223 append(Codes, Out, In).
224
226
227csearch(A)-->
228 ( { memberchk(search(Parameters), A) }
229 -> csearch(Parameters, [0'?])
230 ; []
231 ).
232
233csearch([], _) -->
234 [].
235csearch([Parameter|Parameters], Sep) -->
236 !,
237 codes(Sep),
238 cparam(Parameter),
239 csearch(Parameters, [0'&]).
240
241cparam(Name=Value) -->
242 !,
243 cname(Name),
244 "=",
245 cvalue(Value).
246cparam(NameValue) --> 247 { compound(NameValue),
248 !,
249 NameValue =.. [Name,Value]
250 },
251 cname(Name),
252 "=",
253 cvalue(Value).
254cparam(Name)-->
255 cname(Name).
256
257codes([]) --> [].
258codes([H|T]) --> [H], codes(T).
259
260cname(Atom) -->
261 { atom_codes(Atom, Codes) },
262 www_encode(Codes, []).
263
268
269cvalue(Value) -->
270 { atomic(Value),
271 !,
272 atom_codes(Value, Codes)
273 },
274 www_encode(Codes, []).
275cvalue(Codes) -->
276 { must_be(codes, Codes)
277 },
278 www_encode(Codes, []).
279
280
282
283cfragment(A) -->
284 { memberchk(fragment(Frag), A),
285 !,
286 atom_codes(Frag, Codes)
287 },
288 "#",
289 www_encode(Codes, []).
290cfragment(_) -->
291 "".
292
293
294 297
356
357parse_url(URL, Attributes) :-
358 nonvar(URL),
359 !,
360 atom_codes(URL, Codes),
361 phrase(url(Attributes), Codes).
362parse_url(URL, Attributes) :-
363 phrase(curl(Attributes), Codes),
364 !,
365 atom_codes(URL, Codes).
366
371
372parse_url(URL, BaseURL, Attributes) :-
373 nonvar(URL),
374 !,
375 atom_codes(URL, Codes),
376 ( phrase(absolute_url, Codes, _)
377 -> phrase(url(Attributes), Codes)
378 ; ( atomic(BaseURL)
379 -> parse_url(BaseURL, BaseA0)
380 ; BaseA0 = BaseURL
381 ),
382 select(path(BasePath), BaseA0, BaseA1),
383 delete(BaseA1, search(_), BaseA2),
384 delete(BaseA2, fragment(_), BaseA3),
385 phrase(relative_uri(URIA0), Codes),
386 select(path(LocalPath), URIA0, URIA1),
387 !,
388 globalise_path(LocalPath, BasePath, Path),
389 append(BaseA3, [path(Path)|URIA1], Attributes)
390 ).
391parse_url(URL, BaseURL, Attributes) :-
392 parse_url(BaseURL, BaseAttributes),
393 memberchk(path(BasePath), BaseAttributes),
394 ( memberchk(path(LocalPath), Attributes)
395 -> globalise_path(LocalPath, BasePath, Path)
396 ; Path = BasePath
397 ),
398 append([path(Path)|Attributes], BaseAttributes, GlobalAttributes),
399 phrase(curl(GlobalAttributes), Chars),
400 atom_codes(URL, Chars).
401
402
409
410globalise_path(LocalPath, _, LocalPath) :-
411 sub_atom(LocalPath, 0, _, _, /),
412 !.
413globalise_path(LocalPath, _, LocalPath) :-
414 is_absolute_file_name(LocalPath),
415 !.
416globalise_path(Local, Base, Path) :-
417 base_dir(Base, BaseDir),
418 make_path(BaseDir, Local, Path).
419
420base_dir(BasePath, BaseDir) :-
421 ( atom_concat(BaseDir, /, BasePath)
422 -> true
423 ; file_directory_name(BasePath, BaseDir)
424 ).
425
426make_path(Dir, Local, Path) :-
427 atom_concat('../', L2, Local),
428 file_directory_name(Dir, Parent),
429 Parent \== Dir,
430 !,
431 make_path(Parent, L2, Path).
432make_path(/, Local, Path) :-
433 !,
434 atom_concat(/, Local, Path).
435make_path(Dir, Local, Path) :-
436 atomic_list_concat([Dir, /, Local], Path).
437
438
444
445absolute_url -->
446 lwalpha(_First),
447 schema_chars(Rest),
448 { Rest \== [] },
449 ":",
450 !.
451
452
453 456
457digits(L) -->
458 digits(L, []).
459
460digits([C|T0], T) -->
461 digit(C),
462 !,
463 digits(T0, T).
464digits(T, T) -->
465 [].
466
467
468digit(C, [C|T], T) :- code_type(C, digit).
469
470 473
475
476url([protocol(Schema)|Parts]) -->
477 schema(Schema),
478 ":",
479 !,
480 hier_part(Schema, Parts, P2),
481 query(P2, P3),
482 fragment(P3, []).
483url([protocol(http)|Parts]) --> 484 authority(Parts, [path(Path)]),
485 path_abempty(Path).
486
487relative_uri(Parts) -->
488 relative_part(Parts, P2),
489 query(P2, P3),
490 fragment(P3, []).
491
492relative_part(Parts, Tail) -->
493 "//",
494 !,
495 authority(Parts, [path(Path)|Tail]),
496 path_abempty(Path).
497relative_part([path(Path)|T], T) -->
498 ( path_absolute(Path)
499 ; path_noschema(Path)
500 ; path_empty(Path)
501 ),
502 !.
503
504http_location([path(Path)|P2]) -->
505 path_abempty(Path),
506 query(P2, P3),
507 fragment(P3, []).
508
517
518schema(Schema) -->
519 lwalpha(C0),
520 schema_chars(Codes),
521 { atom_codes(Schema, [C0|Codes]) }.
522
523schema_chars([H|T]) -->
524 schema_char(H),
525 !,
526 schema_chars(T).
527schema_chars([]) -->
528 [].
529
530schema_char(H) -->
531 [C],
532 { C < 128,
533 ( code_type(C, alpha)
534 -> code_type(H, to_lower(C))
535 ; code_type(C, digit)
536 -> H = C
537 ; schema_extra(C)
538 -> H = C
539 )
540 }.
541
(0'+).
543schema_extra(0'-).
544schema_extra(0'.). 545
546
548
549hier_part(file, [path(Path)|Tail], Tail) -->
550 !,
551 "//",
552 ( win_drive_path(Path)
553 ; path_absolute(Path)
554 ; path_rootless(Path)
555 ; path_empty(Path)
556 ),
557 !.
558hier_part(_, Parts, Tail) -->
559 "//",
560 !,
561 authority(Parts, [path(Path)|Tail]),
562 path_abempty(Path).
563hier_part(_, [path(Path)|T], T) -->
564 ( path_absolute(Path)
565 ; path_rootless(Path)
566 ; path_empty(Path)
567 ),
568 !.
569
570authority(Parts, Tail) -->
571 user_info_chars(UserChars),
572 "@",
573 !,
574 { atom_codes(User, UserChars),
575 Parts = [user(User),host(Host)|T0]
576 },
577 host(Host),
578 port(T0,Tail).
579authority([host(Host)|T0], Tail) -->
580 host(Host),
581 port(T0, Tail).
582
583user_info_chars([H|T]) -->
584 user_info_char(H),
585 !,
586 user_info_chars(T).
587user_info_chars([]) -->
588 [].
589
590user_info_char(_) --> "@", !, {fail}.
591user_info_char(C) --> pchar(C).
592
594host(Host) --> ip4_address(Host), !.
595host(Host) --> reg_name(Host).
596
597ip4_address(Atom) -->
598 i256_chars(Chars, [0'.|T0]),
599 i256_chars(T0, [0'.|T1]),
600 i256_chars(T1, [0'.|T2]),
601 i256_chars(T2, []),
602 { atom_codes(Atom, Chars) }.
603
604i256_chars(Chars, T) -->
605 digits(Chars, T),
606 { \+ \+ ( T = [],
607 Chars \== [],
608 number_codes(I, Chars),
609 I < 256
610 )
611 }.
612
613reg_name(Host) -->
614 reg_name_chars(Chars),
615 { atom_codes(Host, Chars) }.
616
617reg_name_chars([H|T]) -->
618 reg_name_char(H),
619 !,
620 reg_name_chars(T).
621reg_name_chars([]) -->
622 [].
623
624reg_name_char(C) -->
625 pchar(C),
626 { C \== 0':,
627 C \== 0'@
628 }.
629
630port([port(Port)|T], T) -->
631 ":",
632 !,
633 digit(D0),
634 digits(Ds),
635 { number_codes(Port, [D0|Ds]) }.
636port(T, T) -->
637 [].
638
639path_abempty(Path) -->
640 segments_chars(Chars, []),
641 { Chars == []
642 -> Path = '/'
643 ; atom_codes(Path, Chars)
644 }.
645
646
647win_drive_path(Path) -->
648 drive_letter(C0),
649 ":",
650 ( "/"
651 -> {Codes = [C0, 0':, 0'/|Chars]}
652 ; {Codes = [C0, 0':|Chars]}
653 ),
654 segment_nz_chars(Chars, T0),
655 segments_chars(T0, []),
656 { atom_codes(Path, Codes) }.
657
658
659path_absolute(Path) -->
660 "/",
661 segment_nz_chars(Chars, T0),
662 segments_chars(T0, []),
663 { atom_codes(Path, [0'/| Chars]) }.
664
665path_noschema(Path) -->
666 segment_nz_nc_chars(Chars, T0),
667 segments_chars(T0, []),
668 { atom_codes(Path, Chars) }.
669
670path_rootless(Path) -->
671 segment_nz_chars(Chars, T0),
672 segments_chars(T0, []),
673 { atom_codes(Path, Chars) }.
674
675path_empty('/') -->
676 "".
677
678segments_chars([0'/|Chars], T) --> 679 "/",
680 !,
681 segment_chars(Chars, T0),
682 segments_chars(T0, T).
683segments_chars(T, T) -->
684 [].
685
686segment_chars([H|T0], T) -->
687 pchar(H),
688 !,
689 segment_chars(T0, T).
690segment_chars(T, T) -->
691 [].
692
693segment_nz_chars([H|T0], T) -->
694 pchar(H),
695 segment_chars(T0, T).
696
697segment_nz_nc_chars([H|T0], T) -->
698 segment_nz_nc_char(H),
699 !,
700 segment_nz_nc_chars(T0, T).
701segment_nz_nc_chars(T, T) -->
702 [].
703
704segment_nz_nc_char(_) --> ":", !, {fail}.
705segment_nz_nc_char(C) --> pchar(C).
706
707
711
712query([search(Params)|T], T) -->
713 "?",
714 !,
715 search(Params).
716query(T,T) -->
717 [].
718
719search([Parameter|Parameters])-->
720 parameter(Parameter),
721 !,
722 ( search_sep
723 -> search(Parameters)
724 ; { Parameters = [] }
725 ).
726search([]) -->
727 [].
728
729parameter(Param)-->
730 !,
731 search_chars(NameS),
732 { atom_codes(Name, NameS)
733 },
734 ( "="
735 -> search_value_chars(ValueS),
736 { atom_codes(Value, ValueS),
737 Param = (Name = Value)
738 }
739 ; { Param = Name
740 }
741 ).
742
743search_chars([C|T]) -->
744 search_char(C),
745 !,
746 search_chars(T).
747search_chars([]) -->
748 [].
749
750search_char(_) --> search_sep, !, { fail }.
751search_char(_) --> "=", !, { fail }.
752search_char(C) --> fragment_char(C).
753
754search_value_chars([C|T]) -->
755 search_value_char(C),
756 !,
757 search_value_chars(T).
758search_value_chars([]) -->
759 [].
760
761search_value_char(_) --> search_sep, !, { fail }.
762search_value_char(C) --> fragment_char(C).
763
771
772search_sep --> "&", !.
773search_sep --> ";".
774
775
779
780fragment([fragment(Fragment)|T], T) -->
781 "#",
782 !,
783 fragment_chars(Codes),
784 { atom_codes(Fragment, Codes) }.
785fragment(T, T) -->
786 [].
787
788fragment_chars([H|T]) -->
789 fragment_char(H),
790 !,
791 fragment_chars(T).
792fragment_chars([]) -->
793 [].
794
795
799
800fragment_char(C) --> pchar(C), !.
801fragment_char(0'/) --> "/", !.
802fragment_char(0'?) --> "?", !.
803fragment_char(0'[) --> "[", !. 804fragment_char(0']) --> "]", !.
805
806
807 810
816
817pchar(0'\s) --> "+", !.
818pchar(C) -->
819 [C],
820 { unreserved(C)
821 ; sub_delim(C)
822 ; C == 0':
823 ; C == 0'@
824 },
825 !.
826pchar(C) -->
827 percent_coded(C).
828
832
833lwalpha(H) -->
834 [C],
835 { C < 128,
836 code_type(C, alpha),
837 code_type(H, to_lower(C))
838 }.
839
840drive_letter(C) -->
841 [C],
842 { C < 128,
843 code_type(C, alpha)
844 }.
845
846
847 850
854
855sub_delim(0'!).
856sub_delim(0'$).
857sub_delim(0'&).
858sub_delim(0'').
859sub_delim(0'().
860sub_delim(0')).
861sub_delim(0'*).
862sub_delim(0'+).
863sub_delim(0',).
864sub_delim(0';).
865sub_delim(0'=).
866
867
872
873term_expansion(unreserved(map), Clauses) :-
874 findall(unreserved(C), unreserved_(C), Clauses).
875
876unreserved_(C) :-
877 between(1, 128, C),
878 code_type(C, alnum).
879unreserved_(0'-).
880unreserved_(0'.).
881unreserved_(0'_).
882unreserved_(0'~). 883
884unreserved(map). 885
886
887 890
895
909
910www_form_encode(Value, Encoded) :-
911 atomic(Value),
912 !,
913 atom_codes(Value, Codes),
914 phrase(www_encode(Codes, []), EncCodes),
915 atom_codes(Encoded, EncCodes).
916www_form_encode(Value, Encoded) :-
917 atom_codes(Encoded, EncCodes),
918 phrase(www_decode(Codes), EncCodes),
919 atom_codes(Value, Codes).
920
922
923www_encode([0'\r, 0'\n|T], Extra) -->
924 !,
925 "%0D%0A",
926 www_encode(T, Extra).
927www_encode([0'\n|T], Extra) -->
928 !,
929 "%0D%0A",
930 www_encode(T, Extra).
931www_encode([H|T], Extra) -->
932 percent_encode(H, Extra),
933 www_encode(T, Extra).
934www_encode([], _) -->
935 "".
936
937percent_encode(C, _Extra) -->
938 { unreserved(C) },
939 !,
940 [C].
941percent_encode(C, Extra) -->
942 { memberchk(C, Extra) },
943 !,
944 [C].
946percent_encode(C, _) -->
947 { C =< 127 },
948 !,
949 percent_byte(C).
950percent_encode(C, _) --> 951 { current_prolog_flag(url_encoding, utf8),
952 !,
953 phrase(utf8_codes([C]), Bytes)
954 },
955 percent_bytes(Bytes).
956percent_encode(C, _) -->
957 { C =< 255 },
958 !,
959 percent_byte(C).
960percent_encode(_C, _) -->
961 { representation_error(url_character)
962 }.
963
964percent_bytes([]) -->
965 "".
966percent_bytes([H|T]) -->
967 percent_byte(H),
968 percent_bytes(T).
969
970percent_byte(C) -->
971 [0'%, D1, D2],
972 { nonvar(C)
973 -> Dv1 is (C>>4 /\ 0xf),
974 Dv2 is (C /\ 0xf),
975 code_type(D1, xdigit(Dv1)),
976 code_type(D2, xdigit(Dv2))
977 ; code_type(D1, xdigit(Dv1)),
978 code_type(D2, xdigit(Dv2)),
979 C is ((Dv1)<<4) + Dv2
980 }.
981
982percent_coded(C) -->
983 percent_byte(C0),
984 !,
985 ( { C0 == 13 986 },
987 "%0",
988 ( "A" ; "a" )
989 -> { C = 10
990 }
991 ; { C0 >= 0xc0 }, 992 utf8_cont(Cs),
993 { phrase(utf8_codes([C]), [C0|Cs]) }
994 -> []
995 ; { C = C0
996 }
997 ).
998
1000
1001www_decode([0' |T]) -->
1002 "+",
1003 !,
1004 www_decode(T).
1005www_decode([C|T]) -->
1006 percent_coded(C),
1007 !,
1008 www_decode(T).
1009www_decode([C|T]) -->
1010 [C],
1011 !,
1012 www_decode(T).
1013www_decode([]) -->
1014 [].
1015
1016utf8_cont([H|T]) -->
1017 percent_byte(H),
1018 { between(0x80, 0xbf, H) },
1019 !,
1020 utf8_cont(T).
1021utf8_cont([]) -->
1022 [].
1023
1024
1032
1033:- create_prolog_flag(url_encoding, utf8, [type(atom)]). 1034
1035set_url_encoding(Old, New) :-
1036 current_prolog_flag(url_encoding, Old),
1037 ( Old == New
1038 -> true
1039 ; must_be(oneof([utf8, iso_latin_1]), New),
1040 set_prolog_flag(url_encoding, New)
1041 ).
1042
1043
1044 1047
1054
1055url_iri(Encoded, Decoded) :-
1056 nonvar(Encoded),
1057 !,
1058 ( sub_atom(Encoded, _, _, _, '%')
1059 -> atom_codes(Encoded, Codes),
1060 unescape_precent(Codes, UTF8),
1061 phrase(utf8_codes(Unicodes), UTF8),
1062 atom_codes(Decoded, Unicodes)
1063 ; Decoded = Encoded
1064 ).
1065url_iri(URL, IRI) :-
1066 atom_codes(IRI, IRICodes),
1067 atom_codes('/:?#&=', ExtraEscapes),
1068 phrase(www_encode(IRICodes, ExtraEscapes), UrlCodes),
1069 atom_codes(URL, UrlCodes).
1070
1071
1072unescape_precent([], []).
1073unescape_precent([0'%,C1,C2|T0], [H|T]) :- 1074 !,
1075 code_type(C1, xdigit(D1)),
1076 code_type(C2, xdigit(D2)),
1077 H is D1*16 + D2,
1078 unescape_precent(T0, T).
1079unescape_precent([H|T0], [H|T]) :-
1080 unescape_precent(T0, T).
1081
1082
1083 1086
1093
1094parse_url_search(Spec, Fields) :-
1095 atomic(Spec),
1096 !,
1097 atom_codes(Spec, Codes),
1098 phrase(search(Fields), Codes).
1099parse_url_search(Codes, Fields) :-
1100 is_list(Codes),
1101 !,
1102 phrase(search(Fields), Codes).
1103parse_url_search(Codes, Fields) :-
1104 must_be(list, Fields),
1105 phrase(csearch(Fields, []), Codes).
1106
1107
1108 1111
1119
1120file_name_to_url(File, FileURL) :-
1121 nonvar(File),
1122 !,
1123 absolute_file_name(File, Path),
1124 atom_concat('file://', Path, FileURL),
1125 !.
1126file_name_to_url(File, FileURL) :-
1127 atom_concat('file://', File, FileURL),
1128 !