1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2009-2018, VU University Amsterdam 7 CWI, Amsterdam 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(csv, 37 [ csv//1, % +Rows 38 csv//2, % +Rows, +Options 39 40 csv_read_file/2, % +File, -Data 41 csv_read_file/3, % +File, -Data, +Options 42 csv_read_stream/3, % +Stream, -Data, +Options 43 44 csv_read_file_row/3, % +File, -Row, +Options 45 csv_read_row/3, % +Stream, -Row, +CompiledOptions 46 csv_options/2, % -Compiled, +Options 47 48 csv_write_file/2, % +File, +Data 49 csv_write_file/3, % +File, +Data, +Options 50 csv_write_stream/3 % +Stream, +Data, +Options 51 ]). 52:- use_module(library(record),[(record)/1, op(_,_,record)]). 53 54:- autoload(library(apply),[maplist/2]). 55:- autoload(library(debug),[debug/3]). 56:- autoload(library(error),[must_be/2,domain_error/2]). 57:- autoload(library(lists),[append/3]). 58:- autoload(library(option),[option/2,select_option/4]). 59:- autoload(library(pure_input), 60 [phrase_from_file/3,phrase_from_stream/2]). 61:- autoload(library(readutil),[read_line_to_codes/2]). 62:- autoload(library(dcg/basics),[string//1,eos//0]).
78:- predicate_options(csv//2, 2, 79 [ separator(nonneg), % mustv be code 80 strip(boolean), 81 ignore_quotes(boolean), 82 convert(boolean), 83 case(oneof([down,preserve,up])), 84 functor(atom), 85 arity(-nonneg), % actually ?nonneg 86 match_arity(boolean) 87 ]). 88:- predicate_options(csv_read_file/3, 3, 89 [ pass_to(csv//2, 2), 90 pass_to(phrase_from_file/3, 3) 91 ]). 92:- predicate_options(csv_read_file_row/3, 3, 93 [ pass_to(csv//2, 2), 94 pass_to(open/4, 4) 95 ]). 96:- predicate_options(csv_write_file/3, 3, 97 [ pass_to(csv//2, 2), 98 pass_to(open/4, 4) 99 ]). 100:- predicate_options(csv_write_stream/3, 3, 101 [ pass_to(csv//2, 2) 102 ]). 103 104 105:- record 106 csv_options(separator:integer=0',, 107 strip:boolean=false, 108 ignore_quotes:boolean=false, 109 convert:boolean=true, 110 case:oneof([down,preserve,up])=preserve, 111 functor:atom=row, 112 arity:integer, 113 match_arity:boolean=true, 114 skip_header:atom).
\t
for
.tsv
files and ,
otherwise.
Suppose we want to create a predicate table/6 from a CSV file
that we know contains 6 fields per record. This can be done
using the code below. Without the option arity(6)
, this would
generate a predicate table/N, where N is the number of fields
per record in the data.
?- csv_read_file(File, Rows, [functor(table), arity(6)]), maplist(assert, Rows).
138csv_read_file(File, Rows) :- 139 csv_read_file(File, Rows, []). 140 141csv_read_file(File, Rows, Options) :- 142 default_separator(File, Options, Options1), 143 make_csv_options(Options1, Record, RestOptions), 144 phrase_from_file(csv_roptions(Rows, Record), File, RestOptions). 145 146 147default_separator(File, Options0, Options) :- 148 ( option(separator(_), Options0) 149 -> Options = Options0 150 ; file_name_extension(_, Ext0, File), 151 downcase_atom(Ext0, Ext), 152 ext_separator(Ext, Sep) 153 -> Options = [separator(Sep)|Options0] 154 ; Options = Options0 155 ). 156 157ext_separator(csv, 0',). 158ext_separator(tsv, 0'\t).
165csv_read_stream(Stream, Rows, Options) :-
166 make_csv_options(Options, Record, _),
167 phrase_from_stream(csv_roptions(Rows, Record), Stream).
separator(0';)
parses
a semicolon separated file.true
(default false), threat double quotes as a normal
character.true
(default false
), strip leading and trailing
blank space. RFC4180 says that blank space is part of the
data.#
. After
skipping comment lines this option causes csv//2 to skip empty
lines. Note that an empty line may not contain white space
characters (space or tab) as these may provide valid data.true
(default), use name/2 on the field data. This
translates the field into a number if possible.down
, downcase atomic values. If up
, upcase them
and if preserve
(default), do not change the case.row
.domain_error(row_arity(Expected), Found)
if a row is
found with different arity.false
(default true
), do not reject CSV files where
lines provide a varying number of fields (columns). This
can be a work-around to use some incorrect CSV files.219csv(Rows) --> 220 csv(Rows, []). 221 222csv(Rows, Options) --> 223 { make_csv_options(Options, Record, _) }, 224 csv_roptions(Rows, Record). 225 226csv_roptions(Rows, Record) --> 227 { ground(Rows) }, 228 !, 229 emit_csv(Rows, Record). 230csv_roptions(Rows, Record) --> 231 skip_header(Record), 232 csv_data(Rows, Record). 233 234skip_header(Options) --> 235 { csv_options_skip_header(Options, CommentStart), 236 nonvar(CommentStart), 237 atom_codes(CommentStart, Codes) 238 }, 239 !, 240 skip_header_lines(Codes), 241 skip_blank_lines. 242skip_header(_) --> 243 []. 244 245skip_header_lines(CommentStart) --> 246 string(CommentStart), 247 !, 248 ( string(_Comment), 249 end_of_record 250 -> skip_header_lines(CommentStart) 251 ). 252skip_header_lines(_) --> 253 []. 254 255skip_blank_lines --> 256 eos, 257 !. 258skip_blank_lines --> 259 end_of_record, 260 !, 261 skip_blank_lines. 262skip_blank_lines --> 263 []. 264 265csv_data([], _) --> 266 eos, 267 !. 268csv_data([Row|More], Options) --> 269 row(Row, Options), 270 !, 271 { debug(csv, 'Row: ~p', [Row]) }, 272 csv_data(More, Options). 273 274 275row(Row, Options) --> 276 fields(Fields, Options), 277 { csv_options_functor(Options, Functor), 278 Row =.. [Functor|Fields], 279 functor(Row, _, Arity), 280 check_arity(Options, Arity) 281 }. 282 283check_arity(Options, Arity) :- 284 csv_options_arity(Options, Arity), 285 !. 286check_arity(Options, _) :- 287 csv_options_match_arity(Options, false), 288 !. 289check_arity(Options, Arity) :- 290 csv_options_arity(Options, Expected), 291 domain_error(row_arity(Expected), Arity). 292 293fields([F|T], Options) --> 294 field(F, Options), 295 ( separator(Options) 296 -> fields(T, Options) 297 ; end_of_record 298 -> { T = [] } 299 ). 300 301field(Value, Options) --> 302 "\"", 303 { csv_options_ignore_quotes(Options, false) }, 304 !, 305 string_codes(Codes), 306 { make_value(Codes, Value, Options) }. 307field(Value, Options) --> 308 { csv_options_strip(Options, true) }, 309 !, 310 stripped_field(Value, Options). 311field(Value, Options) --> 312 { csv_options_separator(Options, Sep) }, 313 field_codes(Codes, Sep), 314 { make_value(Codes, Value, Options) }. 315 316 317stripped_field(Value, Options) --> 318 ws, 319 ( "\"", 320 { csv_options_strip(Options, false) } 321 -> string_codes(Codes), 322 ws 323 ; { csv_options_separator(Options, Sep) }, 324 field_codes(Codes0, Sep), 325 { strip_trailing_ws(Codes0, Codes) } 326 ), 327 { make_value(Codes, Value, Options) }. 328 329ws --> " ", !, ws. 330ws --> "\t", !, ws. 331ws --> "". 332 333strip_trailing_ws(List, Stripped) :- 334 append(Stripped, WS, List), 335 all_ws(WS). 336 337all_ws([]). 338all_ws([32|T]) :- all_ws(T). 339all_ws([9|T]) :- all_ws(T).
347string_codes(List) --> 348 [H], 349 ( { H == 0'" } 350 -> ( "\"" 351 -> { List = [H|T] }, 352 string_codes(T) 353 ; { List = [] } 354 ) 355 ; { List = [H|T] }, 356 string_codes(T) 357 ). 358 359field_codes([], Sep), [Sep] --> [Sep], !. 360field_codes([], _), "\n" --> "\r\n", !. 361field_codes([], _), "\n" --> "\n", !. 362field_codes([], _), "\n" --> "\r", !. 363field_codes([H|T], Sep) --> [H], !, field_codes(T, Sep). 364field_codes([], _) --> []. % unterminated last record
371make_value(Codes, Value, Options) :- 372 csv_options_convert(Options, Convert), 373 csv_options_case(Options, Case), 374 make_value(Convert, Case, Codes, Value). 375 376make_value(true, preserve, Codes, Value) :- 377 !, 378 name(Value, Codes). 379make_value(true, Case, Codes, Value) :- 380 !, 381 ( number_string(Value, Codes) 382 -> true 383 ; make_value(false, Case, Codes, Value) 384 ). 385make_value(false, preserve, Codes, Value) :- 386 !, 387 atom_codes(Value, Codes). 388make_value(false, down, Codes, Value) :- 389 !, 390 string_codes(String, Codes), 391 downcase_atom(String, Value). 392make_value(false, up, Codes, Value) :- 393 string_codes(String, Codes), 394 upcase_atom(String, Value). 395 396separator(Options) --> 397 { csv_options_separator(Options, Sep) }, 398 [Sep]. 399 400end_of_record --> "\n". % Unix files 401end_of_record --> "\r\n". % DOS files 402end_of_record --> "\r". % MacOS files 403end_of_record --> eos. % unterminated last record
In addition to the options of csv_read_file/3, this predicate processes the option:
427csv_read_file_row(File, Row, Options) :- 428 default_separator(File, Options, Options1), 429 make_csv_options(Options1, RecordOptions, Options2), 430 select_option(line(Line), Options2, RestOptions, _), 431 setup_call_cleanup( 432 open(File, read, Stream, RestOptions), 433 csv_read_stream_row(Stream, Row, Line, RecordOptions), 434 close(Stream)). 435 436csv_read_stream_row(Stream, Row, Line, Options) :- 437 between(1, infinite, Line), 438 ( csv_read_row(Stream, Row0, Options), 439 Row0 \== end_of_file 440 -> Row = Row0 441 ; !, 442 fail 443 ).
end_of_file
upon reaching the
end of the input.453csv_read_row(Stream, Row, _Record) :- 454 at_end_of_stream(Stream), 455 !, 456 Row = end_of_file. 457csv_read_row(Stream, Row, Record) :- 458 read_lines_to_codes(Stream, Codes, Record, even), 459 phrase(row(Row0, Record), Codes), 460 !, 461 Row = Row0. 462 463read_lines_to_codes(Stream, Codes, Options, QuoteQuantity) :- 464 read_line_to_codes(Stream, Codes0), 465 Codes0 \== end_of_file, 466 ( ( csv_options_ignore_quotes(Options, true) 467 ; check_quotes(Codes0, QuoteQuantity, even) 468 ) 469 -> Codes = Codes0 470 ; append(Codes0, [0'\n|Tail], Codes), 471 read_lines_to_codes(Stream, Tail, Options, odd) 472 ). 473 474check_quotes([], QuoteQuantity, QuoteQuantity) :- 475 !. 476check_quotes([0'"|T], odd, Result) :- 477 !, 478 check_quotes(T, even, Result). 479check_quotes([0'"|T], even, Result) :- 480 !, 481 check_quotes(T, odd, Result). 482check_quotes([_|T], QuoteQuantity, Result) :- 483 check_quotes(T, QuoteQuantity, Result).
493csv_options(Compiled, Options) :- 494 make_csv_options(Options, Compiled, _Ignored). 495 496 497 /******************************* 498 * OUTPUT * 499 *******************************/
\t
for
.tsv
files and ,
otherwise.509csv_write_file(File, Data) :- 510 csv_write_file(File, Data, []). 511 512csv_write_file(File, Data, Options) :- 513 must_be(list, Data), 514 default_separator(File, Options, Options1), 515 make_csv_options(Options1, OptionsRecord, RestOptions), 516 setup_call_cleanup( 517 open(File, write, Out, RestOptions), 518 maplist(csv_write_row(Out, OptionsRecord), Data), 519 close(Out)). 520 521csv_write_row(Out, OptionsRecord, Row) :- 522 phrase(emit_row(Row, OptionsRecord), String), 523 format(Out, '~s', [String]). 524 525emit_csv([], _) --> []. 526emit_csv([H|T], Options) --> 527 emit_row(H, Options), 528 emit_csv(T, Options). 529 530emit_row(Row, Options) --> 531 { Row =.. [_|Fields] }, 532 emit_fields(Fields, Options), 533 "\r\n". % RFC 4180 demands \r\n 534 535emit_fields([], _) --> 536 "". 537emit_fields([H|T], Options) --> 538 emit_field(H, Options), 539 ( { T == [] } 540 -> [] 541 ; { csv_options_separator(Options, Sep) }, 542 [Sep], 543 emit_fields(T, Options) 544 ). 545 546emit_field(H, Options) --> 547 { ( atom(H) 548 -> atom_codes(H, Codes) 549 ; string(H) 550 -> string_codes(H, Codes) 551 ) 552 }, 553 !, 554 ( { needs_quotes(H, Options) } 555 -> "\"", emit_string(Codes), "\"" 556 ; emit_codes(Codes) 557 ). 558emit_field([], _) --> 559 !, 560 { atom_codes('[]', Codes) }, 561 emit_codes(Codes). 562emit_field(H, _) --> 563 { number_codes(H,Codes) }, 564 emit_codes(Codes). 565 566needs_quotes(Atom, _) :- 567 sub_atom(Atom, _, _, _, '"'), 568 !. 569needs_quotes(Atom, _) :- 570 sub_atom(Atom, _, _, _, '\n'), 571 !. 572needs_quotes(Atom, _) :- 573 sub_atom(Atom, _, _, _, '\r'), 574 !. 575needs_quotes(Atom, Options) :- 576 csv_options_separator(Options, Sep), 577 char_code(Char, Sep), 578 sub_atom(Atom, _, _, _, Char), 579 !. 580 581emit_string([]) --> "". 582emit_string([0'"|T]) --> !, "\"\"", emit_string(T). 583emit_string([H|T]) --> [H], emit_string(T). 584 585emit_codes([]) --> "". 586emit_codes([0'"|T]) --> !, "\"\"", emit_codes(T). 587emit_codes([H|T]) --> [H], emit_codes(T).
save_data(File) :- setup_call_cleanup( open(File, write, Out), forall(data(C1,C2,C3), csv_write_stream(Out, [row(C1,C2,C3)], [])), close(Out)),
606csv_write_stream(Stream, Data, Options) :-
607 must_be(list, Data),
608 make_csv_options(Options, OptionsRecord, _),
609 maplist(csv_write_row(Stream, OptionsRecord), Data)
Process CSV (Comma-Separated Values) data
This library parses and generates CSV data. CSV data is represented in Prolog as a list of rows. Each row is a compound term, where all rows have the same name and arity.