;;; vcard-parse.el --- Library for parsing vCards -*- lexical-binding: t; -*- ;; Copyright (C) 2019 Free Software Foundation, Inc. ;; Author: Eric Abrahamsen ;; Maintainer: Eric Abrahamsen ;; This program is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with this program. If not, see . ;;; Commentary: ;; This package provides a consumer-agnostic parser for vCard files, ;; aka Virtual Contact Files. Its entry points parse a file or buffer ;; containing one or more contacts in vCard format, and return the ;; data as a structure meant for use by other programs. It can parse ;; versions 2.1, 3.0, and 4.0 of the vCard standard, RFC 6350 (see ;; https://tools.ietf.org/html/rfc6350). ;; Parsed vCards are returned as lists containing contact properties. ;; Each property is a list containing the property name, downcased and ;; interned as a symbol, the property value, cast to the most ;; appropriate type, and a further alist of property parameters, ;; values also cast to type where applicable. For example, this email ;; property: ;; EMAIL;TYPE=work:address@hidden ;; Will be parsed into: ;; (email "address@hidden" ((type . "work"))) ;; A contact is a structure containing a list of properties. As much ;; as possible, the internal implementation of the structure should be ;; ignored, and the properties of a single contact accessed only ;; through the provided getters. The getters are: ;; `vcard-contact-properties': Return a list of all properties. ;; `vcard-contact-property-types': Return a list of all the different ;; property types this contact has, as symbols. ;; `vcard-contact-property-type': Return all properties of the given ;; type, for this contact. The return value, if non-nil, is either a ;; single property, or a list of (possibly just one) properties, ;; depending on the cardinality of the property type (see the RFC). ;; `vcard-contact-property-groups': Return a list of all the property ;; groups for the given contact. A single property's group is found ;; under the 'group key in its parameter list. ;; `vcard-contact-property-group': Return all the properties of the ;; given group, for this contact, or nil. ;; For reference, these are the property types specified for vCard ;; version 4.0: ;; "SOURCE" "KIND" "FN" "N" "NICKNAME" "PHOTO" "BDAY" "ANNIVERSARY" ;; "GENDER" "ADR" "TEL" "EMAIL" "IMPP" "LANG" "TZ" "GEO" "TITLE" ;; "ROLE" "LOGO" "ORG" "MEMBER" "RELATED" "CATEGORIES" "NOTE" "PRODID" ;; "REV" "SOUND" "UID" "CLIENTPIDMAP" "URL" "KEY" "FBURL" "CALADRURI" ;; "CALURI" "XML" iana-token x-name ;; Value types: ;; Booleans, integers, and floats are all cast as expected. If ;; `vcard-parse-datetime-values' is non-nil, the code will do the best ;; it can to turn a datetime value into a list of integers a-la ;; `parse-time-string'. This is done either with the built-in ;; `iso8601' library that exists in newer Emacs, or with a local copy ;; that ships with this package, if the built-in version isn't found. ;; While different vCard versions provide slightly different options, ;; the parsing process attempts to normalize property values as much ;; as possible. Version 4.0 might have more properties available (the ;; KIND property, for instance), but for the most part the parsed data ;; will look the same. ;; TODO: ;; - Go the other direction: produce vCard files from structures. ;;; Code: (require 'cl-lib) (defgroup vcard nil "Customization options for the vcard library." :group 'mail) (defgroup vcard-parse nil "Customization options for vcard parsing." :group 'vcard) (defcustom vcard-parse-select-fields nil "A list of field types to select. If this variable is non-nil, only the fields listed will be parsed, all others will be discarded. Note that the 'version and 'fn properties are always returned. Most useful when let-bound around one of the parsing functions." :type '(repeat symbol)) (defcustom vcard-parse-omit-fields nil "A list of field types to omit. If this variable is non-nil, the fields listed will be discarded. Most useful when let-bound around one of the parsing functions." :type '(repeat symbol)) (defcustom vcard-parse-datetime-values t "When non-nil, attempt to parse date/time property values. If successful, the property value will be (usually) converted to a list of integers, though if the \"type\" parameter of the property is \"text\", the value will be returned as a string. It is also possible that parsing may fail, in which case the original string value will also be returned." :type 'boolean) (defcustom vcard-parse-card-consumer-function nil "Custom function for consuming a single contact card. It is called with a list of properties, as produced by the built-in code, or by the return value of `vcard-parse-property-consumer-function'." :type 'function) (defcustom vcard-parse-property-consumer-function nil "Custom function for consuming a single property. The function is called with four arguments: the property type as a symbol, the property value (all un-escaping, decoding, splitting, etc already complete), the property parameters as an alist with symbol keys, and the vcard version as a float." :type 'function) (defvar vcard-parse-overriding-version nil "vCard version, as a float, used when no VERSION property is present. vCard versions are sometimes specified outside of the cards themselves -- as part of the file media type, for instance. In these cases, this variable can be let-bound around the parsing process to specify the version. If a card contains its own VERSION property, that property value cannot be overridden.") (defvar vcard-compound-properties '(n adr gender org) "A list of vcard properties with multi-part values. Properties are symbols. Values have several parts, separated by semicolons.") (defvar vcard-datetime-properties '(bday anniversary rev) "A list of vcard properties representing date or time values. The parsing process will make some attempt at converting these values into lisp timestamps.") ;; Maybe load our local version of iso8601. (eval-when-compile (unless (fboundp 'iso8601-parse) (require 'vcard-iso8601))) ;;;###autoload (defun vcard-parse-file (file) "Parse FILE containing vCard data into an alist." (interactive "f") (with-temp-buffer (insert-file-contents file) (vcard-parse-buffer))) ;;;###autoload (defun vcard-parse-buffer () "Parse current buffer, containing vCard data. Returns a list of contact objects." (interactive) (let ((card-consumer (when (functionp vcard-parse-card-consumer-function) vcard-parse-card-consumer-function)) (prop-consumer (if (functionp vcard-parse-property-consumer-function) vcard-parse-property-consumer-function #'list)) (warning-series t) card out) ;; vCard 4.0 files *must* be utf-8 encoded + CRLF. But we're only ;; parsing this file, we're not responsible for how it's saved to ;; disk. Don't enable this for now. ;; (when (and (null (eq buffer-file-coding-system 'utf-8-unix)) ;; (or (eql ;; vard-parse-overriding-version 4.0) ;; (save-excursion ;; (re-search-forward "VERSION:4\\.0" (point-max) t)))) ;; (set-buffer-file-coding-system 'utf-8-unix)) (goto-char (point-min)) ;; Unfolding consists of removing any instances of ;; newline-plus-space-or-horizontal-tab. Technically there should ;; always be a non-space character following the space, but we ;; don't really care. ;; From the RFC: ;; Note: It is possible for very simple implementations to ;; generate improperly folded lines in the middle of a UTF-8 ;; multi-octet sequence. For this reason, implementations SHOULD ;; unfold lines in such a way as to properly restore the original ;; sequence. ;; How would we do that? We could operate on ;; `find-file-literally', but then what? ;; CR = \015 ;; LF = \012 ;; SPC = \040 ;; TAB = \011 (while (re-search-forward "\n[ \t]" (point-max) t) (replace-match "")) (goto-char (point-min)) ;; This routine assumes no blank lines in the whole file, which is ;; the way it's supposed to be, but we could be a little kinder ;; with a `skip-syntax-forward' check. (while (re-search-forward "^BEGIN:VCARD\n" (line-end-position 2) t) (when (setq card (ignore-errors ;; `vcard-parse-card' moves point past the ;; card. (vcard-parse-card prop-consumer card-consumer))) (push card out))) (nreverse out))) (defun vcard-parse-card (&optional prop-consumer card-consumer) "Collect properties from a single vCard and return them as an alist. Point is at bol on the first property. Collect properties until the \"END:VCARD\" tag is reached, then move past that tag. PROP-CONSUMER, if given, should be a function accepting three arguments -- a property symbol, property value list, and property parameter list -- and returning a property object. CARD-CONSUMER should be a function accepting one argument -- a list of properties -- and returning a card/contact object." (let ((prop-consumer (or prop-consumer #'list)) (version ;; First line should be the VERSION property. (or (when (re-search-forward "VERSION:\\([[:digit:].]+\\)\n" (line-end-position 2) t) (string-to-number (match-string 1))) vcard-parse-overriding-version (error "Can't determine vCard version"))) card) (push (list 'version version) card) (while (and (null (looking-at-p "^END:VCARD$")) (re-search-forward "^\\(?:\\(?1:[-[:alnum:]]+\\)\\.\\)?\\(?2:[-[:alnum:]]+\\)" (line-end-position) t)) (let ((prop (intern (downcase (match-string 2)))) anchor sep params value) (when (or (eql prop 'fn) (and (or (null vcard-parse-omit-fields) (null (memql prop vcard-parse-omit-fields))) (or (null vcard-parse-select-fields) (memql prop vcard-parse-select-fields)))) ;; Pick up the group. (when-let ((group (match-string-no-properties 1))) (push (cons 'group group) params)) ;; Pick up parameters. (while (re-search-forward ";\\([^=]+\\)=\\([^;:]+\\)" (line-end-position) t) (push (cons (intern (match-string-no-properties 1)) (downcase (match-string-no-properties 2))) params)) (skip-chars-forward ":") ;; Break value on unescaped commas or semicolons, as ;; appropriate. Properties may either be compound ;; (eg. addresses), with parts separated by semicolons, or ;; multi-value (eg. categories), with instances separated by ;; commas, but *not both*. (setq sep (if (memq prop vcard-compound-properties) ";" ",") anchor (point)) (while (re-search-forward sep (line-end-position) t) ;; 92 = backslash. Having ?\ in the buffer confuses ;; paredit. (unless (eql (char-before (1- (point))) 92) (push (buffer-substring-no-properties anchor (1- (point))) value) (setq anchor (point)))) (push (buffer-substring-no-properties anchor (line-end-position)) value) ;; Unescape all remaining colons, semicolons, commas, ;; backslashes and newlines. (setq value (mapcar (lambda (v) (replace-regexp-in-string "\\\\\\([\n:;\\,]\\)" "\\1" v)) value)) ;; Possibly do some parsing of the value(s). (let ((case-fold-search t)) (setq value (mapcar (lambda (v) (cond ((string-match-p "false" v) nil) ((string-match-p "true" v) t) ;; What the hell is this, anyway? ((and (eql prop 'x-ablabel) (string-match "_$!<\\([^>]+\\)>!$_" v)) (match-string 1 v)) ((memql prop vcard-datetime-properties) (if vcard-parse-datetime-values (let ((val-type (cdr-safe (assoc 'value params)))) (cond ((and (stringp val-type) (string-equal val-type "text")) v) ((and (stringp val-type) (string-equal val-type "timestamp")) (parse-time-string v)) (t (condition-case nil (iso8601-parse v) (error (lwarn '(vcard) :error "Unable to parse date value: \"%s\"" v)))))) v)) ((string-match-p "\\`[[:digit:].]+\\'" v) (string-to-number v)) (t v))) value))) ;; Do we want to normalize this? This way consumers have to ;; explicitly check if it's a string or a list. (setq value (if (= 1 (length value)) (car value) (nreverse value))) (push (funcall prop-consumer prop value params) card)) (forward-line))) (if card-consumer (funcall card-consumer (nreverse card)) (nreverse card)))) (cl-defmethod vcard-contact-properties ((contact list)) "Return a list of all properties in CONTACT." contact) (cl-defmethod vcard-contact-property-types ((contact list)) "Return a list of all property types in CONTACT. Each type is a symbol representing a downcased property name." (let (types) (dolist (p (vcard-contact-properties contact) types) (cl-pushnew (car p) types)))) (cl-defmethod vcard-contact-property-type ((contact list) (type symbol)) "Return all properties of TYPE from CONTACT. TYPE is a symbol, e.g. 'email." (let (props) (dolist (p (vcard-contact-properties contact) props) (when (eql type (car p)) (push p props))))) (cl-defmethod vcard-contact-property-groups ((contact list)) "Return a list of all properties groups in CONTACT. Each group is a string." (let (groups) (dolist (p (vcard-contact-properties contact) (nreverse groups)) (when-let ((g (cdr-safe (assoc 'group (nth 2 p))))) (cl-pushnew g groups :test #'equal))))) (cl-defmethod vcard-contact-property-group ((contact list) (group string)) "Return all properties belonging to GROUP in CONTACT. GROUP is a string." (let (props) (dolist (p (vcard-contact-properties contact) props) (when (string-equal (cdr (assq 'group (nth 2 p))) group) (push p props))))) (provide 'vcard-parse) ;;; vcard-parse.el ends here