;;; $Id: sxml-helpers.scm,v 1.4 2006/01/05 22:24:09 leavens Exp $ ;;; AUTHOR: Gary T. Leavens ;;; ;;; Grammar for SXML representation in Scheme, adapted from ;;; http://okmij.org/ftp/Scheme/SXML.html. The helpers in this file ;;; deal with the abstract syntax of XML parsed by the SXML tools; of ;;; course, XML's concrete syntax itself doesn't look like this. ;;; The grammar below also ignores annotations, ;;; which are an XML extension mechanism. ;;; ;;; ::= ;;; (*TOP* {}* {}* ) "document (PIs comments element)" ;;; ::= ;;; ( {}* ) "named (name children)" ;;; | ( ( @ {}* ) "attributed (name attributes ;;; {}* ) children)" ;;; ::= ;;; ( ) "name-value (name value)" ;;; | ( ) "name-only (name)" ;;; ::= ;;; "child (element)" ;;; | "text (string)" ;;; | "child-pi (PI)" ;;; | "child-comment (comment)" ;;; | ( *ENTITY* ) "entity (public-id system-id)" ;;; ::= ;;; ( *PI* ) "PI (target content)" ;;; ::= ;;; ( *COMMENT* ) "comment (string)" ;;; ;;; Note that and nonterminals have an extra set ;;; of convenience helpers that allow processing their alternatives in ;;; a uniform way. That is, these make it seem as if the grammar were: ;;; ;;; ::= ;;; ( ( @ {}* ) "element (name attributes ;;; {}* ) children)" ;;; ::= ;;; ( ) "attribute (name value)" ;;; ;;; The idea behind these extra helpers is given by the following equations ;;; ;;; (element sym attrs kids) = (if (null? attrs) ;;; (named sym kids) ;;; (attributed sym attrs kids)) ;;; ;;; (element->attributes (named sym kids)) = '() ;;; ;;; (attribute nm val) = (if (equal? val "") ;;; (name-only nm) ;;; (name-value nm val)) ;;; ;;; (attribute->value (name-only nm)) = "" ;;; ;;; The type predicates element? and attribute? should not be used in ;;; most programs, as they are slow. But they wouldn't be needed if ;;; you are imagining you are using grammar for and ;;; without alternatives. (module sxml-helpers (lib "typedscm.ss" "typedscm") (provide document? element? attribute? child-of-element? PI? comment? named? attributed? name-value? name-only? child? text? child-pi? child-comment? entity? document named attributed element name-value name-only attribute child text child-pi child-comment entity PI comment document->PIs document->comments document->element named->name named->children attributed->name attributed->attributes attributed->children element->name element->attributes element->children name-value->name name-value->value name-only->name attribute->name attribute->value child->element text->string child-pi->PI child-comment->comment entity->public-id entity->system-id PI->target PI->content comment->string ) ;; we rely on an external parser, SSAX, available from the URL above ;; type predicates, these will be slow in general (deftype document? (type-predicate-for document)) (deftype element? (type-predicate-for element)) (deftype child-of-element? (type-predicate-for child-of-element)) (deftype attribute? (type-predicate-for attribute)) (deftype PI? (type-predicate-for PI)) (deftype comment? (type-predicate-for comment)) ;; case testers (discriminators), coded to be fast (deftype named? (-> (element) boolean)) (deftype attributed? (-> (element) boolean)) (deftype name-value? (-> (attribute) boolean)) (deftype name-only? (-> (attribute) boolean)) (deftype child? (-> (child-of-element) boolean)) (deftype text? (-> (child-of-element) boolean)) (deftype child-pi? (-> (child-of-element) boolean)) (deftype child-comment? (-> (child-of-element) boolean)) (deftype entity? (-> (child-of-element) boolean)) ;; constructors (deftype document (-> ((list-of PI) (list-of comment) element) document)) (deftype named (-> (symbol (list-of child-of-element)) element)) (deftype attributed (-> (symbol (list-of attribute) (list-of child-of-element)) element)) (deftype name-value (-> (symbol string) attribute)) (deftype name-only (-> (symbol) attribute)) (deftype child (-> (element) child-of-element)) (deftype text (-> (string) child-of-element)) (deftype child-pi (-> (PI) child-of-element)) (deftype child-comment (-> (comment) child-of-element)) (deftype entity (-> (string string) child-of-element)) (deftype PI (-> (symbol string) PI)) (deftype comment (-> (string) comment)) ;;; convenience constructors (deftype element (-> (symbol (list-of attribute) (list-of child-of-element)) element)) (deftype attribute (-> (symbol string) attribute)) ;; observers (deftype document->PIs (-> (document) (list-of PI))) (deftype document->comments (-> (document) (list-of comment))) (deftype document->element (-> (document) element)) (deftype named->name (-> (element) symbol)) (deftype named->children (-> (element) (list-of child-of-element))) (deftype attributed->name (-> (element) symbol)) (deftype attributed->attributes (-> (element) (list-of attribute))) (deftype attributed->children (-> (element) (list-of child-of-element))) (deftype name-value->name (-> (attribute) symbol)) (deftype name-value->value (-> (attribute) string)) (deftype name-only->name (-> (attribute) symbol)) (deftype child->element (-> (child-of-element) element)) (deftype text->string (-> (child-of-element) string)) (deftype child-pi->PI (-> (child-of-element) PI)) (deftype child-comment->comment (-> (child-of-element) comment)) (deftype entity->public-id (-> (child-of-element) string)) (deftype entity->system-id (-> (child-of-element) string)) (deftype PI->target (-> (PI) symbol)) (deftype PI->content (-> (PI) string)) (deftype comment->string (-> (comment) string)) ;; convenience observers (deftype element->name (-> (element) symbol)) (deftype element->attributes (-> (element) (list-of attribute))) (deftype element->children (-> (element) (list-of child-of-element))) (deftype attribute->name (-> (attribute) symbol)) (deftype attribute->value (-> (attribute) string)) ;; Representations, see the grammar above for details (defrep (document (list-of datum)) (element (list-of datum)) (attribute (list-of datum)) (child-of-element datum) (PI (list-of datum)) (comment (list-of datum))) ;; The syntax '@ is not technically allowed in Scheme by R^5RS, ;; so we treat it specially. (define *sxml-helpers:attribsym* (string->symbol "@")) ;; Type predicates (slow) (define document? (lambda (d) (and (list? d) (not (null? d)) (eq? '*TOP* (car d)) (element? (list-ref d (- (length d) 1)))))) (define element? (lambda (d) (test-type boolean? (and (list? d) (not (null? d)) (symbol? (car d)) (not (eq? (car d) '*PI*)) (not (eq? (car d) '*COMMENT*)) (or (null? (cdr d)) (if (and (pair? (cadr d)) (eq? *sxml-helpers:attribsym* (caadr d))) (and ((list-of attribute?) (cdadr d)) ((list-of child-of-element?) (cddr d))) ((list-of child-of-element?) (cdr d)))))))) (define attribute? (lambda (d) (and (pair? d) (symbol? (car d)) (or (null? (cdr d)) (and (pair? (cdr d)) (string? (cadr d)) (null? (cddr d))))))) (define child-of-element? (lambda (d) (test-type boolean? (or (element? d) (string? d) (PI? d) (comment? d) (and (entity? d) (list? d) (= 3 (length d)) (string? (cadr d)) (string? (caddr d))))))) (define PI? (lambda (d) (test-type boolean? (and (pair? d) (eq? '*PI* (car d)) (pair? (cdr d)) (symbol? (cadr d)) (pair? (cddr d)) (string? (caddr d)) (null? (cdddr d)))))) (define comment? (lambda (d) (test-type boolean? (and (pair? d) (eq? '*COMMENT* (car d)) (pair? (cdr d)) (string? (cadr d)) (null? (cddr d)))))) ;; Tests (discriminators, fast) (define named? (lambda (elem) (or (null? (cdr elem)) (not (and (pair? (cadr elem)) (eq? *sxml-helpers:attribsym* (test-type symbol? (caadr elem)))))))) (define attributed? (lambda (elem) (and (not (null? (cdr elem))) (pair? (cadr elem)) (eq? *sxml-helpers:attribsym* (test-type symbol? (caadr elem)))))) (define name-value? (lambda (attr) (not (null? (cdr attr))))) (define name-only? (lambda (attr) (null? (cdr attr)))) (define child? (lambda (coe) (and (pair? coe) (let ((car-coe (test-type symbol? (car coe)))) (not (or (eq? '*TOP* car-coe) (eq? '*PI* car-coe) (eq? '*COMMENT* car-coe) (eq? '*ENTITY* car-coe))))))) (define text? (lambda (coe) (string? coe))) (define child-pi? (lambda (coe) (and (pair? coe) (eq? '*PI* (test-type symbol? (car coe)))))) (define child-comment? (lambda (coe) (and (pair? coe) (eq? '*COMMENT* (test-type symbol? (car coe)))))) (define entity? (lambda (coe) (and (pair? coe) (eq? '*ENTITY* (test-type symbol? (car coe)))))) ;; Constructors (define document (lambda (PIs comments element) (cons '*TOP* (append PIs comments (list element))))) (define named (lambda (name kids) (cons name kids))) (define attributed (lambda (name attributes children) (cons name (cons (cons *sxml-helpers:attribsym* attributes) children)))) ;; convenience for making elements more uniformly (define element (lambda (name attributes children) (if (null? attributes) (named name children) (attributed name attributes children)))) (define name-value (lambda (name value) (list name value))) (define name-only (lambda (name) (list (has-type datum name)))) ;; convenience for making attributes more uniformly (define attribute (lambda (name value) (if (equal? "" value) (name-only name) (name-value name value)))) (define child (lambda (coe) (has-type datum coe))) (define text (lambda (coe) (has-type datum coe))) (define child-pi (lambda (coe) (has-type datum coe))) (define child-comment (lambda (coe) (has-type datum coe))) (define entity (lambda (public-id system-id) (has-type datum (list '*ENTITY* public-id system-id)))) (define PI (lambda (target content) (list '*PI* target content))) (define comment (lambda (string) (list '*COMMENT* string))) (deftype sxml-helpers:extract-maker (forall (T) (-> ((-> (T) boolean) string) (-> (T) datum)))) (define sxml-helpers:extract-maker (lambda (test? name) (has-type (forall (T) (-> (T) datum)) (lambda (x) (if (test? x) x (error (string-append "expecting a(n) " name ", but given:") (if (string? x) (string-append "\"" x "\"") x))))))) (define document->PIs (lambda (doc) (test-type (list-of (list-of datum?)) (sxml-helpers:filter PI? (cdr doc))))) (define document->comments (lambda (doc) (test-type (list-of (list-of datum?)) (sxml-helpers:filter comment? (cdr doc))))) (define document->element (lambda (doc) (test-type (list-of datum?) (list-ref doc (- (length doc) 1))))) (define element->name (lambda (elem) (test-type symbol? (car elem)))) (define named->name (lambda (elem) (element->name elem))) (define named->children (lambda (elem) (cdr (test-type (pair-of symbol? (list-of datum?)) ((sxml-helpers:extract-maker named? "named element") elem))))) (define attributed->name (lambda (elem) (element->name elem))) (define attributed->attributes (lambda (elem) (test-type (list-of (list-of datum?)) (cdadr ((sxml-helpers:extract-maker attributed? "attributed element") elem))))) (define attributed->children (lambda (elem) (cddr (test-type (pair-of symbol? (pair-of (list-of datum?) (list-of datum?))) ((sxml-helpers:extract-maker attributed? "attributed element") elem))))) (define element->attributes (lambda (elem) (if (attributed? elem) (attributed->attributes elem) '()))) (define element->children (lambda (elem) (if (attributed? elem) (attributed->children elem) (named->children elem)))) (define attribute->name (lambda (attr) (test-type symbol? (car attr)))) (define name-value->name (lambda (attr) (attribute->name attr))) (define name-value->value (lambda (attr) (test-type string? (cadr attr)))) (define name-only->name (lambda (attr) (attribute->name attr))) (define attribute->value (lambda (attr) (if (name-only? attr) "" (name-value->value attr)))) (define child->element (lambda (coe) (test-type (list-of datum?) ((sxml-helpers:extract-maker child? "child") coe)))) (define text->string (lambda (coe) (test-type string? ((sxml-helpers:extract-maker text? "text") coe)))) (define child-pi->PI (lambda (coe) (test-type (list-of datum?) ((sxml-helpers:extract-maker child-pi? "child-pi") coe)))) (define child-comment->comment (lambda (coe) (test-type (list-of datum?) ((sxml-helpers:extract-maker child-comment? "child-comment") coe)))) (define entity->public-id (lambda (coe) (test-type string? (cadr ((sxml-helpers:extract-maker entity? "entity") coe))))) (define entity->system-id (lambda (coe) (test-type string? (caddr ((sxml-helpers:extract-maker entity? "entity") coe))))) (define PI->target (lambda (pi) (test-type symbol? (cadr pi)))) (define PI->content (lambda (pi) (test-type string? (caddr pi)))) (define comment->string (lambda (com) (test-type string? (cadr com)))) (deftype sxml-helpers:filter (forall (t) (-> ((-> (t) boolean) (list-of t)) (list-of t)))) (define sxml-helpers:filter (lambda (p ls) (if (null? ls) '() (if (p (car ls)) (cons (car ls) (sxml-helpers:filter p (cdr ls))) (sxml-helpers:filter p (cdr ls)))))) ) ; end module