001    /*
002     * (c) Copyright 2010 University of Bristol
003     * All rights reserved.
004     * [See end of file]
005     */
006    package net.rootdev.javardfa.literal;
007    
008    import java.io.StringWriter;
009    import java.util.Collection;
010    import java.util.LinkedList;
011    import java.util.List;
012    import java.util.Stack;
013    import javax.xml.stream.XMLEventFactory;
014    import javax.xml.stream.XMLEventWriter;
015    import javax.xml.stream.XMLOutputFactory;
016    import javax.xml.stream.XMLStreamException;
017    import javax.xml.stream.XMLStreamWriter;
018    import javax.xml.stream.events.Attribute;
019    import javax.xml.stream.events.StartElement;
020    import javax.xml.stream.events.XMLEvent;
021    import net.rootdev.javardfa.Parser;
022    
023    /**
024     *
025     * @author pldms
026     */
027    public class LiteralCollector {
028    
029        final String XMLLiteral = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral";
030    
031        private final Stack<Collector> collectors;
032        private List<XMLEvent> queuedEvents;
033        private int level;
034        private final Parser parser;
035        private final StartElement fakeEnvelope;
036        private final XMLEventFactory eventFactory;
037        private final XMLOutputFactory outputFactory;
038    
039        public LiteralCollector(Parser parser, XMLEventFactory eventFactory, XMLOutputFactory outputFactory) {
040            this.parser = parser;
041            this.collectors = new Stack<Collector>();
042            this.queuedEvents = null;
043            this.eventFactory = eventFactory;
044            this.outputFactory = outputFactory;
045            this.fakeEnvelope = eventFactory.createStartElement("", null, "fake");
046        }
047    
048        public boolean isCollecting() { return !collectors.isEmpty(); }
049    
050        public boolean isCollectingXML() {
051            if (!isCollecting()) return false;
052            return XMLLiteral.equals(collectors.peek().datatype);
053        }
054    
055        public void collect(String subject, Collection<String> props, String datatype, String lang) {
056            if (!isCollecting()) { // set up collection
057                queuedEvents = new LinkedList<XMLEvent>();
058                level = 0;
059            }
060    
061            Collector coll = new Collector(subject, props, datatype, lang, level, queuedEvents.size());
062            collectors.push(coll);
063        }
064    
065        public void handleEvent(XMLEvent event) {
066            if (!isCollecting()) return; // nothing to do
067            if (event.isStartElement()) handleStartEvent(event);
068            else if (event.isEndElement()) handleEndEvent(event);
069            else queuedEvents.add(event);
070        }
071    
072        private void handleStartEvent(XMLEvent event) {
073            level++;
074            queuedEvents.add(event);
075            if (collectors.peek().datatype == null) { // undecided so far
076                collectors.peek().datatype = XMLLiteral;
077            }
078        }
079    
080        private void handleEndEvent(XMLEvent event) {
081            queuedEvents.add(event);
082            if (collectors.peek().level == level) { 
083                Collector coll = collectors.pop();
084                emitTriples(coll, queuedEvents.subList(coll.start, queuedEvents.size()));
085            }
086            level--;
087        }
088    
089        private void emitTriples(Collector coll, List<XMLEvent> subList) {
090            String lex = (XMLLiteral.equals(coll.datatype)) ?
091                gatherXML(subList, coll.lang) :
092                gatherText(subList) ;
093            if ((coll.datatype != null) && !"".equals(coll.datatype)) // not plain
094                parser.emitTriplesDatatypeLiteral(coll.subject,
095                        coll.props, lex, coll.datatype);
096            else
097                parser.emitTriplesPlainLiteral(coll.subject,
098                        coll.props, lex, coll.lang);
099        }
100    
101        private String gatherXML(List<XMLEvent> subList, String lang) {
102            try {
103                return gatherXMLEx(subList, lang);
104            } catch (XMLStreamException ex) {
105                throw new RuntimeException("Problem gathering XML", ex);
106            }
107        }
108    
109        private String gatherXMLEx(List<XMLEvent> subList, String lang)
110                throws XMLStreamException {
111            Attribute xmlLang = (lang == null) ?
112                null :
113                eventFactory.createAttribute("xml:lang", lang);
114            StringWriter sw = new StringWriter();
115            XMLStreamWriter out = outputFactory.createXMLStreamWriter(sw);
116            XMLEventWriter xmlWriter = new CanonicalXMLEventWriter(out, xmlLang);
117            xmlWriter.add(fakeEnvelope); // Some libraries dislike xml fragements
118            for (XMLEvent e: subList) {
119                xmlWriter.add(e);
120            }
121            xmlWriter.flush();
122            String xml = sw.toString();
123            // remove <fake..></fake>
124            return xml.substring(xml.indexOf('>') + 1, xml.length() - 7);
125        }
126    
127        private String gatherText(List<XMLEvent> subList) {
128            StringBuilder sb = new StringBuilder();
129            for (XMLEvent e: subList) {
130                if (e.isCharacters()) sb.append(e.asCharacters().getData());
131            }
132            return sb.toString();
133        }
134    
135        final static class Collector {
136            private final String subject;
137            private final Collection<String> props;
138            private String datatype;
139            private final String lang;
140            private final int level;
141            private final int start;
142    
143            private Collector(String subject, Collection<String> props, String datatype,
144                    String lang, int level, int start) {
145                this.subject = subject;
146                this.props = props;
147                this.datatype = datatype;
148                this.lang = lang;
149                this.level = level;
150                this.start = start;
151            }
152    
153        }
154    
155    }
156    
157    /*
158     * (c) Copyright 2009 University of Bristol
159     * All rights reserved.
160     *
161     * Redistribution and use in source and binary forms, with or without
162     * modification, are permitted provided that the following conditions
163     * are met:
164     * 1. Redistributions of source code must retain the above copyright
165     *    notice, this list of conditions and the following disclaimer.
166     * 2. Redistributions in binary form must reproduce the above copyright
167     *    notice, this list of conditions and the following disclaimer in the
168     *    documentation and/or other materials provided with the distribution.
169     * 3. The name of the author may not be used to endorse or promote products
170     *    derived from this software without specific prior written permission.
171     *
172     * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
173     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
174     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
175     * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
176     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
177     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
178     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
179     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
180     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
181     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
182     */