001    /*
002     * (c) Copyright 2009 University of Bristol
003     * All rights reserved.
004     * [See end of file]
005     */
006    package net.rootdev.javardfa;
007    
008    import net.rootdev.javardfa.uri.URIExtractor10;
009    import net.rootdev.javardfa.uri.URIExtractor;
010    import net.rootdev.javardfa.uri.URIExtractor11;
011    import net.rootdev.javardfa.uri.IRIResolver;
012    import javax.xml.stream.XMLEventFactory;
013    import javax.xml.stream.XMLOutputFactory;
014    import nu.validator.htmlparser.common.XmlViolationPolicy;
015    import nu.validator.htmlparser.sax.HtmlParser;
016    import org.xml.sax.SAXException;
017    import org.xml.sax.XMLReader;
018    import org.xml.sax.helpers.XMLReaderFactory;
019    
020    /**
021     * I use these in a few places. stuck here for simplicity
022     *
023     * @author pldms
024     */
025    public class ParserFactory {
026    
027        public enum Format {
028    
029            HTML, XHTML;
030    
031            public static Format lookup(String format) {
032                if ("xhtml".equalsIgnoreCase(format)) {
033                    return XHTML;
034                }
035                if ("html".equalsIgnoreCase(format)) {
036                    return HTML;
037                }
038                return null;
039            }
040        }
041    
042        /**
043         *
044         * @return An XMLReader with validation turned off
045         * @throws SAXException
046         */
047        public static XMLReader createNonvalidatingReader() throws SAXException {
048            XMLReader reader = XMLReaderFactory.createXMLReader();
049            reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
050            return reader;
051        }
052    
053        /**
054         *
055         * @return An HTML 5 XMLReader set up to by fairly forgiving.
056         */
057        public static XMLReader createHTML5Reader() {
058            HtmlParser reader = new HtmlParser();
059            reader.setXmlPolicy(XmlViolationPolicy.ALLOW);
060            reader.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
061            reader.setMappingLangToXmlLang(false);
062            return reader;
063        }
064    
065        /**
066         * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed
067         * to the StatementSink sink. Uses IRI resolver.
068         *
069         * @param sink
070         * @param format
071         * @return
072         * @throws SAXException
073         */
074        public static XMLReader createReaderForFormat(StatementSink sink,
075                Format format, Setting... settings) throws SAXException {
076            return createReaderForFormat(sink, format, new IRIResolver(), settings);
077        }
078    
079        /**
080         * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed
081         * to the StatementSink sink.
082         *
083         * @param sink
084         * @param format
085         * @param resolver
086         * @return
087         * @throws SAXException
088         */
089        public static XMLReader createReaderForFormat(StatementSink sink,
090                Format format, Resolver resolver, Setting... settings) throws SAXException {
091            XMLReader reader = getReader(format);
092            boolean is11 = false;
093            for (Setting setting: settings) if (setting == Setting.OnePointOne) is11 = true;
094            URIExtractor extractor = (is11) ?
095                new URIExtractor11(resolver) : new URIExtractor10(resolver);
096            ProfileCollector profileCollector = (is11) ?
097                new SimpleProfileCollector() : ProfileCollector.EMPTY_COLLECTOR ;
098            Parser parser = getParser(format, sink, extractor, profileCollector);
099            for (Setting setting: settings) parser.enable(setting);
100            reader.setContentHandler(parser);
101            return reader;
102        }
103    
104        private static XMLReader getReader(Format format) throws SAXException {
105            switch (format) {
106                case XHTML:
107                    return ParserFactory.createNonvalidatingReader();
108                default:
109                    return ParserFactory.createHTML5Reader();
110            }
111        }
112    
113        private static Parser getParser(Format format, StatementSink sink,
114                URIExtractor extractor, ProfileCollector profileCollector) {
115            return getParser(format, sink, XMLOutputFactory.newInstance(), 
116                    XMLEventFactory.newInstance(), extractor, profileCollector);
117        }
118    
119        private static Parser getParser(Format format, StatementSink sink,
120                XMLOutputFactory outputFactory, XMLEventFactory eventFactory,
121                URIExtractor extractor, ProfileCollector profileCollector) {
122            switch (format) {
123                case XHTML:
124                    return new Parser(sink, outputFactory, eventFactory, extractor, profileCollector);
125                default:
126                    Parser p = new Parser(sink, outputFactory, eventFactory, extractor, profileCollector);
127                    p.enable(Setting.ManualNamespaces);
128                    return p;
129            }
130        }
131    }
132    
133    /*
134     * (c) Copyright 2009 University of Bristol
135     * All rights reserved.
136     *
137     * Redistribution and use in source and binary forms, with or without
138     * modification, are permitted provided that the following conditions
139     * are met:
140     * 1. Redistributions of source code must retain the above copyright
141     *    notice, this list of conditions and the following disclaimer.
142     * 2. Redistributions in binary form must reproduce the above copyright
143     *    notice, this list of conditions and the following disclaimer in the
144     *    documentation and/or other materials provided with the distribution.
145     * 3. The name of the author may not be used to endorse or promote products
146     *    derived from this software without specific prior written permission.
147     *
148     * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
149     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
150     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
151     * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
152     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
153     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
154     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
155     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
156     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
157     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
158     */