001 /*
002 * (c) Copyright 2009 University of Bristol
003 * All rights reserved.
004 * [See end of file]
005 */
006 package net.rootdev.javardfa;
007
008 import net.rootdev.javardfa.uri.URIExtractor10;
009 import net.rootdev.javardfa.uri.URIExtractor;
010 import net.rootdev.javardfa.uri.URIExtractor11;
011 import net.rootdev.javardfa.uri.IRIResolver;
012 import javax.xml.stream.XMLEventFactory;
013 import javax.xml.stream.XMLOutputFactory;
014 import nu.validator.htmlparser.common.XmlViolationPolicy;
015 import nu.validator.htmlparser.sax.HtmlParser;
016 import org.xml.sax.SAXException;
017 import org.xml.sax.XMLReader;
018 import org.xml.sax.helpers.XMLReaderFactory;
019
020 /**
021 * I use these in a few places. stuck here for simplicity
022 *
023 * @author pldms
024 */
025 public class ParserFactory {
026
027 public enum Format {
028
029 HTML, XHTML;
030
031 public static Format lookup(String format) {
032 if ("xhtml".equalsIgnoreCase(format)) {
033 return XHTML;
034 }
035 if ("html".equalsIgnoreCase(format)) {
036 return HTML;
037 }
038 return null;
039 }
040 }
041
042 /**
043 *
044 * @return An XMLReader with validation turned off
045 * @throws SAXException
046 */
047 public static XMLReader createNonvalidatingReader() throws SAXException {
048 XMLReader reader = XMLReaderFactory.createXMLReader();
049 reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
050 return reader;
051 }
052
053 /**
054 *
055 * @return An HTML 5 XMLReader set up to by fairly forgiving.
056 */
057 public static XMLReader createHTML5Reader() {
058 HtmlParser reader = new HtmlParser();
059 reader.setXmlPolicy(XmlViolationPolicy.ALLOW);
060 reader.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
061 reader.setMappingLangToXmlLang(false);
062 return reader;
063 }
064
065 /**
066 * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed
067 * to the StatementSink sink. Uses IRI resolver.
068 *
069 * @param sink
070 * @param format
071 * @return
072 * @throws SAXException
073 */
074 public static XMLReader createReaderForFormat(StatementSink sink,
075 Format format, Setting... settings) throws SAXException {
076 return createReaderForFormat(sink, format, new IRIResolver(), settings);
077 }
078
079 /**
080 * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed
081 * to the StatementSink sink.
082 *
083 * @param sink
084 * @param format
085 * @param resolver
086 * @return
087 * @throws SAXException
088 */
089 public static XMLReader createReaderForFormat(StatementSink sink,
090 Format format, Resolver resolver, Setting... settings) throws SAXException {
091 XMLReader reader = getReader(format);
092 boolean is11 = false;
093 for (Setting setting: settings) if (setting == Setting.OnePointOne) is11 = true;
094 URIExtractor extractor = (is11) ?
095 new URIExtractor11(resolver) : new URIExtractor10(resolver);
096 ProfileCollector profileCollector = (is11) ?
097 new SimpleProfileCollector() : ProfileCollector.EMPTY_COLLECTOR ;
098 Parser parser = getParser(format, sink, extractor, profileCollector);
099 for (Setting setting: settings) parser.enable(setting);
100 reader.setContentHandler(parser);
101 return reader;
102 }
103
104 private static XMLReader getReader(Format format) throws SAXException {
105 switch (format) {
106 case XHTML:
107 return ParserFactory.createNonvalidatingReader();
108 default:
109 return ParserFactory.createHTML5Reader();
110 }
111 }
112
113 private static Parser getParser(Format format, StatementSink sink,
114 URIExtractor extractor, ProfileCollector profileCollector) {
115 return getParser(format, sink, XMLOutputFactory.newInstance(),
116 XMLEventFactory.newInstance(), extractor, profileCollector);
117 }
118
119 private static Parser getParser(Format format, StatementSink sink,
120 XMLOutputFactory outputFactory, XMLEventFactory eventFactory,
121 URIExtractor extractor, ProfileCollector profileCollector) {
122 switch (format) {
123 case XHTML:
124 return new Parser(sink, outputFactory, eventFactory, extractor, profileCollector);
125 default:
126 Parser p = new Parser(sink, outputFactory, eventFactory, extractor, profileCollector);
127 p.enable(Setting.ManualNamespaces);
128 return p;
129 }
130 }
131 }
132
133 /*
134 * (c) Copyright 2009 University of Bristol
135 * All rights reserved.
136 *
137 * Redistribution and use in source and binary forms, with or without
138 * modification, are permitted provided that the following conditions
139 * are met:
140 * 1. Redistributions of source code must retain the above copyright
141 * notice, this list of conditions and the following disclaimer.
142 * 2. Redistributions in binary form must reproduce the above copyright
143 * notice, this list of conditions and the following disclaimer in the
144 * documentation and/or other materials provided with the distribution.
145 * 3. The name of the author may not be used to endorse or promote products
146 * derived from this software without specific prior written permission.
147 *
148 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
149 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
150 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
151 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
152 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
153 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
154 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
155 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
156 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
157 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
158 */