001    /*
002    New BSD license: http://opensource.org/licenses/bsd-license.php
003    
004    Copyright (c) 2009 Sun Microsystems, Inc.
005    901 San Antonio Road, Palo Alto, CA 94303 USA.
006    All rights reserved.
007    
008    
009    Redistribution and use in source and binary forms, with or without
010    modification, are permitted provided that the following conditions are met:
011    
012    - Redistributions of source code must retain the above copyright notice,
013    this list of conditions and the following disclaimer.
014    - Redistributions in binary form must reproduce the above copyright notice,
015    this list of conditions and the following disclaimer in the documentation
016    and/or other materials provided with the distribution.
017    - Neither the name of Sun Microsystems, Inc. nor the names of its contributors
018    may be used to endorse or promote products derived from this software
019    without specific prior written permission.
020    
021    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
022    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
023    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
024    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
025    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
026    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
027    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
028    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
029    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
030    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
031    POSSIBILITY OF SUCH DAMAGE.
032     */
033    package net.rootdev.javardfa;
034    
035    import java.io.IOException;
036    import java.io.InputStream;
037    import java.io.Reader;
038    import nu.validator.htmlparser.common.XmlViolationPolicy;
039    import nu.validator.htmlparser.sax.HtmlParser;
040    import org.slf4j.Logger;
041    import org.slf4j.LoggerFactory;
042    import org.openrdf.model.ValueFactory;
043    import org.openrdf.rio.ParseErrorListener;
044    import org.openrdf.rio.ParseLocationListener;
045    import org.openrdf.rio.RDFFormat;
046    import org.openrdf.rio.RDFHandler;
047    import org.openrdf.rio.RDFHandlerException;
048    import org.openrdf.rio.RDFParseException;
049    import org.openrdf.rio.RDFParser;
050    import org.xml.sax.InputSource;
051    import org.xml.sax.SAXException;
052    import org.xml.sax.XMLReader;
053    import org.xml.sax.helpers.XMLReaderFactory;
054    
055    /**
056     *
057     * @author Henry Story <henry.story@bblfish.net>
058     */
059    public abstract class SesameRDFaParser implements RDFParser {
060    
061       private static Logger log = LoggerFactory.getLogger(SesameRDFaParser.class);
062       ValueFactory valFact;
063       RDFHandler handler;
064       boolean verifyData = false;
065       private XMLReader xmlReader;
066       boolean stopAtFirstError = true;
067       private boolean preserveBNodeIds = false;
068    
069       public static class HTMLRDFaParser extends SesameRDFaParser {
070    
071          @Override
072          public XMLReader getReader() {
073             HtmlParser reader = new HtmlParser();
074             reader.setXmlPolicy(XmlViolationPolicy.ALLOW);
075             reader.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
076             reader.setMappingLangToXmlLang(false);
077             return reader;
078          }
079    
080          @Override
081          public void initParser(Parser parser) {
082             parser.enable(Setting.ManualNamespaces);
083          }
084    
085          @Override
086          public RDFFormat getRDFFormat() {
087             return RDFaHtmlParserFactory.rdfa_html_Format;
088          }
089       }
090    
091       public static class XHTMLRDFaParser extends SesameRDFaParser {
092    
093          @Override
094          public XMLReader getReader() throws SAXException {
095             XMLReader reader = XMLReaderFactory.createXMLReader();
096             reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
097             return reader;
098          }
099    
100          public RDFFormat getRDFFormat() {
101             return RDFaXHtmlParserFactory.rdfa_xhtml_Format;
102          }
103       }
104    
105       public void setValueFactory(ValueFactory valueFactory) {
106          this.valFact = valueFactory;
107       }
108    
109       public void setRDFHandler(RDFHandler handler) {
110          this.handler = handler;
111       }
112    
113       public void setParseErrorListener(ParseErrorListener el) {
114          throw new UnsupportedOperationException("Not supported yet.");
115       }
116    
117       public void setParseLocationListener(ParseLocationListener ll) {
118          throw new UnsupportedOperationException("Not supported yet.");
119       }
120    
121       public void setVerifyData(boolean verifyData) {
122          log.warn("not implemented setVerifyData(...) in " + this.getClass().getCanonicalName());
123    
124       }
125    
126       public void setPreserveBNodeIDs(boolean preserveBNodeIDs) {
127          log.warn("not implemented setPreserveBNodeIDs(...) in " + this.getClass().getCanonicalName());
128       }
129    
130       public void setStopAtFirstError(boolean stopAtFirstError) {
131          log.warn("not implemented setStopAtFirstError(...) in " + this.getClass().getCanonicalName());
132       }
133    
134       public void setDatatypeHandling(DatatypeHandling datatypeHandling) {
135          log.warn("not impemented setDatatypeHandling(...) yet in " + this.getClass().getCanonicalName());
136       }
137    
138       public void setReader(XMLReader reader) {
139          this.xmlReader = reader;
140       }
141    
142       protected XMLReader getReader() throws SAXException {
143          return xmlReader;
144       }
145    
146       protected void initParser(Parser parser) {
147       }
148    
149       public void parse(InputStream in, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
150          parse(new InputSource(in), baseURI);
151       }
152    
153       public void parse(Reader reader, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
154          parse(new InputSource(reader), baseURI);
155       }
156    
157       private void parse(InputSource in, String baseURI) throws IOException {
158          Parser parser = new Parser(new SesameStatementSink(valFact, handler));
159          parser.setBase(baseURI);
160          initParser(parser);
161          try {
162             XMLReader xreader = getReader();
163             xreader.setContentHandler(parser);
164             xreader.parse(in);
165          } catch (SAXException ex) {
166             throw new RuntimeException("SAX Error when parsing", ex);
167          }
168       }
169    }