001    /*
002     * (c) Copyright 2009 University of Bristol
003     * All rights reserved.
004     * [See end of file]
005     */
006    
007    package net.rootdev.javardfa;
008    
009    import java.io.OutputStream;
010    import java.io.OutputStreamWriter;
011    import java.io.PrintWriter;
012    import java.io.UnsupportedEncodingException;
013    import java.io.Writer;
014    import java.util.regex.Pattern;
015    
016    
017    /**
018     * A pretty ropey NTriple serialiser.
019     * Advantages: streams, no dependencies.
020     *
021     * @author pldms
022     */
023    public class NTripleSink implements StatementSink {
024        private final PrintWriter out;
025        private final String[] comments;
026    
027        public NTripleSink(OutputStream os, String... comments) throws UnsupportedEncodingException {
028            this(new OutputStreamWriter(os, "US-ASCII"), comments); // N-Triples is 7-bit ascii
029        }
030    
031        public NTripleSink(Writer writer, String... comments) {
032            this.out = new PrintWriter(writer);
033            this.comments = comments;
034        }
035    
036        public void start() {
037            for (String line: comments) {
038                out.print("# ");
039                out.println(line);
040            }
041        }
042    
043        public void end() {
044            out.flush();
045        }
046    
047        public void addObject(String subject, String predicate, String object) {
048            out.print(toNode(subject));
049            out.print(toNode(predicate));
050            out.print(toNode(object));
051            out.println(".");
052        }
053    
054        public void addLiteral(String subject, String predicate, String lex, String lang, String datatype) {
055            out.print(toNode(subject));
056            out.print(toNode(predicate));
057            out.print(toLiteral(lex, lang, datatype));
058            out.println(".");
059        }
060    
061        public void addPrefix(String prefix, String uri) {}
062    
063        protected final String toNode(String node) {
064            if (node.startsWith("_:") || node.startsWith("?"))
065                return node + " ";
066            return "<" + node + "> ";
067        }
068    
069        protected final String toLiteral(String lex, String lang, String datatype) {
070            if (lang != null)
071                return quote(lex) + "@" + lang + " ";
072            if (datatype != null)
073                return quote(lex) + "^^<" + datatype + "> ";
074            return quote(lex) + " ";
075        }
076    
077        private Pattern quotePattern = Pattern.compile("\"");
078        protected final String quote(String lex) {
079            return "\"" + encode(lex) + "\"";
080        }
081    
082        protected final String encode(String s) {
083            StringBuilder b = new StringBuilder();
084            for (int i = 0; i < s.length(); i++) {
085                int c = s.codePointAt(i);
086                if (c <= 8) b.append(enc(c));
087                else if (c == '\t') b.append("\\t");
088                else if (c == '\n') b.append("\\n");
089                else if (c == '\r') b.append("\\r");
090                else if (c == '"')  b.append("\\\"");
091                else if (c == '\\') b.append("\\\\");
092                else if (c <= 127)  b.appendCodePoint(c);
093                else if (c <= 0xFFFF) b.append(enc(c));
094                else b.append(longenc(c));
095            }
096            return b.toString();
097        }
098    
099        protected final String enc(int codepoint) {
100            return String.format("\\u%04x", codepoint);
101        }
102    
103        protected final String longenc(int codepoint) {
104            return String.format("\\u%08x", codepoint);
105        }
106    }
107    
108    /*
109     * (c) Copyright 2009 University of Bristol
110     * All rights reserved.
111     *
112     * Redistribution and use in source and binary forms, with or without
113     * modification, are permitted provided that the following conditions
114     * are met:
115     * 1. Redistributions of source code must retain the above copyright
116     *    notice, this list of conditions and the following disclaimer.
117     * 2. Redistributions in binary form must reproduce the above copyright
118     *    notice, this list of conditions and the following disclaimer in the
119     *    documentation and/or other materials provided with the distribution.
120     * 3. The name of the author may not be used to endorse or promote products
121     *    derived from this software without specific prior written permission.
122     *
123     * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
124     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
125     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
126     * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
127     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
128     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
129     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
130     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
131     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
132     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
133     */