View Javadoc
1   package org.argeo.cms.internal.http;
2   
import static javax.jcr.Property.JCR_DESCRIPTION;
import static javax.jcr.Property.JCR_LAST_MODIFIED;
import static javax.jcr.Property.JCR_TITLE;

import java.io.IOException;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.util.Calendar;
import java.util.Collection;
import java.util.Locale;

import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.Repository;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.security.auth.Subject;
import javax.security.auth.login.LoginContext;
import javax.security.auth.login.LoginException;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.argeo.api.NodeConstants;
import org.argeo.api.NodeUtils;
import org.argeo.cms.CmsException;
import org.argeo.jcr.JcrUtils;
import org.osgi.framework.BundleContext;
import org.osgi.framework.FrameworkUtil;
import org.osgi.framework.ServiceReference;
35  
36  public class LinkServlet extends HttpServlet {
37  	private final BundleContext bc = FrameworkUtil.getBundle(getClass()).getBundleContext();
38  
39  	private static final long serialVersionUID = 3749990143146845708L;
40  
41  	@Override
42  	protected void service(HttpServletRequest request, HttpServletResponse response)
43  			throws ServletException, IOException {
44  		String path = request.getPathInfo();
45  		String userAgent = request.getHeader("User-Agent").toLowerCase();
46  		boolean isBot = false;
47  		// boolean isCompatibleBrowser = false;
48  		if (userAgent.contains("bot") || userAgent.contains("facebook") || userAgent.contains("twitter")) {
49  			isBot = true;
50  		}
51  		// else if (userAgent.contains("webkit") ||
52  		// userAgent.contains("gecko") || userAgent.contains("firefox")
53  		// || userAgent.contains("msie") || userAgent.contains("chrome") ||
54  		// userAgent.contains("chromium")
55  		// || userAgent.contains("opera") || userAgent.contains("browser"))
56  		// {
57  		// isCompatibleBrowser = true;
58  		// }
59  
60  		if (isBot) {
61  			// log.warn("# BOT " + request.getHeader("User-Agent"));
62  			canonicalAnswer(request, response, path);
63  			return;
64  		}
65  
66  		// if (isCompatibleBrowser && log.isTraceEnabled())
67  		// log.trace("# BWS " + request.getHeader("User-Agent"));
68  		redirectTo(response, "/#" + path);
69  	}
70  
71  	private void redirectTo(HttpServletResponse response, String location) {
72  		response.setHeader("Location", location);
73  		response.setStatus(HttpServletResponse.SC_FOUND);
74  	}
75  
76  	// private boolean canonicalAnswerNeededBy(HttpServletRequest request) {
77  	// String userAgent = request.getHeader("User-Agent").toLowerCase();
78  	// return userAgent.startsWith("facebookexternalhit/");
79  	// }
80  
81  	/** For bots which don't understand RWT. */
82  	private void canonicalAnswer(HttpServletRequest request, HttpServletResponse response, String path) {
83  		Session session = null;
84  		try {
85  			PrintWriter writer = response.getWriter();
86  			session = Subject.doAs(anonymousLogin(), new PrivilegedExceptionAction<Session>() {
87  
88  				@Override
89  				public Session run() throws Exception {
90  					Collection<ServiceReference<Repository>> srs = bc.getServiceReferences(Repository.class,
91  							"(" + NodeConstants.CN + "=" + NodeConstants.EGO_REPOSITORY + ")");
92  					Repository repository = bc.getService(srs.iterator().next());
93  					return repository.login();
94  				}
95  
96  			});
97  			Node node = session.getNode(path);
98  			String title = node.hasProperty(JCR_TITLE) ? node.getProperty(JCR_TITLE).getString() : node.getName();
99  			String desc = node.hasProperty(JCR_DESCRIPTION) ? node.getProperty(JCR_DESCRIPTION).getString() : null;
100 			Calendar lastUpdate = node.hasProperty(JCR_LAST_MODIFIED) ? node.getProperty(JCR_LAST_MODIFIED).getDate()
101 					: null;
102 			String url = getCanonicalUrl(node, request);
103 			String imgUrl = null;
104 			// TODO support images
105 //			loop: for (NodeIterator it = node.getNodes(); it.hasNext();) {
106 //				// Takes the first found cms:image
107 //				Node child = it.nextNode();
108 //				if (child.isNodeType(CMS_IMAGE)) {
109 //					imgUrl = getDataUrl(child, request);
110 //					break loop;
111 //				}
112 //			}
113 			StringBuilder buf = new StringBuilder();
114 			buf.append("<html>");
115 			buf.append("<head>");
116 			writeMeta(buf, "og:title", escapeHTML(title));
117 			writeMeta(buf, "og:type", "website");
118 			buf.append("<meta name='twitter:card' content='summary' />");
119 			buf.append("<meta name='twitter:site' content='@argeo_org' />");
120 			writeMeta(buf, "og:url", url);
121 			if (desc != null)
122 				writeMeta(buf, "og:description", escapeHTML(desc));
123 			if (imgUrl != null)
124 				writeMeta(buf, "og:image", imgUrl);
125 			if (lastUpdate != null)
126 				writeMeta(buf, "og:updated_time", Long.toString(lastUpdate.getTime().getTime()));
127 			buf.append("</head>");
128 			buf.append("<body>");
129 			buf.append("<p><b>!! This page is meant for indexing robots, not for real people," + " visit <a href='/#")
130 					.append(path).append("'>").append(escapeHTML(title)).append("</a> instead.</b></p>");
131 			writeCanonical(buf, node);
132 			buf.append("</body>");
133 			buf.append("</html>");
134 			writer.print(buf.toString());
135 
136 			response.setHeader("Content-Type", "text/html");
137 			writer.flush();
138 		} catch (Exception e) {
139 			throw new CmsException("Cannot write canonical answer", e);
140 		} finally {
141 			JcrUtils.logoutQuietly(session);
142 		}
143 	}
144 
145 	/**
146 	 * From http://stackoverflow.com/questions/1265282/recommended-method-for-
147 	 * escaping-html-in-java (+ escaping '). TODO Use
148 	 * org.apache.commons.lang.StringEscapeUtils
149 	 */
150 	private String escapeHTML(String s) {
151 		StringBuilder out = new StringBuilder(Math.max(16, s.length()));
152 		for (int i = 0; i < s.length(); i++) {
153 			char c = s.charAt(i);
154 			if (c > 127 || c == '\'' || c == '"' || c == '<' || c == '>' || c == '&') {
155 				out.append("&#");
156 				out.append((int) c);
157 				out.append(';');
158 			} else {
159 				out.append(c);
160 			}
161 		}
162 		return out.toString();
163 	}
164 
165 	private void writeMeta(StringBuilder buf, String tag, String value) {
166 		buf.append("<meta property='").append(tag).append("' content='").append(value).append("'/>");
167 	}
168 
169 	private void writeCanonical(StringBuilder buf, Node node) throws RepositoryException {
170 		buf.append("<div>");
171 		if (node.hasProperty(JCR_TITLE))
172 			buf.append("<p>").append(node.getProperty(JCR_TITLE).getString()).append("</p>");
173 		if (node.hasProperty(JCR_DESCRIPTION))
174 			buf.append("<p>").append(node.getProperty(JCR_DESCRIPTION).getString()).append("</p>");
175 		NodeIterator children = node.getNodes();
176 		while (children.hasNext()) {
177 			writeCanonical(buf, children.nextNode());
178 		}
179 		buf.append("</div>");
180 	}
181 
182 	// DATA
183 	private StringBuilder getServerBaseUrl(HttpServletRequest request) {
184 		try {
185 			URL url = new URL(request.getRequestURL().toString());
186 			StringBuilder buf = new StringBuilder();
187 			buf.append(url.getProtocol()).append("://").append(url.getHost());
188 			if (url.getPort() != -1)
189 				buf.append(':').append(url.getPort());
190 			return buf;
191 		} catch (MalformedURLException e) {
192 			throw new CmsException("Cannot extract server base URL from " + request.getRequestURL(), e);
193 		}
194 	}
195 
196 	private String getDataUrl(Node node, HttpServletRequest request) throws RepositoryException {
197 		try {
198 			StringBuilder buf = getServerBaseUrl(request);
199 			buf.append(NodeUtils.getDataPath(NodeConstants.EGO_REPOSITORY, node));
200 			return new URL(buf.toString()).toString();
201 		} catch (MalformedURLException e) {
202 			throw new CmsException("Cannot build data URL for " + node, e);
203 		}
204 	}
205 
206 	// public static String getDataPath(Node node) throws
207 	// RepositoryException {
208 	// assert node != null;
209 	// String userId = node.getSession().getUserID();
210 	//// if (log.isTraceEnabled())
211 	//// log.trace(userId + " : " + node.getPath());
212 	// StringBuilder buf = new StringBuilder();
213 	// boolean isAnonymous =
214 	// userId.equalsIgnoreCase(NodeConstants.ROLE_ANONYMOUS);
215 	// if (isAnonymous)
216 	// buf.append(WEBDAV_PUBLIC);
217 	// else
218 	// buf.append(WEBDAV_PRIVATE);
219 	// Session session = node.getSession();
220 	// Repository repository = session.getRepository();
221 	// String cn;
222 	// if (repository.isSingleValueDescriptor(NodeConstants.CN)) {
223 	// cn = repository.getDescriptor(NodeConstants.CN);
224 	// } else {
225 	//// log.warn("No cn defined in repository, using " +
226 	// NodeConstants.NODE);
227 	// cn = NodeConstants.NODE;
228 	// }
229 	// return
230 	// buf.append('/').append(cn).append('/').append(session.getWorkspace().getName()).append(node.getPath())
231 	// .toString();
232 	// }
233 
234 	private String getCanonicalUrl(Node node, HttpServletRequest request) throws RepositoryException {
235 		try {
236 			StringBuilder buf = getServerBaseUrl(request);
237 			buf.append('/').append('!').append(node.getPath());
238 			return new URL(buf.toString()).toString();
239 		} catch (MalformedURLException e) {
240 			throw new CmsException("Cannot build data URL for " + node, e);
241 		}
242 		// return request.getRequestURL().append('!').append(node.getPath())
243 		// .toString();
244 	}
245 
246 	private Subject anonymousLogin() {
247 		Subject subject = new Subject();
248 		LoginContext lc;
249 		try {
250 			lc = new LoginContext(NodeConstants.LOGIN_CONTEXT_ANONYMOUS, subject);
251 			lc.login();
252 			return subject;
253 		} catch (LoginException e) {
254 			throw new CmsException("Cannot login as anonymous", e);
255 		}
256 	}
257 
258 }