1 // FIXME: add +proj and -proj to adjust project results
2 
3 // dmdi -g -debug -version=vps locate stemmer.d -oflocate_vps -version=scgi
4 
5 // my local config assumes this will be on port 9653
6 
7 module adrdox.locate;
8 
import arsd.postgres;

import ps = PorterStemmer;
import arsd.cgi;
import arsd.dom;
import std.stdio;
import std.file;
import std.conv : to;
import std.algorithm : sort;
import std.string : toLower, replace, split, indexOf, join;
19 
// Cached connection handle; stays null until db() is called for the first time.
PostgreSql db_;

// Constant added to every score produced by getPossibilities (currently zero).
enum projectAdjustment = 0;

/++
	Returns the cached PostgreSql connection, opening it with the
	connection string "dbname=adrdox" on the first call.
+/
PostgreSql db() {
	if(db_ !is null)
		return db_;

	db_ = new PostgreSql("dbname=adrdox");
	return db_;
}
30 
/++
	Looks up `term` in the auto_generated_tags table.

	Params:
		term = tag to match exactly against the `tag` column

	Returns:
		One TermElement (symbol id, score) per matching row, highest score
		first; the query limits the result to 15 rows.
+/
TermElement[] resultsByTerm(string term) {
	enum sql = "SELECT d_symbols_id, score FROM auto_generated_tags WHERE tag = ? ORDER BY score DESC LIMIT 15";

	TermElement[] matches;
	foreach(r; db.query(sql, term)) {
		auto symbolId = to!int(r[0]);
		auto tagScore = to!int(r[1]);
		matches ~= TermElement(symbolId, tagScore);
	}
	return matches;
}
37 
/++
	Loads the d_symbols row with id `i`, joined to its package version and
	dub package, restricted to rows where is_latest is true.

	Params:
		i = d_symbols.id to load

	Returns:
		A DeclElement built from the first row returned. `type` is always ""
		and `parent` is always 0 because the query result is not used to fill
		them. DeclElement.init (id == 0) is returned when no row matches.
+/
DeclElement getDecl(int i) {
	auto rows = db.query("
		SELECT
			d_symbols.*,
			dub_package.url_name AS package_subdomain
		FROM
			d_symbols
		INNER JOIN
			package_version ON package_version.id = d_symbols.package_version_id
		INNER JOIN
			dub_package ON dub_package.id = package_version.dub_package_id
		WHERE
			d_symbols.id = ?
			AND
			is_latest = true
		", i);

	foreach(r; rows) {
		// only the first row matters; return as soon as it is converted
		DeclElement found;
		found.name = r["fully_qualified_name"];
		found.description = r["summary"];
		found.link = r["url_name"];
		found.id = r["id"].to!int;
		found.type = "";
		found.parent = 0;
		found.packageName = r["package_subdomain"];
		return found;
	}

	return DeclElement.init;
}
58 
/// One tag lookup hit: the id of the matching symbol and the score stored for that tag.
struct TermElement {
	int declId, score;
}
63 
/++
	A symbol as shown in the search results. Instances are constructed
	positionally, so the field order is part of the interface.
+/
struct DeclElement {
	/// Name of the symbol (getDecl fills this from fully_qualified_name).
	string name;
	/// Summary text; despite the field name this holds HTML.
	string description;
	/// Link target appended to the documentation base URL.
	string link;
	/// Symbol id; 0 is used to mean "no such declaration".
	int id;
	/// Kind of declaration; compared against "module" by the callers.
	string type;
	/// Id of the enclosing declaration, 0 when there is none.
	int parent;
	/// Package name, used as the subdomain when building result links.
	string packageName;
}
73 
/// A scored search candidate: symbol id, its computed score, and the loaded declaration.
struct Magic {
	int declId, score;
	DeclElement decl;
}
79 
/++
	Scores candidate declarations for a free-text search.

	The search string is split on spaces. Each non-empty term is looked up
	through resultsByTerm three ways: as typed, lower-cased (if different),
	and Porter-stemmed (if different from the lower-cased form). Scores are
	summed per declaration, then halved once for every term the declaration
	did not match. If nothing matched at all, a fallback compares each term
	against fully qualified names by Levenshtein distance.

	Params:
		search = raw search string; at most the first 31 non-empty terms are used

	Returns:
		Unsorted candidates with their score and loaded declaration.
+/
Magic[] getPossibilities(string search) {
	// declHits keeps one bit per term in an int, so 31 terms is the most
	// that can be tracked without shifting into or past the sign bit.
	enum maxTerms = 31;

	int[int] declScores; // declId -> accumulated score

	int[int] declHits; // declId -> bitmask of the term indexes that matched

	ps.PorterStemmer s;

	// Drop empty terms while keeping the order the user typed them in:
	// addHit treats index 0 differently, so the order matters.
	string[] terms;
	foreach(piece; search.split(" ")) {
		if(piece.length == 0)
			continue;
		terms ~= piece;
		if(terms.length == maxTerms)
			break;
	}

	void addHit(TermElement item, size_t idx) {
		if(idx == 0) {
			declScores[item.declId] += item.score;
			return;
		}
		if(item.declId in declScores) {
			declScores[item.declId] += 25; // hit both terms
			declScores[item.declId] += item.score;
		} else {
			// only hit one term...
			declScores[item.declId] += item.score / 2;
		}
	}

	// Scores every row tagged `tag` and marks term `idx` as matched for it.
	void recordHits(string tag, size_t idx) {
		foreach(item; resultsByTerm(tag)) {
			addHit(item, idx);
			declHits[item.declId] |= 1 << idx;
		}
	}

	// On each term, we want to check for exact match and fuzzy match / natural language match.
	// FIXME: if something matches both it should be really strong. see time_t vs "time_t std.datetime"
	foreach(idx, term; terms) {
		assert(term.length > 0);

		recordHits(term, idx);

		auto lowered = term.toLower;
		if(lowered != term)
			recordHits(lowered, idx);

		auto stemmed = s.stem(lowered).idup;
		if(stemmed != lowered)
			recordHits(stemmed, idx);
	}

	Magic[] magic;

	foreach(decl, score; declScores) {
		auto hits = declHits[decl];
		// halve the score once for each term this declaration missed
		foreach(idx; 0 .. terms.length) {
			if(!(hits & (1 << idx)))
				score /= 2;
		}
		magic ~= Magic(decl, score + projectAdjustment, getDecl(decl));
	}

	if(magic.length == 0) {
		import std.algorithm : levenshteinDistance;
		import std.utf : stride;

		// Nothing matched by tag: fall back to near-miss spelling matches
		// against names that start with the same character as the term.
		foreach(term; terms) {
			term = term.toLower();
			// take the whole first code point, not just its first byte,
			// so a multi-byte character is never cut in half
			auto firstChar = term[0 .. stride(term, 0)];
			// NOTE(review): a leading '%' or '_' acts as a LIKE wildcard here; results
			// are still filtered by distance below, but the query scans more rows.
			foreach(row; db.query("SELECT id, fully_qualified_name FROM d_symbols WHERE fully_qualified_name LIKE ?", firstChar ~ "%")) {
				string name = row[1];
				int id = row[0].to!int;
				name = name.toLower;
				auto dist = cast(int) levenshteinDistance(name, term);
				if(dist <= 2)
					magic ~= Magic(id, projectAdjustment + (3 - dist), getDecl(id));
			}
		}
	}

	// boosts based on topography
	foreach(ref item; magic) {
		auto decl = item.decl;
		if(decl.type == "module") {
			// if it is a module, give it moar points
			item.score += 8;
			continue;
		}
		// Direct children of a module get a smaller boost. The parent is
		// only fetched when there is one, which avoids a pointless query.
		if(decl.parent != 0 && getDecl(decl.parent).type == "module") {
			item.score += 5;
		}
	}

	return magic;
}
176 
177 import std.uri;
178 
/++
	CGI request handler for the documentation search.

	In order, it:
	$(LIST
		* (vps build only) serves the search home page, script.js and style.css
		  from /dpldocs-build/ when the request path asks for them;
		* redirects to "/" when there is nothing to search for;
		* redirects well-known shortcut words (bugzilla, dip, wiki, faq, ...)
		  to external pages;
		* (non-vps build only) redirects to a matching static page under
		  /experimental-docs/ or, unless "local" is in the query string, to
		  search.dpldocs.info;
		* otherwise runs getPossibilities, sorts by score and renders up to 20
		  de-duplicated results into the skeleton HTML document.
	)

	Params:
		cgi = the request/response object; all output goes through it
+/
void searcher(Cgi cgi) {

	auto search = cgi.request("q", cgi.request("searchTerm", cgi.queryString));

	// Declared outside the version block because it is read below in every
	// build; it simply stays empty when not built with -version=vps.
	string path;

	version(vps) {
		path = cgi.requestUri;

		// strip the query string
		auto q = path.indexOf("?");
		if(q != -1) {
			path = path[0 .. q];
		}

		// strip the leading slash
		if(path.length && path[0] == '/')
			path = path[1 .. $];

		if(path.length == 0 && search.length == 0) {
			import std.file;

			cgi.write(std.file.read("/dpldocs-build/search-home.html"), true);
			return;
		}

		if(path == "script.js") {
			import std.file;
			cgi.setResponseContentType("text/javascript");
			cgi.write(std.file.read("/dpldocs-build/script.js"), true);
			return;
		}

		if(path == "style.css") {
			import std.file;
			cgi.setResponseContentType("text/css");
			cgi.write(std.file.read("/dpldocs-build/style.css"), true);
			return;
		}
	}

	alias searchTerm = search;

	// no explicit query: treat the request path as the search term
	if(search.length == 0 && path.length)
		search = path;

	if(search.length == 0) {
		cgi.setResponseLocation("/");
		return;
	}

	// Shortcut words: the first word selects the destination, the rest (if
	// any) is passed along. User-supplied parts are URL-encoded before they
	// are placed into the Location header.
	auto parts = search.split(" ");
	switch(parts[0].toLower()) {
		case "auto-ref-return-function-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-ref-functions");
			return;
		case "auto-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#auto-functions");
			return;
		case "ref-function-return-prototype":
			cgi.setResponseLocation("http://dlang.org/spec/function.html#ref-functions");
			return;
		case "bugzilla":
			auto url = "http://d.puremagic.com/issues/";
			if(parts.length > 1)
				url ~= "show_bug.cgi?id=" ~ std.uri.encodeComponent(parts[1]);
			cgi.setResponseLocation(url);
			return;
		case "dip":
			auto url = "http://wiki.dlang.org/DIPs";
			if(parts.length > 1)
				url = "http://wiki.dlang.org/DIP" ~ std.uri.encodeComponent(parts[1]);
			cgi.setResponseLocation(url);
			return;
		case "wiki":
			auto url = "http://wiki.dlang.org/";
			if(parts.length > 1) {
				// words are re-joined with a single space; the '?' starts the query string
				url ~= "?search=" ~ std.uri.encodeComponent(join(parts[1..$], " ")) ~ "&go=Go&title=Special%3ASearch";
			}
			cgi.setResponseLocation(url);
			return;
		case "faqs":
		case "faq":
			cgi.setResponseLocation("http://wiki.dlang.org/FAQs");
			return;
		case "template-alias-parameter":
			cgi.setResponseLocation("https://dlang.org/spec/template.html#aliasparameters");
			return;
		case "is-expression":
			cgi.setResponseLocation("https://dlang.org/spec/expression.html#IsExpression");
			return;
		case "typeof-expression":
			cgi.setResponseLocation("https://dlang.org/spec/declaration.html#Typeof");
			return;
		case "oldwiki":
			auto url = "http://prowiki.org/wiki4d/wiki.cgi";
			if(parts.length > 1)
				url ~= "?formpage=Search&id=Search&search=" ~ std.uri.
					encodeComponent(join(parts[1..$], " "));
			cgi.setResponseLocation(url);
			return;
		default:
			// just continue
			version(vps) { } else {
				// NOTE(review): searchTerm is untrusted and is concatenated into a
				// filesystem path here; consider rejecting "/" and ".." in it.
				if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".1.html")) {
					cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".1.html");
					return;
				}
				if(std.file.exists("/var/www/dpldocs.info/experimental-docs/" ~ searchTerm ~ ".html")) {
					cgi.setResponseLocation("/experimental-docs/" ~ searchTerm ~ ".html");
					return;
				}
				// redirect to vps
				if("local" !in cgi.get) {
					cgi.setResponseLocation("//search.dpldocs.info/?q=" ~ std.uri.encodeComponent(searchTerm));
					// the client is being sent elsewhere, so skip the local search
					return;
				}
			}
	}


	Magic[] magic = getPossibilities(search);

	sort!((a, b) => a.score > b.score)(magic);

	// adjustments based on previously showing results
	// (the list is not re-sorted afterwards; only the reported score changes)
	{
		bool[int] alreadyPresent;
		foreach(ref item; magic) {
			auto decl = item.decl;
			if(decl.parent in alreadyPresent)
				item.score -= 8;
			alreadyPresent[decl.id] = true;
		}
	}

	auto document = new Document();
	version(vps) {
		import std.file;
		document.parseUtf8(readText("/dpldocs-build/skeleton.html"), true, true);
		document.title = "Dub Documentation Search";
	} else {
		// braces keep this title out of the vps build, whose own title
		// would otherwise be overwritten immediately
		document.parseUtf8(import("skeleton.html"), true, true);
		document.title = "Search Results";
	}

	auto form = document.requireElementById!Form("search");
	form.setValue("searchTerm", search);

	version(vps) {
		// intentionally blank
	} else {
		// the skeleton's stylesheet and script links are relative to /experimental-docs/
		auto l = document.requireSelector("link");
		l.href = "/experimental-docs/" ~ l.href;
		l = document.requireSelector("script[src]");
		l.src = "/experimental-docs/" ~ l.src;
	}

	auto pc = document.requireSelector("#page-content");
	pc.addChild("h1", "Search Results");
	auto ml = pc.addChild("dl");
	ml.className = "member-list";

	// Builds a dotted name by walking up the parent chain until a module,
	// a declaration without a parent, or a missing declaration is reached.
	string getFqn(DeclElement i) {
		string n;
		while(true) {
			if(n) n = "." ~ n;
			n = i.name ~ n;
			if(i.type == "module")
				break;
			if(i.parent == 0)
				break;
			i = getDecl(i.parent);
			if(i.id == 0)
				break;
		}
		return n;
	}

	bool[string] alreadyPresent; // names already rendered, to skip duplicates
	int count = 0;
	foreach(item; magic) {
		auto decl = item.decl;
		if(decl.id == 0) continue; // should never happen
		version(vps)
			auto link = "http://"~decl.packageName~".dpldocs.info/" ~ decl.link;
		else
			auto link = "http://dpldocs.info/experimental-docs/" ~ decl.link;
		auto fqn = getFqn(decl);
		if(fqn in alreadyPresent)
			continue;
		alreadyPresent[fqn] = true;
		auto dt = ml.addChild("dt");
		dt.addClass("search-result");
		dt.addChild("span", decl.packageName).addClass("project-name");
		dt.addChild("br");
		// a zero-width space after each dot lets long names wrap
		dt.addChild("a", fqn.replace(".", ".\u200B"), link);
		dt.dataset.score = to!string(item.score);
		auto html = decl.description;
		//auto d = new Document(html);
		//writeln(d.root.innerText.replace("\n", " "));
		//writeln();

		// FIXME fix relative links from here
		ml.addChild("dd", Html(html));
		count++;

		// show at most 20 results
		if(count >= 20)
			break;
	}

	cgi.write(document.toString, true);
}
386 
// Program entry point: arsd.cgi's GenericMain generates main() and hands each request to searcher.
mixin GenericMain!searcher;
388