1 // FIXME: add +proj and -proj to adjust project results
2 
3 // my local config assumes this will be on port 9653
4 
5 module adrdox.locate;
6 
7 import arsd.postgres;
8 
// TODO (dpldocs): if one request, go right to it; also split camel case and try rearranging words. File.size returned nothing
10 
11 import ps = PorterStemmer;
12 import arsd.cgi;
13 import arsd.dom;
14 import std.stdio;
15 import std.file;
16 import std.conv : to;
17 import std.algorithm : sort;
18 import std.string : toLower, replace, split;
19 
// Cached connection handle; stays null until db() is first called.
PostgreSql db_;

/// Returns the cached PostgreSql connection, opening it on first use.
PostgreSql db() {
	if(db_ !is null)
		return db_;

	db_ = new PostgreSql("dbname=dpldocs user=me");
	return db_;
}
27 
/++
	Looks search terms up in the database-backed index (the `terms` and
	`decls` tables) and turns them into a list of scored declarations.
+/
class ProjectSearcher {
	/// Value matched against decls.project_id in getDecl.
	int projectId;

	/++
		Params:
			path = accepted for interface compatibility; not used by this constructor
			name = stored in projectName
			projectAdjustment = flat amount added to every score this searcher produces
	+/
	this(string path, string name, int projectAdjustment) {

		//foreach(row; db.query("SELECT id FROM projects WHERE name = ?", name))
			//projectId = to!int(row[0]);

		projectId = 1;

		this.projectName = name;
		this.projectAdjustment = projectAdjustment;
	}

	string projectName;
	int projectAdjustment = 0;

	/// Returns up to 15 (declId, score) pairs for an exact `term` match, highest score first.
	TermElement[] resultsByTerm(string term) {
		TermElement[] ret;
		// FIXME: project id?!?!?
		foreach(row; db.query("SELECT declId, score FROM terms WHERE term = ? ORDER BY score DESC LIMIT 15", term))
			ret ~= TermElement(to!int(row[0]), to!int(row[1]));
		return ret;
	}

	/// Fetches declaration `i` for this searcher's projectId; returns DeclElement.init when no row matches.
	DeclElement getDecl(int i) {
		foreach(row; db.query("SELECT * FROM decls WHERE id = ? AND project_id = ?", i, projectId)) {
			return DeclElement(row["name"], row["description"], row["link"], row["id"].to!int, row["type"], row["parent"].length ? row["parent"].to!int : 0);
		}
		return DeclElement.init;
	}

	/// One row of the `terms` table.
	static struct TermElement {
		int declId;
		int score;
	}

	/// One row of the `decls` table.
	static struct DeclElement {
		string name;
		string description; // actually HTML
		string link;
		int id;
		string type;
		int parent; // 0 when the row has no parent value
	}

	/// A scored search result together with the searcher that produced it.
	static struct Magic {
		int declId;
		int score;
		DeclElement decl;
		ProjectSearcher searcher;
	}

	/++
		Scores declarations against a space-separated search string.

		Each term is looked up as written, lowercased, and stemmed. When none
		of that yields anything, declaration names within Levenshtein
		distance 2 of a term are used instead.

		Returns: the unsorted list of candidates, each with projectAdjustment applied.
	+/
	Magic[] getPossibilities(string search) {
		int[int] declScores;

		// Bit idx is set when term number idx hit the declaration.
		// NOTE: the mask is an int, so only the first 31 terms get distinct bits.
		int[int] declHits;

		ps.PorterStemmer s;

		// Drop empty terms (from repeated spaces) while keeping the user's
		// order; addHit treats the first term specially, so order matters.
		string[] terms;
		foreach(piece; search.split(" "))// ~ search.split(".");
			if(piece.length)
				terms ~= piece;

		void addHit(TermElement item, size_t idx) {
			if(idx == 0) {
				declScores[item.declId] += item.score;
				return;
			}
			if(item.declId in declScores) {
				declScores[item.declId] += 25; // hit both terms
				declScores[item.declId] += item.score;
			} else {
				// only hit one term...
				declScores[item.declId] += item.score / 2;
			}
		}

		// Records every index hit for one spelling of term number idx.
		void addHitsFor(string lookup, size_t idx) {
			foreach(item; resultsByTerm(lookup)) {
				addHit(item, idx);
				declHits[item.declId] |= 1 << idx;
			}
		}

		// On each term, we want to check for exact match and fuzzy match / natural language match.
		// FIXME: if something matches both it should be really strong. see time_t vs "time_t std.datetime"
		foreach(idx, term; terms) {
			assert(term.length > 0);

			addHitsFor(term, idx);

			// Skip spellings that equal one already looked up, so the
			// same hits are not counted twice.
			auto l = term.toLower;
			if(l != term)
				addHitsFor(l, idx);

			auto st = s.stem(l).idup;
			if(st != l)
				addHitsFor(st, idx);
		}

		Magic[] magic;

		foreach(decl, score; declScores) {
			auto hits = declHits[decl];
			// Halve the score once for every term that missed this declaration.
			foreach(idx, term; terms) {
				if(!(hits & (1 << idx)))
					score /= 2;
			}
			magic ~= Magic(decl, score + projectAdjustment, getDecl(decl), this);
		}

		if(magic.length == 0) {
			import std.algorithm : levenshteinDistance;
			import std.utf : stride;

			// Nothing in the index matched: fall back to near-miss spellings of declaration names.
			foreach(term; terms) {
				if(term.length == 0) continue;
				term = term.toLower();

				// Take the whole first code point; slicing a single byte would
				// split a multi-byte UTF-8 character and send invalid text to the database.
				auto firstChar = term[0 .. stride(term)];

				//foreach(row; db.query("SELECT id, term FROM terms WHERE score >= 10")) {
				foreach(row; db.query("SELECT id, name FROM decls WHERE name LIKE ?", firstChar ~ "%")) {
					string name = row[1];
					int id = row[0].to!int;
					name = name.toLower;
					auto dist = cast(int) levenshteinDistance(name, term);
					if(dist <= 2)
						magic ~= Magic(id, projectAdjustment + (3 - dist), getDecl(id), this);
				}
			}
		}

		// Boosts based on where the declaration sits in the hierarchy.
		foreach(ref item; magic) {
			auto decl = item.decl;
			if(decl.type == "module") {
				// if it is a module, give it moar points
				item.score += 8;
				continue;
			}
			// Direct children of a module get a smaller boost. This must look
			// at the parent: re-fetching decl.id returns the same non-module
			// declaration, so the boost could never apply.
			if(decl.parent != 0 && getDecl(decl.parent).type == "module") {
				item.score += 5;
			}
		}

		return magic;
	}

}
179 
// Every searcher consulted for a query; filled once by the module constructor below.
__gshared ProjectSearcher[] projectSearchers;

/++
	Fills projectSearchers at program start.

	With version(vps), each entry under /dpldocs/ that contains a
	search-results.html.gz gets its own searcher; otherwise a single
	searcher for experimental-docs is registered.
+/
shared static this() {
	version(vps) {
		version(embedded_httpd)
			processPoolSize = 2;

		import std.file;
		import std.stdio;

		foreach(projectDir; dirEntries("/dpldocs/", SpanMode.shallow)) {
			// Try the conventional location first, then take the first
			// match found anywhere below the project directory.
			string indexFile = projectDir ~ "/master/adrdox-generated/search-results.html.gz";
			if(!exists(indexFile)) {
				indexFile = null;
				foreach(candidate; dirEntries(projectDir, "search-results.html.gz", SpanMode.depth)) {
					indexFile = candidate;
					break;
				}
			}

			if(indexFile.length == 0)
				continue;

			try {
				projectSearchers ~= new ProjectSearcher(indexFile, projectDir["/dpldocs/".length .. $], 0);
				writeln("Loading ", indexFile);
			} catch(Exception e) {
				// One broken project is reported and skipped; the rest still load.
				writeln("FAILED ", indexFile, "\n", e);
			}
		}

		writeln("Ready");

	} else {
		projectSearchers ~= new ProjectSearcher("experimental-docs/search-results.html", "", 5);
		//projectSearchers ~= new ProjectSearcher("experimental-docs/std.xml", "Standard Library", 5);
		//projectSearchers ~= new ProjectSearcher("experimental-docs/arsd.xml", "arsd", 4);
		//projectSearchers ~= new ProjectSearcher("experimental-docs/vibe.xml", "Vibe.d", 0);
		//projectSearchers ~= new ProjectSearcher("experimental-docs/dmd.xml", "DMD", 0);
	}
}
222 
223 import std.uri;
224 
/++
	Maps a search string to a redirect target when it is a known shortcut.

	The first space-separated word is matched case-insensitively against a
	fixed keyword list (bugzilla, dip, wiki, ...). If no keyword matches, a
	page named exactly like the search term under experimental-docs/ is
	used when such a file exists.

	Returns: the URL to redirect to, or null when no shortcut applies.
+/
private string shortcutRedirect(string searchTerm) {
	auto parts = searchTerm.split(" ");
	if(parts.length == 0)
		return null;

	switch(parts[0].toLower()) {
		case "auto-ref-return-function-prototype":
			return "http://dlang.org/spec/function.html#auto-ref-functions";
		case "auto-function-return-prototype":
			return "http://dlang.org/spec/function.html#auto-functions";
		case "ref-function-return-prototype":
			return "http://dlang.org/spec/function.html#ref-functions";
		case "bugzilla":
			string url = "http://d.puremagic.com/issues/";
			// The argument is request data, so encode it before it goes into the redirect URL.
			if(parts.length > 1)
				url ~= "show_bug.cgi?id=" ~ std.uri.encodeComponent(parts[1]);
			return url;
		case "dip":
			if(parts.length > 1)
				return "http://wiki.dlang.org/DIP" ~ std.uri.encodeComponent(parts[1]);
			return "http://wiki.dlang.org/DIPs";
		case "wiki":
			string url = "http://wiki.dlang.org/";
			// The remaining words are joined with a single space and sent as a
			// query string; the leading '?' is required for that.
			if(parts.length > 1)
				url ~= "?search=" ~ std.uri.encodeComponent(join(parts[1..$], " ")) ~ "&go=Go&title=Special%3ASearch";
			return url;
		case "faqs":
		case "faq":
			return "http://wiki.dlang.org/FAQs";
		case "template-alias-parameter":
			return "https://dlang.org/spec/template.html#aliasparameters";
		case "is-expression":
			return "https://dlang.org/spec/expression.html#IsExpression";
		case "typeof-expression":
			return "https://dlang.org/spec/declaration.html#Typeof";
		case "oldwiki":
			string url = "http://prowiki.org/wiki4d/wiki.cgi";
			if(parts.length > 1)
				url ~= "?formpage=Search&id=Search&search=" ~ std.uri.encodeComponent(join(parts[1..$], " "));
			return url;
		default:
			break;
	}

	// Not a keyword: jump straight to a generated page with exactly this name, if there is one.
	if(std.file.exists("experimental-docs/" ~ searchTerm ~ ".1.html"))
		return "/experimental-docs/" ~ searchTerm ~ ".1.html";
	if(std.file.exists("experimental-docs/" ~ searchTerm ~ ".html"))
		return "/experimental-docs/" ~ searchTerm ~ ".html";

	return null;
}

/++
	Request handler.

	Serves script.js / style.css (version(vps) only), redirects keyword
	shortcuts, and otherwise renders an HTML page with up to 20 results
	collected from every entry in projectSearchers.

	The search text is taken from the "q" parameter, then "searchTerm",
	then the raw query string. An empty search redirects to "/".
+/
void searcher(Cgi cgi) {

	version(vps) {
		string path = cgi.requestUri;

		auto q = path.indexOf("?");
		if(q != -1) {
			path = path[0 .. q];
		}

		if(path.length && path[0] == '/')
			path = path[1 .. $];

		if(path == "script.js") {
			import std.file;
			cgi.setResponseContentType("text/javascript");
			cgi.write(std.file.read("/dpldocs-build/script.js"), true);
			return;
		}

		if(path == "style.css") {
			import std.file;
			cgi.setResponseContentType("text/css");
			cgi.write(std.file.read("/dpldocs-build/style.css"), true);
			return;
		}
	}

	auto search = cgi.request("q", cgi.request("searchTerm", cgi.queryString));

	if(search.length == 0) {
		cgi.setResponseLocation("/");
		return;
	}

	auto redirect = shortcutRedirect(search);
	if(redirect.length) {
		cgi.setResponseLocation(redirect);
		return;
	}

	ProjectSearcher.Magic[] magic;
	foreach(searcher; projectSearchers)
		magic ~= searcher.getPossibilities(search);

	sort!((a, b) => a.score > b.score)(magic);

	// adjustments based on previously showing results
	// (applied after sorting, so they change the reported score but not the order)
	{
		bool[int] alreadyPresent;
		foreach(ref item; magic) {
			auto decl = item.decl;
			if(decl.parent in alreadyPresent)
				item.score -= 8;
			alreadyPresent[decl.id] = true;
		}
	}

	auto document = new Document();
	version(vps) {
		import std.file;
		document.parseUtf8(readText("/dpldocs-build/skeleton.html"), true, true);
		document.title = "Dub Documentation Search";
	} else
		document.parseUtf8(import("skeleton.html"), true, true);
	// NOTE(review): this assignment runs in both versions, so it replaces the
	// vps title set just above. Confirm that is intended.
	document.title = "Search Results";

	auto form = document.requireElementById!Form("search");
	form.setValue("searchTerm", search);

	version(vps) {
		// intentionally blank
	} else {
		// Prefix the skeleton's stylesheet and script references with /experimental-docs/.
		auto l = document.requireSelector("link");
		l.href = "/experimental-docs/" ~ l.href;
		l = document.requireSelector("script[src]");
		l.src = "/experimental-docs/" ~ l.src;
	}

	auto pc = document.requireSelector("#page-content");
	pc.addChild("h1", "Search Results");
	auto ml = pc.addChild("dl");
	ml.className = "member-list";

	// Builds the dotted name by walking parent links up to the enclosing
	// module, or until there is no (known) parent.
	string getFqn(ProjectSearcher searcher, ProjectSearcher.DeclElement i) {
		string n;
		while(true) {
			if(n) n = "." ~ n;
			n = i.name ~ n;
			if(i.type == "module")
				break;
			if(i.parent == 0)
				break;
			i = searcher.getDecl(i.parent);
			if(i.id == 0)
				break;
		}
		return n;
	}

	bool[string] alreadyPresent;
	int count = 0;
	foreach(idx, item; magic) {
		auto decl = item.decl;
		if(decl.id == 0) continue; // should never happen
		version(vps)
			auto link = "http://"~item.searcher.projectName~".dpldocs.info/" ~ decl.link;
		else
			auto link = "http://dpldocs.info/experimental-docs/" ~ decl.link;

		// Show each fully qualified name only once.
		auto fqn = getFqn(item.searcher, decl);
		if(fqn in alreadyPresent)
			continue;
		alreadyPresent[fqn] = true;

		auto dt = ml.addChild("dt");
		dt.addClass("search-result");
		dt.addChild("span", item.searcher.projectName).addClass("project-name");
		dt.addChild("br");
		// A zero-width space after each dot lets long names wrap.
		dt.addChild("a", fqn.replace(".", ".\u200B"), link);
		dt.dataset.score = to!string(item.score);
		auto html = decl.description;
		//auto d = new Document(html);
		//writeln(d.root.innerText.replace("\n", " "));
		//writeln();

		// FIXME fix relative links from here
		ml.addChild("dd", Html(html));
		count++;

		if(count >= 20)
			break;
	}

	cgi.write(document.toString, true);
}
419 
// Program entry point: instantiates GenericMain (presumably the mixin template
// from arsd.cgi; confirm) with searcher as the request handler.
mixin GenericMain!(searcher);
421