diff --git a/backend/app.py b/backend/app.py
index 6cd0d97..39647e9 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -1,13 +1,14 @@
 from flask import Flask, jsonify, request
 from elasticsearch_dsl import Search
 from elasticsearch_dsl.connections import connections
+from elasticsearch.exceptions import NotFoundError
 from tqdm import tqdm
 from beeprint import pp
 import csv
 import click
+import requests
 
 from .question import Question
-from .question_search import QuestionSearch
 
 
 app = Flask(__name__)
@@ -22,23 +23,22 @@ def index():
     query = request.args.get("q")
     categories = request.args.get("categories", None)
+    page = int(request.args.get("page", 1)) - 1
 
-    facets = {}
-    if categories is not None:
-        category_list = categories.split(",")
-        facets["category"] = category_list
-
-    search = Search.from_dict({
+    search_dict = {
+        "from": page * 10,
         "query": {
-            "query_string": {
-                "query": query,
-            },
+            "bool": {
+                "must": [
+                    {"query_string": {"query": query}},
+                ]
+            }
         },
         "aggregations": {
             "category": {
                 "terms": {"field": "category"},
             },
-            "suggestions": {
+            "chips": {
                 "significant_terms": {
                     "field": "body",
                     "mutual_information": {
@@ -48,19 +48,25 @@ def index():
                 },
             }
         },
-    })
+    }
+    if categories is not None:
+        category_list = categories.split(",")
+        search_dict["post_filter"] = {
+            "terms": {"category": category_list},
+        }
+
+    search = Search.from_dict(search_dict)
     response = search.execute()
+    pp(response.to_dict())
 
-    #date_facets = [{"timestamp": date.timestamp(), "count": count}
-                    #for date, count, _ in response.facets.date_frequency]
     category_facets = [
         {"category": bucket.key, "count": round_sigfig(bucket.doc_count, 3)}
         for bucket in response.aggregations.category.buckets
     ]
-    suggestions = [{"key": bucket.key, "count": bucket.doc_count}
-                   for bucket in response.aggregations.suggestions.buckets]
+    chips = [{"key": bucket.key, "count": bucket.doc_count}
+             for bucket in response.aggregations.chips.buckets]
 
     date_facets = []
@@ -69,10 +75,15 @@ def index():
         summary = Question.summary(hit)
         url = Question.url(hit)
 
+        try:
+            dead = hit.dead
+        except AttributeError:
+            dead = False
+
         results.append({
             "id": hit.meta.id, "score": hit.meta.score,
             "title": hit.title, "body": summary,
             "category": hit.category, "date": hit.date,
-            "url": url,
+            "url": url, "dead": dead,
         })
     return jsonify(
@@ -80,7 +91,7 @@ def index():
             "months": date_facets,
             "categories": category_facets,
         },
-        suggestions=suggestions,
+        chips=chips,
         results=results,
         hits=round_sigfig(response.hits.total, 4),
         took=response.took / 1000,
@@ -90,7 +101,8 @@ def index():
 @app.cli.command()
 @click.argument("questions")
 @click.argument("categories")
-def import_data(questions, categories):
+@click.argument("answers")
+def import_data(questions, categories, answers):
     categories_dict = {}
     num_lines = sum(1 for line in open(categories))
     with open(categories, newline="") as csv_file:
@@ -101,23 +113,73 @@ def import_data(questions, categories):
             categories_dict[id_] = category
 
-    num_lines = sum(1 for line in open(questions))
-    with open(questions, newline="") as csv_file:
-        reader = csv.reader(csv_file)
+    if questions != "skip":
+        num_lines = sum(1 for line in open(questions))
+        with open(questions, newline="") as csv_file:
+            reader = csv.reader(csv_file)
 
-        it = tqdm(reader, desc="Reading questions", total=num_lines)
-        for i, row in enumerate(it):
-            try:
-                id_ = int(row[0])
-                category_id = int(row[3])
+            it = tqdm(reader, desc="Reading questions", total=num_lines)
+            for i, row in enumerate(it):
+                try:
+                    id_ = int(row[0])
+                    category_id = int(row[3])
 
-                question = Question(meta={"id": id_})
+                    question = Question(meta={"id": id_})
 
-                question.date = row[1]
-                question.category = categories_dict[category_id]
-                question.title = row[4]
-                question.body = "\n".join(row[5:])
+                    question.date = row[1]
+                    question.category = categories_dict[category_id]
+                    question.title = row[4]
+                    question.body = "\n".join(row[5:])
 
-                question.save()
-            except (IndexError, ValueError):
-                continue
+                    question.save()
+                except (IndexError, ValueError):
+                    continue
+
+    if answers != "skip":
+        with open(answers, newline="") as csv_file:
+            reader = csv.reader(csv_file)
+
+            it = tqdm(reader, desc="Reading answers")
+            for i, row in enumerate(it):
+                try:
+                    question_id = int(row[3])
+                    question = Question.get(id=question_id)
+                    if question.answers is None:
+                        question.answers = row[4]
+                    else:
+                        question.answers += "\n\n" + row[4]
+                    question.save()
+                except (IndexError, ValueError, NotFoundError):
+                    continue
+
+
+@app.cli.command()
+def cleanup_database():
+    dead_count = 0
+    alive_count = 0
+
+    for question in Question.search().scan():
+        if question.dead is not None or question.error:
+            print(end="_")
+            dead_count += 1
+            continue
+
+        url = question.url()
+        response = requests.head(url)
+
+        if response.status_code == 404:
+            dead_count += 1
+            question.dead = True
+            question.save()
+            print(end=".")
+        elif response.status_code == 302:
+            alive_count += 1
+            question.dead = False
+            print(end="#")
+        elif response.status_code == 500:
+            question.error = True
+            print(end="!")
+        else:
+            continue
+
+        question.save()
diff --git a/backend/question.py b/backend/question.py
index 2078350..53b0a0b 100644
--- a/backend/question.py
+++ b/backend/question.py
@@ -1,4 +1,4 @@
-from elasticsearch_dsl import Document, Date, Keyword, Text
+from elasticsearch_dsl import Document, Date, Keyword, Text, Boolean
 
 
 class Question(Document):
@@ -9,6 +9,10 @@ class Question(Document):
     )
     category = Keyword()
     date = Date()
+    answers = Text(analyzer="snowball")
+
+    dead = Boolean()
+    error = Boolean()
 
     class Index:
         name = "goeievraag"
diff --git a/backend/question_search.py b/backend/question_search.py
deleted file mode 100644
index e385e9b..0000000
--- a/backend/question_search.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from elasticsearch_dsl import FacetedSearch, TermsFacet, DateHistogramFacet
-from .question import Question
-
-
-class QuestionSearch(FacetedSearch):
-    doc_types = Question,
-    fields = "title", "body"
-
-    facets = {
-        "date_frequency": DateHistogramFacet(field="date", interval="month"),
-        "category": TermsFacet(field="category"),
-    }
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 5cc2513..52c3fb7 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -2,3 +2,4 @@ elasticsearch-dsl
 flask
 tqdm
 beeprint
+requests
diff --git a/frontend/src/assets/pages_active.png b/frontend/src/assets/pages_active.png
new file mode 100644
index 0000000..57072d8
Binary files /dev/null and b/frontend/src/assets/pages_active.png differ
diff --git a/frontend/src/assets/pages_end.png b/frontend/src/assets/pages_end.png
new file mode 100644
index 0000000..0989149
Binary files /dev/null and b/frontend/src/assets/pages_end.png differ
diff --git a/frontend/src/assets/pages_inactive.png b/frontend/src/assets/pages_inactive.png
new file mode 100644
index 0000000..36a2cf8
Binary files /dev/null and b/frontend/src/assets/pages_inactive.png differ
diff --git a/frontend/src/assets/pages_start.png b/frontend/src/assets/pages_start.png
new file mode 100644
index 0000000..239ef9a
Binary files /dev/null and b/frontend/src/assets/pages_start.png differ
diff --git a/frontend/src/components/ResultBody.vue b/frontend/src/components/ResultBody.vue
index a8735b2..1154a86 100644
--- a/frontend/src/components/ResultBody.vue
+++ b/frontend/src/components/ResultBody.vue
@@ -1,9 +1,8 @@
@@ -57,6 +76,13 @@ export default class ResultBody extends Vue {
   results = [];
   responseTime = 0;
   facets = {};
+  chips = [];
+
+  currentPage = 1;
+
+  get pages() {
+    return Math.ceil(this.hits / 10);
+  }
 
   @Watch("value")
   async onQueryChanged(value) {
@@ -83,7 +109,7 @@ export default class ResultBody extends Vue {
   async search() {
     const query = encodeURIComponent(this.value);
 
-    let queryString = `?q=${query}`;
+    let queryString = `?q=${query}&page=${this.currentPage}`;
 
     if (this.activeCategories.length > 0) {
       const categories = encodeURIComponent(this.activeCategories.join(","));
@@ -96,15 +122,21 @@ export default class ResultBody extends Vue {
     let response = await fetch(url);
     this.json = await response.json();
 
-    this.results = this.json.results;
+    this.chips = this.json.chips;
+    this.facets = this.json.facets;
     this.hits = this.json.hits;
     this.responseTime = this.json.took;
-    this.facets = this.json.facets;
+    this.results = this.json.results;
   }
 
   appendToQuery(value) {
     this.$emit("input", `${this.value} AND ${value}`)
   }
+
+  async switchPage(page) {
+    this.currentPage = page;
+    await this.search();
+  }
 }
@@ -191,7 +223,7 @@ export default class ResultBody extends Vue {
   text-align: right;
 }
 
-.suggestions {
+.chips {
   display: flex;
   flex-flow: row wrap;
   max-width: 1000px;
@@ -201,7 +233,7 @@ export default class ResultBody extends Vue {
   overflow-y: hidden;
 }
 
-.suggestion {
+.chip {
   border: 1px solid rgba(0, 0, 0, 0.2);
   border-radius: 12px;
 
@@ -210,5 +242,29 @@ export default class ResultBody extends Vue {
   margin: 2px;
 
   cursor: pointer;
+  height: 32px;
+}
+
+.pagination {
+  display: flex;
+  margin: 0 0 16px;
+}
+
+.pagination__page {
+  height: 50px;
+  text-align: center;
+  display: flex;
+  flex-flow: column;
+}
+
+.pagination img {
+  height: 40px;
+  margin: 0 1px;
+}
+
+.pagination__page:not(.active) {
+  text-decoration: underline;
+  color: blue;
+  cursor: pointer;
 }