2018-10-19 07:25:54 +00:00
|
|
|
from flask import Flask, jsonify, request
|
2018-10-19 18:23:52 +00:00
|
|
|
from elasticsearch_dsl import Search
|
2018-10-19 07:25:54 +00:00
|
|
|
from elasticsearch_dsl.connections import connections
|
|
|
|
from tqdm import tqdm
|
2018-10-19 18:23:52 +00:00
|
|
|
from beeprint import pp
|
2018-10-19 07:25:54 +00:00
|
|
|
import csv
|
|
|
|
import click
|
|
|
|
|
2018-10-19 18:23:52 +00:00
|
|
|
from .question import Question
|
|
|
|
from .question_search import QuestionSearch
|
2018-10-19 07:25:54 +00:00
|
|
|
|
|
|
|
# WSGI application object; the route and CLI command below attach to it.
app = Flask(__name__)

# Register the elasticsearch-dsl connection before any index operation runs.
# The host is fixed to a local Elasticsearch node.
connections.create_connection(hosts=["localhost"])

# Runs at import time and needs the connection registered above.
# NOTE(review): presumably Question is an elasticsearch-dsl document and
# init() creates its index/mapping -- confirm in .question.
Question.init()
|
|
|
|
|
|
|
|
|
|
|
|
def _round_sigfig(value, figures):
    """Return *value* rounded to *figures* significant figures, as a float."""
    return float(format(value, f".{figures}g"))


@app.route("/api/")
def index():
    """Search questions and return hits, facets and suggestions as JSON.

    Query parameters:
        q: query text, passed to an Elasticsearch ``query_string`` query.
           Required; a request without it is answered with HTTP 400.

    The JSON response contains:
        facets.categories: one ``{"category", "count"}`` entry per bucket of
            the ``category`` terms aggregation, counts rounded to 3
            significant figures.
        facets.months: always an empty list (no date facet is computed).
        suggestions: ``{"key", "count"}`` entries from a ``significant_terms``
            aggregation over the ``body`` field (up to 40 buckets).
        results: one entry per hit with id, score, title, body (summary),
            category, date and url.
        hits: total hit count rounded to 4 significant figures.
        took: ``response.took`` divided by 1000.
    """
    query = request.args.get("q")
    if query is None:
        # A null "query" inside query_string is expected to be rejected by
        # Elasticsearch and would surface as an unhandled server error, so
        # report the client mistake explicitly instead.
        return jsonify(error="missing required query parameter 'q'"), 400

    # NOTE(review): the "categories" query parameter used to be parsed here
    # into a local that was never applied to the search. It is still ignored;
    # confirm whether filtering by category is intended.

    search = Search.from_dict({
        "query": {
            "query_string": {
                "query": query,
            },
        },
        "aggregations": {
            "category": {
                "terms": {"field": "category"},
            },
            "suggestions": {
                "significant_terms": {
                    "field": "body",
                    "mutual_information": {
                        "include_negatives": True,
                    },
                    "size": 40,
                },
            },
        },
    })

    response = search.execute()
    aggregations = response.aggregations

    category_facets = [
        {"category": bucket.key, "count": _round_sigfig(bucket.doc_count, 3)}
        for bucket in aggregations.category.buckets
    ]
    suggestions = [
        {"key": bucket.key, "count": bucket.doc_count}
        for bucket in aggregations.suggestions.buckets
    ]
    # No date aggregation is requested, so the "months" facet stays empty.
    date_facets = []

    results = [
        {
            "id": hit.meta.id,
            "score": hit.meta.score,
            "title": hit.title,
            "body": Question.summary(hit),
            "category": hit.category,
            "date": hit.date,
            "url": Question.url(hit),
        }
        for hit in response
    ]

    # Elasticsearch 7+ reports the total as an object with a "value" field,
    # older versions as a plain integer; accept both.
    total = response.hits.total
    total = getattr(total, "value", total)

    return jsonify(
        facets={
            "months": date_facets,
            "categories": category_facets,
        },
        suggestions=suggestions,
        results=results,
        hits=_round_sigfig(total, 4),
        # Elasticsearch reports "took" in milliseconds; expose seconds.
        took=response.took / 1000,
    )
|
|
|
|
|
|
|
|
|
|
|
|
def _count_lines(path):
    """Return the number of lines in the text file at *path*.

    The file is closed before returning. The count is only used as the
    progress-bar total, so it may differ from the number of CSV records when
    quoted fields span several lines.
    """
    with open(path) as handle:
        return sum(1 for _ in handle)


@app.cli.command()
@click.argument("questions")
@click.argument("categories")
def import_data(questions, categories):
    """Import questions from CSV files and save them as Question documents.

    QUESTIONS is a CSV file whose columns are read as: 0 = integer id,
    1 = date, 3 = integer category id, 4 = title, 5 and beyond = body parts
    (joined with newlines).

    CATEGORIES is a CSV file whose column 0 is an integer category id and
    whose column 2 is the category name.

    Question rows that raise IndexError or ValueError while being parsed or
    saved are skipped and counted. A category id that is missing from
    CATEGORIES raises KeyError and aborts the import.
    """
    # Map category id -> category name. Malformed category rows are not
    # tolerated here: they raise and stop the command.
    categories_dict = {}
    category_total = _count_lines(categories)
    with open(categories, newline="") as csv_file:
        rows = tqdm(
            csv.reader(csv_file),
            desc="Reading categories",
            total=category_total,
        )
        for row in rows:
            categories_dict[int(row[0])] = row[2]

    skipped = 0
    question_total = _count_lines(questions)
    with open(questions, newline="") as csv_file:
        rows = tqdm(
            csv.reader(csv_file),
            desc="Reading questions",
            total=question_total,
        )
        for row in rows:
            try:
                id_ = int(row[0])
                category_id = int(row[3])

                question = Question(meta={"id": id_})
                question.date = row[1]
                question.category = categories_dict[category_id]
                question.title = row[4]
                question.body = "\n".join(row[5:])

                # Kept inside the try block so that a ValueError raised while
                # saving still skips the row, as before.
                question.save()
            except (IndexError, ValueError):
                skipped += 1
                continue

    if skipped:
        # Make the best-effort skipping visible instead of silent.
        click.echo(f"Skipped {skipped} malformed question row(s).")
|