Skip to content

Commit

Permalink
Merge branch 'master' into contextus
Browse files Browse the repository at this point in the history
  • Loading branch information
stevekaplan123 committed Feb 14, 2024
2 parents f7f78ab + fe9c971 commit d77854e
Show file tree
Hide file tree
Showing 20 changed files with 80 additions and 1,050 deletions.
2 changes: 1 addition & 1 deletion api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def get(self, request, *args, **kwargs):
if not versions_params:
versions_params = ['primary']
versions_params = [self.split_piped_params(param_str) for param_str in versions_params]
fill_in_missing_segments = request.GET.get('fill_in_missing_segments', False)
fill_in_missing_segments = bool(int(request.GET.get('fill_in_missing_segments', False)))
return_format = request.GET.get('return_format', 'default')
if return_format not in self.RETURN_FORMATS:
return jsonResponse({'error': f'return_format should be one of those formats: {self.RETURN_FORMATS}.'}, status=400)
Expand Down
4 changes: 1 addition & 3 deletions build/ci/production-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ nginx:
containerImage:
imageRegistry:
tag:
SEARCH_HOST: contextus-es-http.elasticsearch.svc
SEARCH_HOST: elasticsearch-es-http.elasticsearch.svc
disableScraping: false
replicaCount: 2
resources:
Expand All @@ -150,8 +150,6 @@ cronJobs:
enabled: false
reindexElasticSearch:
enabled: true
SEARCH_HOST_ES6: "contextus-es-default-0.elasticsearch.svc"
SEARCH_HOST_ES8: "elasticsearch-8-es-default.elasticsearch.svc"
topicsIndexing:
enabled: true
trello:
Expand Down
1 change: 0 additions & 1 deletion helm-chart/sefaria-project/templates/configmap/nginx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ data:
# allow urls which aren't caught by regex above
location /api/search/ {
rewrite ^/(?:api/search)/(.*)$ /$1 break;
proxy_set_header Content-Type application/json; # es 6.0 requires this header
proxy_set_header Authorization "Basic ${ELASTIC_AUTH_HEADER}";
add_header 'Access-Control-Allow-Origin' '';
proxy_pass http://elasticsearch_upstream/;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,77 +0,0 @@
{{- if .Values.cronJobs.reindexElasticSearch.enabled }}
---
# Weekly CronJob that rebuilds the Elasticsearch 6 index by running
# scripts/scheduled/reindex_elasticsearch_cronjob_ES6.py inside the web image.
# Rendered only when cronJobs.reindexElasticSearch.enabled is true.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ .Values.deployEnv }}-reindex-elastic-search-es6
  labels:
    {{- include "sefaria.labels" . | nindent 4 }}
spec:
  # Cron syntax: 13:20 every Sunday.
  schedule: "20 13 * * 0"
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        spec:
          affinity:
            # Keep this memory-heavy job off any node that hosts a pod labelled app=mongo.
            podAntiAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
              - labelSelector:
                  matchExpressions:
                  - key: app
                    operator: In
                    values:
                    - mongo
                # Well-known node label; the previous value "kubernetes.io.hostname"
                # is not a label nodes carry, so the rule never took effect.
                topologyKey: kubernetes.io/hostname
          containers:
          - name: reindex-elastic-search-es6
            image: "{{ .Values.web.containerImage.imageRegistry }}:{{ .Values.web.containerImage.tag }}"
            resources:
              limits:
                memory: 9Gi
              requests:
                memory: 7Gi
            env:
            # Host of the ES6 cluster this job indexes into.
            - name: SEARCH_HOST
              value: "{{ .Values.cronJobs.reindexElasticSearch.SEARCH_HOST_ES6 }}"
            - name: REDIS_HOST
              value: "redis-{{ .Values.deployEnv }}"
            - name: NODEJS_HOST
              value: "node-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
            - name: VARNISH_HOST
              value: "varnish-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
            # Webhook the script posts to when the reindex fails.
            - name: SLACK_URL
              valueFrom:
                secretKeyRef:
                  name: {{ template "sefaria.secrets.slackWebhook" . }}
                  key: slack-webhook
            envFrom:
            - secretRef:
                name: {{ .Values.secrets.localSettings.ref }}
                optional: true
            - configMapRef:
                name: local-settings-{{ .Values.deployEnv }}
            - secretRef:
                name: local-settings-secrets-{{ .Values.deployEnv }}
                optional: true
            volumeMounts:
            - mountPath: /app/sefaria/local_settings.py
              name: local-settings
              subPath: local_settings.py
              readOnly: true
            command: ["bash"]
            args: [
              "-c",
              "mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy && /app/run /app/scripts/scheduled/reindex_elasticsearch_cronjob_ES6.py"
            ]
          restartPolicy: Never
          volumes:
          - name: local-settings
            configMap:
              name: local-settings-file-{{ .Values.deployEnv }}
              items:
              - key: local_settings.py
                path: local_settings.py
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 2
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ spec:
memory: 7Gi
env:
- name: SEARCH_HOST
value: "{{ .Values.cronJobs.reindexElasticSearch.SEARCH_HOST_ES8 }}"
value: "{{ .Values.nginx.SEARCH_HOST }}"
- name: REDIS_HOST
value: "redis-{{ .Values.deployEnv }}"
- name: NODEJS_HOST
Expand Down Expand Up @@ -64,7 +64,7 @@ spec:
command: ["bash"]
args: [
"-c",
"mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy elasticsearch==8.8.2 git+https://github.com/Sefaria/[email protected]#egg=elasticsearch-dsl && /app/run /app/scripts/scheduled/reindex_elasticsearch_cronjob.py"
"mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy && /app/run /app/scripts/scheduled/reindex_elasticsearch_cronjob.py"
]
restartPolicy: Never
volumes:
Expand Down
24 changes: 12 additions & 12 deletions reader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1653,10 +1653,10 @@ def index_api(request, title, raw=False):
API for manipulating text index records (aka "Text Info")
"""
if request.method == "GET":
with_content_counts = bool(request.GET.get("with_content_counts", False))
with_content_counts = bool(int(request.GET.get("with_content_counts", False)))
i = library.get_index(title).contents(raw=raw, with_content_counts=with_content_counts)

if request.GET.get("with_related_topics", False):
if bool(int(request.GET.get("with_related_topics", False))):
i["relatedTopics"] = get_topics_for_book(title, annotate=True)

return jsonResponse(i, callback=request.GET.get("callback", None))
Expand Down Expand Up @@ -1870,7 +1870,7 @@ def _collapse_book_leaf_shapes(leaf_shapes):
else:
cat_list = title.split("/")
depth = request.GET.get("depth", 2)
include_dependents = request.GET.get("dependents", False)
include_dependents = bool(int(request.GET.get("dependents", False)))
indexes = []
if len(cat_list) == 1:
# try as corpus
Expand Down Expand Up @@ -2075,7 +2075,7 @@ def notes_api(request, note_id_or_ref):
raise Http404
oref = Ref(note_id_or_ref)
cb = request.GET.get("callback", None)
private = request.GET.get("private", False)
private = bool(int(request.GET.get("private", False)))
res = get_notes(oref, uid=creds["user_id"], public=(not private))
return jsonResponse(res, cb)

Expand Down Expand Up @@ -2149,7 +2149,7 @@ def protected_note_post(req):
@catch_error_as_json
def all_notes_api(request):

private = request.GET.get("private", False)
private = bool(int(request.GET.get("private", False)))
if private:
if not request.user.is_authenticated:
res = {"error": "You must be logged in to access you notes."}
Expand All @@ -2165,17 +2165,17 @@ def related_api(request, tref):
"""
Single API to bundle available content related to `tref`.
"""
if request.GET.get("private", False) and request.user.is_authenticated:
if bool(int(request.GET.get("private", False))) and request.user.is_authenticated:
oref = Ref(tref)
response = {
"sheets": get_sheets_for_ref(tref, uid=request.user.id),
"notes": get_notes(oref, uid=request.user.id, public=False)
}
elif request.GET.get("private", False) and not request.user.is_authenticated:
elif bool(int(request.GET.get("private", False))) and not request.user.is_authenticated:
response = {"error": "You must be logged in to access private content."}
else:
response = {
"links": get_links(tref, with_text=False, with_sheet_links=request.GET.get("with_sheet_links", False)),
"links": get_links(tref, with_text=False, with_sheet_links=bool(int(request.GET.get("with_sheet_links", False)))),
"sheets": get_sheets_for_ref(tref),
"notes": [], # get_notes(oref, public=True) # Hiding public notes for now
"webpages": get_webpages_for_ref(tref),
Expand Down Expand Up @@ -2668,7 +2668,7 @@ def name_api(request, name):
name = name[1:] if topic_override else name
# Number of results to return. 0 indicates no limit
LIMIT = int(request.GET.get("limit", 10))
ref_only = request.GET.get("ref_only", False)
ref_only = bool(int(request.GET.get("ref_only", False)))
completions_dict = get_name_completions(name, LIMIT, ref_only, topic_override)
ref = completions_dict["ref"]
topic = completions_dict["topic"]
Expand Down Expand Up @@ -2772,7 +2772,7 @@ def user_stats_api(request, uid):
assert request.method == "GET", "Unsupported Method"
u = request.user
assert (u.is_active and u.is_staff) or (int(uid) == u.id)
quick = bool(request.GET.get("quick", False))
quick = bool(int(request.GET.get("quick", False)))
if quick:
return jsonResponse(public_user_data(uid))
return jsonResponse(user_stats_data(uid))
Expand Down Expand Up @@ -4272,7 +4272,7 @@ def search_wrapper_api(request, es6_compat=False):
search_obj = get_query_obj(search_obj=search_obj, **j)
response = search_obj.execute()
if response.success():
response_json = getattr(response.to_dict(), 'body', response.to_dict())
response_json = response.to_dict().body
if es6_compat and isinstance(response_json['hits']['total'], dict):
response_json['hits']['total'] = response_json['hits']['total']['value']
return jsonResponse(response_json, callback=request.GET.get("callback", None))
Expand Down Expand Up @@ -4618,7 +4618,7 @@ def isNodeJsReachable():
except Exception as e:
logger.warn(f"Failed node healthcheck. Error: {e}")
return False

def is_database_reachable():
try:
from sefaria.system.database import db
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ django==1.11.*
djangorestframework @ https://github.com/encode/django-rest-framework/archive/3.11.1.tar.gz
djangorestframework_simplejwt==3.3.0
PyJWT==1.7.1 # pinned b/c current version 2.0.0 breaks simplejwt. waiting for 2.0.1
elasticsearch==7.17.*
elasticsearch_dsl==7.4.*
elasticsearch==8.8.2
git+https://github.com/Sefaria/[email protected]#egg=elasticsearch-dsl
geojson==2.5.0
geopy==2.3.0
gevent==20.12.0; sys_platform != 'darwin'
Expand Down
49 changes: 0 additions & 49 deletions scripts/scheduled/reindex_elasticsearch_cronjob_ES6.py
Original file line number Diff line number Diff line change
@@ -1,49 +0,0 @@
"""
This file is meant to be temporary while we are migrating to elasticsearch 8
"""
from datetime import datetime
import requests
import traceback
import os
import django
django.setup()
from sefaria.model import *
from sefaria.search_ES6 import index_all
from sefaria.local_settings import SEFARIA_BOT_API_KEY
from sefaria.pagesheetrank import update_pagesheetrank

"""
Source sheets added after last_sheet_timestamp will be missing from the index process. We want to manually index all
source sheets created after this. Depending on the database being used to index the timestamp will be different. If
running against a production database, last_sheet_timestamp will be the time this script began running. Otherwise, this
value will need to be set to the time at which the last mongo dump was created (assuming the database is using the most
up-to-date mongo dump).
"""
# last_sheet_timestamp = datetime.fromtimestamp(os.path.getmtime("/var/data/sefaria_public/dump/sefaria")).isoformat()
# Driver: refresh PageSheetRank, rebuild the whole search index, then ask the
# production site to index any source sheets created since this run started.
# On any failure the traceback is posted to Slack and the exception is
# re-raised so the process exits with an error.
try:
    # Captured before the long-running steps so sheets created during the
    # reindex are picked up by the follow-up API call.
    last_sheet_timestamp = datetime.now().isoformat()
    update_pagesheetrank()
    index_all()
    r = requests.post("https://www.sefaria.org/admin/index-sheets-by-timestamp", data={"timestamp": last_sheet_timestamp, "apikey": SEFARIA_BOT_API_KEY})
    # The response is checked by substring on its text, not by HTTP status.
    if "error" in r.text:
        raise Exception("Error when calling admin/index-sheets-by-timestamp API: " + r.text)
    else:
        print("SUCCESS!", r.text)
except Exception:
    tb_str = traceback.format_exc()
    print("Caught exception")
    post_object = {
        "icon_emoji": ":facepalm:",
        "username": "Reindex ElasticSearch",
        "channel": "#engineering-discuss",
        "attachments": [
            {
                "fallback": tb_str,
                "color": "#a30200",
                "pretext": "Cronjob Error",
                "text": tb_str
            }
        ]
    }
    requests.post(os.environ['SLACK_URL'], json=post_object)
    # Bare raise re-raises the active exception with its original traceback.
    raise
2 changes: 1 addition & 1 deletion sefaria/helper/linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ class _FindRefsTextOptions:
@attr version_preferences_by_corpus: dict of dicts of the form { <corpus>: { <lang>: <vtitle> }}
"""

debug: bool = False
with_text: bool = False
debug: bool = False
max_segments: int = 0
version_preferences_by_corpus: dict = None

Expand Down
4 changes: 2 additions & 2 deletions sefaria/helper/tests/linker_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ def test_find_refs_text(self, mock_is_hebrew: Mock):
assert find_refs_text.lang == 'en'

def test_find_refs_text_options(self):
find_refs_text_options = linker._FindRefsTextOptions(True, True, 10, {})
assert find_refs_text_options.debug
find_refs_text_options = linker._FindRefsTextOptions(True, False, 10, {})
assert not find_refs_text_options.debug
assert find_refs_text_options.with_text
assert find_refs_text_options.max_segments == 10
assert find_refs_text_options.version_preferences_by_corpus == {}
Expand Down
Loading

0 comments on commit d77854e

Please sign in to comment.