Skip to content

Commit

Permalink
Merge branch 'trends-updates'
Browse files Browse the repository at this point in the history
  • Loading branch information
nissamai committed Apr 6, 2022
2 parents dde5171 + f33276b commit 5d643dc
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 32 deletions.
7 changes: 5 additions & 2 deletions scripts/nation_builder_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,21 @@ def sync_sustainers_to_mongo():

trends_only = False
mongo_only = False
skip = []
i = 1
while(i < len(sys.argv)):
if sys.argv[i] == "--trends-only":
trends_only = True
elif sys.argv[i] == "--mongo-only":
mongo_only = True
elif sys.argv[i].startswith("--skip="):
skip = sys.argv[i][7:].split(",")
i+=1


if not trends_only:
sync_sustainers_to_mongo()
setAllTrends()
setAllTrends(skip)
nationbuilder_update_all_tags()
else:
setAllTrends()
setAllTrends(skip)
63 changes: 33 additions & 30 deletions sefaria/model/trend.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,24 +215,25 @@ def setCategoryTraits():
# User Traits
for daterange in active_dateranges:
site_data = {cat: 0 for cat in TOP_CATEGORIES}

all_users = getAllUsersCategories(daterange)
for uid, data in all_users.items():
TrendSet({"period": daterange.key, "uid": uid, "name": {"$in": list(map(read_in_category_key, TOP_CATEGORIES))}}).delete()

for cat, val in list(data["categories"].items()):
if cat not in TOP_CATEGORIES:
continue
for category in TOP_CATEGORIES:
all_users = getAllUsersCategories(daterange, category)
for uid, data in all_users.items():
val = data['cnt']
TrendSet({"period": daterange.key, "uid": uid, "name": read_in_category_key(category)}).delete()

# for val in list(data["categories"].items()):
# if cat not in TOP_CATEGORIES:
# continue
Trend({
"name": read_in_category_key(cat),
"name": read_in_category_key(category),
"value": val,
"datatype": "int",
"timestamp": datetime.utcnow(),
"period": daterange.key,
"scope": "user",
"uid": uid
}).save()
site_data[cat] += val
site_data[category] += val

# Site Traits
TrendSet({"period": daterange.key, "scope": "site", "name": {"$in": list(map(read_in_category_key, TOP_CATEGORIES))}}).delete()
Expand Down Expand Up @@ -378,25 +379,16 @@ def getAllUsersSheetUsage(daterange):
return {d["_id"]: d for d in results}


def getAllUsersCategories(daterange):
def getAllUsersCategories(daterange, category):
pipeline = [
{"$match": daterange.update_match({
"secondary": False,
"is_sheet": False,
"categories.0": {
"$exists": True
}})},
{"$group": {
"_id": {"uid": "$uid", "category": {"$arrayElemAt" : ["$categories", 0]}},
"cnt": { "$sum": {"$max": ["$num_times_read", 1]}}}},
"categories.0": category
})},
{"$group": {
"_id": "$_id.uid",
"categories": {"$push": {"k": "$_id.category", "v": "$cnt"}},
"total": {"$sum": "$cnt"}}},
{"$project": {
"categories": {"$arrayToObject": "$categories"},
"total": "$total"}}
]
"_id": "$uid",
"cnt": { "$sum": {"$max": ["$num_times_read", 1]}}}}]
results = db.user_history.aggregate(pipeline)
return {d["_id"]: d for d in results}

Expand Down Expand Up @@ -697,9 +689,20 @@ def setScheduleTraits():
for scheduleManager in scheduleManagers:
scheduleManager.getUsersWhoAreLearningSchedule()

def setAllTrends():
setUserSheetTraits()
setSheetTraits()
setUserLanguageTraits()
setCategoryTraits()
setScheduleTraits()
def setAllTrends(skip=None):
    """
    Run each trait-setting step in turn, printing a progress line before each one.

    Steps run in this fixed order, each guarded by its own key:
        "userSheet"    -> setUserSheetTraits()
        "sheet"        -> setSheetTraits()
        "userLanguage" -> setUserLanguageTraits()
        "category"     -> setCategoryTraits()
        "schedule"     -> setScheduleTraits()

    :param skip: optional container of step keys (see above) to leave out.
        Keys that match no step are ignored. ``None`` (the default) or an
        empty container runs every step.
    """
    # A None sentinel replaces the former `skip=[]` default: a list default is
    # created once at definition time and shared by every call.
    skip = () if skip is None else skip

    print("setAllTrends")
    if "userSheet" not in skip:
        print("setUserSheetTraits")
        setUserSheetTraits()
    if "sheet" not in skip:
        print("setSheetTraits")
        setSheetTraits()
    if "userLanguage" not in skip:
        print("setUserLanguageTraits")
        setUserLanguageTraits()
    if "category" not in skip:
        print("setCategoryTraits")
        setCategoryTraits()
    if "schedule" not in skip:
        print("setScheduleTraits")
        setScheduleTraits()

0 comments on commit 5d643dc

Please sign in to comment.