-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
122 lines (101 loc) · 3.12 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from bs4 import BeautifulSoup
from bson import ObjectId
from bson.regex import Regex
import re
from datetime import datetime
from transformers import pipeline
# Build the `transformers` text-classification pipeline once, at module import,
# using the pretrained "arpanghoshal/EmoRoBERTa" model. extract_emotion() calls
# this shared object for every prediction.
# NOTE(review): this presumably loads the model weights during import - confirm
# the startup cost is acceptable for every importer of this module.
emotion_model = pipeline("text-classification", model="arpanghoshal/EmoRoBERTa")
def search_entries_by_keyword(db, keyword):
    """Return the diary entries whose title or content contains ``keyword``.

    The keyword is passed through ``re.escape`` so it is matched literally,
    and the pattern is prefixed with ``(?i)`` so the match ignores case.

    Args:
        db: Database handle exposing a ``diary`` collection.
        keyword: Text to look for.

    Returns:
        A list with every document produced by the aggregation.
    """
    # Literal, case-insensitive pattern for the keyword
    keyword_pattern = f"(?i){re.escape(keyword)}"

    # One regex filter per searched field; an entry matches if either one hits
    searched_fields = ("diary_title", "diary_content")
    field_filters = [
        {field: {"$regex": Regex(keyword_pattern)}} for field in searched_fields
    ]

    # Single $match stage that ORs the per-field filters together
    stages = [{"$match": {"$or": field_filters}}]
    return list(db.diary.aggregate(stages))
# Strip the HTML tags from formatted text and keep only the plain text
def extract_plaintext(html_content):
    """Return the text content of ``html_content`` without its HTML markup.

    Args:
        html_content: Markup to parse with BeautifulSoup's ``'html.parser'``.

    Returns:
        The result of ``get_text()`` on the parsed document.
    """
    return BeautifulSoup(html_content, 'html.parser').get_text()
# Save recommended books for the given author, skipping titles already stored
def save_books(db, recommended_books, author_id):
    """Insert the recommended books that this author does not have yet.

    A book counts as already stored when a document in ``db.books`` with the
    same ``author_id`` carries the same ``title``.

    Args:
        db: Database handle exposing a ``books`` collection.
        recommended_books: Iterable of book dicts, each with a ``'title'`` key.
            Books that get inserted are tagged in place with ``'author_id'``.
        author_id: Author identifier; converted with ``ObjectId`` before use.

    Returns:
        None. Nothing is inserted when every recommended title already exists.
    """
    # Convert once and reuse for both the lookup and the tagging below
    author_oid = ObjectId(author_id)

    # A set makes each "already stored?" check O(1) instead of a list scan
    stored_titles = {
        book['title']
        for book in db.books.find({'author_id': author_oid}, {'title': 1})
    }

    new_books = [
        book for book in recommended_books if book['title'] not in stored_titles
    ]
    if not new_books:
        # Nothing new to store, so skip the insert call entirely
        return

    for book in new_books:
        book['author_id'] = author_oid
    db.books.insert_many(new_books, ordered=False)
def save_movies(db, recommended_movies, author_id, limit=5):
    """Insert up to ``limit`` recommended movies that this author lacks.

    A movie counts as already stored when a document in ``db.movies`` with the
    same ``author_id`` carries the same ``movie_id``.

    Only the movies that are tagged with ``author_id`` are inserted. Inserting
    untagged movies would store documents that the ``author_id`` lookup below
    can never find, so they would be inserted again on a later call.

    Args:
        db: Database handle exposing a ``movies`` collection.
        recommended_movies: Iterable of movie dicts, each with a ``'movie_id'``
            key. Movies that get inserted are tagged in place with
            ``'author_id'``.
        author_id: Author identifier; converted with ``ObjectId`` before use.
        limit: Maximum number of new movies to store per call (non-negative).
            Defaults to 5; pass ``None`` to store every new movie.

    Returns:
        None. Nothing is inserted when there is no new movie to store.
    """
    # Convert once and reuse for both the lookup and the tagging below
    author_oid = ObjectId(author_id)

    # A set makes each "already stored?" check O(1) instead of a list scan
    stored_ids = {
        movie['movie_id']
        for movie in db.movies.find({'author_id': author_oid}, {'movie_id': 1})
    }

    new_movies = [
        movie for movie in recommended_movies
        if movie['movie_id'] not in stored_ids
    ]

    # Tag and insert the SAME slice so no movie is stored without its owner
    batch = new_movies[:limit]
    if not batch:
        return

    for movie in batch:
        movie['author_id'] = author_oid
    db.movies.insert_many(batch, ordered=False)
# Classify a piece of text and report its emotion label
def extract_emotion(text):
    """Return the emotion label that ``emotion_model`` assigns to ``text``.

    The raw model output is printed before the label is returned.

    Args:
        text: Input handed unchanged to ``emotion_model``.

    Returns:
        The ``'label'`` value of the first prediction in the model output.
    """
    predictions = emotion_model(text)
    print(predictions)
    first_prediction = predictions[0]
    return first_prediction['label']
# Count an author's diary entries per emotion between two dates (inclusive)
def fetch_data(db, author_id, start_date, end_date):
    """Return per-emotion entry counts for one author over a date range.

    The range runs from the first instant of ``start_date`` to the last
    instant of ``end_date`` and is matched against ``diary_created``.

    Args:
        db: Database handle exposing a ``diary`` collection.
        author_id: Value matched against each entry's ``author_id`` field.
        start_date: First day of the range.
        end_date: Last day of the range.

    Returns:
        A ``(counts, emotions)`` tuple of two parallel lists: ``counts[i]`` is
        the number of entries whose emotion is ``emotions[i]``.
    """
    # Widen the two dates to cover both days in full
    range_start = datetime.combine(start_date, datetime.min.time())
    range_end = datetime.combine(end_date, datetime.max.time())

    # Keep only this author's entries created inside the range
    match_stage = {"$match": {
        "author_id": author_id,
        "diary_created": {"$gte": range_start, "$lte": range_end}
    }}
    # One bucket per emotion, counting the entries in it
    group_stage = {"$group": {
        "_id": "$emotion",
        "count": {"$sum": 1}
    }}
    # Expose the grouping key under the name "emotion"
    project_stage = {"$project": {
        "emotion": "$_id",
        "count": 1,
        "_id": 0
    }}

    rows = list(db.diary.aggregate([match_stage, group_stage, project_stage]))
    counts = [row['count'] for row in rows]
    emotions = [row['emotion'] for row in rows]

    print("From fetch data of emotions:")
    print(counts, emotions)
    return counts, emotions
# General interests the user can pick from in their profile; the same values
# are offered as tags when writing a diary entry.
interests = [
    "Coding", "Study", "Space", "Art", "Reading",
    "Music", "Travel", "Fitness", "Cooking", "Photography",
    "Gaming", "Writing", "Technology", "Nature", "Movies",
    "Sports", "Science", "Fashion", "History", "DIY Projects",
]