You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
167 lines
7.0 KiB
167 lines
7.0 KiB
3 years ago
|
import email, smtplib, os.path
|
||
|
from exchangelib import Credentials, Account, DELEGATE, Configuration, FileAttachment, ItemAttachment, Message, CalendarItem, HTMLBody
|
||
|
from cont_modules import *
|
||
|
from creds import creds
|
||
|
from config1 import acc, config
|
||
|
# test again
|
||
|
|
||
|
|
||
|
def sendmail(attachment_filename, email_target):
    """Email a file from the current directory as an attachment.

    Builds a message with the subject 'Library weeding' on the module-level
    account ``acc``, attaches the file's bytes under the file's own name and
    sends the message.

    Args:
        attachment_filename: Name of the file to attach. It is opened
            relative to the current directory ('./').
        email_target: Recipient, passed as the only entry of
            ``to_recipients``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    body = 'test email'
    item = Message(account=acc, subject='Library weeding', body=body, to_recipients=[email_target])

    # Read inside a context manager so the file handle is closed even when
    # reading fails, instead of being left to the garbage collector.
    with open('./' + attachment_filename, 'rb') as attachment_file:
        binary_file_content = attachment_file.read()

    attachment = FileAttachment(name=attachment_filename, content=binary_file_content)
    item.attach(attachment)
    item.send()
|
||
|
|
||
|
|
||
|
def checkmail(collection_code):
    """Download the predictions attachment for a collection from the inbox.

    Walks the inbox of the module-level account ``acc`` and stops at the
    first unread message whose first attachment is named
    ``construct_ID('predictions', collection_code)``. That attachment is
    written to the current directory under the name
    ``construct_ID('librarian_decisions', collection_code)`` and the message
    is marked as read. Only the first attachment of each message is
    inspected, and scanning stops after the message at index 64.

    Args:
        collection_code: Code passed to ``construct_ID`` to build both the
            expected attachment name and the local file name.

    Returns:
        ``construct_ID('librarian_decisions', collection_code)``. The value
        is returned whether or not a matching message was found.
    """
    for position, message in enumerate(acc.inbox.all().iterator()):
        # Kept as an equality test on purpose: a None flag must not count as unread.
        unread_with_files = message.is_read == False and len(message.attachments) >= 1  # noqa: E712
        if unread_with_files:
            if message.attachments[0].name == construct_ID('predictions', collection_code):
                local_path = './%s' % construct_ID('librarian_decisions', collection_code)
                with open(local_path, 'wb') as out_file:
                    out_file.write(message.attachments[0].content)
                message.is_read = True
                message.save()
                break
        # Upper bound on how far back the inbox is scanned.
        if position >= 64:
            break
    return construct_ID('librarian_decisions', collection_code)
|
||
|
|
||
|
def check_if_new(item):
    """Report whether ``item`` is unread and has at least one attachment.

    Args:
        item: Object exposing ``is_read`` and ``attachments``.

    Returns:
        True when ``is_read`` equals False and ``attachments`` is non-empty,
        otherwise False.
    """
    # Equality (not truthiness) is kept so that a None flag is not treated as unread.
    if item.is_read != False:  # noqa: E712
        return False
    return len(item.attachments) >= 1
|
||
|
|
||
|
def get_attachment(attachment):
    """Return the content and collection code of a predictions attachment.

    Args:
        attachment: Object exposing ``name`` and ``content``.

    Returns:
        ``(content, collection_code)`` when 'predictions.csv' occurs in the
        attachment name, where ``collection_code`` is the second value
        returned by ``read_file_name``. ``None`` for any other attachment.
    """
    ## file name reading here needs to be adjusted, currently looks for a specific file.
    # The name is checked before it is parsed, so attachments that are not
    # predictions files are skipped without running the file-name parser on them.
    if 'predictions.csv' not in attachment.name:
        return None

    _, collection_code, _ = read_file_name(attachment.name)
    return attachment.content, collection_code
|
||
|
|
||
|
def read_file_name(string_file_name):
    """Split a data file name into its date, collection code and step name.

    The name is read positionally: the first 10 characters are the date, one
    separator character is skipped, the collection code is whatever the
    pattern ``[A-Z]{1,2}_[-A-Z]{2,12}`` matches next, one more separator is
    skipped, and the step is everything after that up to the last four
    characters (presumably a '.csv' style extension; confirm against the
    files actually produced).

    Args:
        string_file_name: File name to parse.

    Returns:
        Tuple ``(date, collection, step)`` of strings.

    Raises:
        ValueError: If no collection code is found at position 11.
    """
    # Imported locally because the module's visible import block does not
    # import ``re`` directly.
    import re

    date = string_file_name[0:10]
    match = re.match(r'^[A-Z]{1,2}_[-A-Z]{2,12}', string_file_name[11:])
    if match is None:
        raise ValueError('no collection code found in file name: %r' % (string_file_name,))

    # Return the matched text rather than the match object so callers get a
    # plain string they can put into file names.
    collection = match.group(0)

    # +2 accounts for the separators on either side of the collection code.
    step_start = len(date) + len(collection) + 2
    step_end = len(string_file_name) - 4
    step = string_file_name[step_start:step_end]
    return date, collection, step
|
||
|
|
||
|
def loop_inbox():
    """Save new predictions attachments from the inbox and inspect them.

    Iterates the inbox of the module-level account ``acc``. For every item
    accepted by ``check_if_new`` the first attachment is handed to
    ``get_attachment``; when that yields content, the content is written with
    ``write_attachment``, the message is marked as read, the saved file is
    read back with ``read_csv_to_list_wrapper`` and the index of the
    prediction column is printed. Scanning stops after the message at
    index 128.
    """
    ## needs functionality to check for duplicates
    for index, item in enumerate(acc.inbox.all().iterator()):
        if check_if_new(item):
            ## only grabbing first attachment for now
            result = get_attachment(item.attachments[0])
            # get_attachment returns None for attachments that are not
            # predictions files; unpacking that directly raised TypeError, so
            # such messages are skipped and left unread instead.
            if result is not None:
                content, collection = result
                write_attachment(content, collection)
                item.is_read = True
                item.save()
                attachment = read_csv_to_list_wrapper('./data/decisions/%s' % construct_ID('librarian_decisions', collection))
                pred_index, _ = get_decision_and_prediction_index(attachment)
                print(pred_index)
                # TODO: remaining pipeline steps, not enabled yet:
                # translated = translate_csv_reader(attachment, pred_index)
                # translated = translate_csv_reader(translated, dec_index)
                # decision_list = compare_decisions(translated)
                # integrate_into_training_set(decision_list)
                # write_csv_from_list('data/decisions/%s' % construct_ID('librarian_final', collection), decision_list)
                # translated_list = translate_final(decision_list)
                # write_csv_from_list('data/decisions/%s' % construct_ID('librarian_translated_final', collection), translated_list)
        if index >= 128:
            ## gotta stop sometime, this only checks up to the nth email
            break
|
||
|
|
||
|
def write_attachment(content, collection_code):
    """Write attachment bytes to the collection's librarian-decisions file.

    Args:
        content: Bytes to write.
        collection_code: Code passed to ``construct_ID`` to build the file
            name under './data/decisions/'.
    """
    file_id = construct_ID('librarian_decisions', collection_code)
    target_path = './data/decisions/%s' % file_id
    with open(target_path, 'wb') as out_file:
        out_file.write(content)
|
||
|
|
||
|
def compare_decisions(csv_list):
    """Append a 'final_decision' column derived from prediction and answer.

    For every data row the prediction cell is converted with ``int`` and
    combined with the 'Do you agree?' cell:

    * answer '1': the final decision is the prediction itself;
    * answer '0': the final decision is the prediction inverted (0 or 1);
    * any other answer: the final decision is 0.

    Args:
        csv_list: List of rows whose first row is the header.

    Returns:
        The same ``csv_list`` object; the header and every data row are
        extended in place.
    """
    pred_index, dec_index = get_decision_and_prediction_index(csv_list)
    csv_list[0].append('final_decision')

    for record in csv_list[1:]:
        prediction = int(record[pred_index])
        answer = record[dec_index]
        if answer == '1':
            final_decision = prediction
        elif answer == '0':
            final_decision = int(not prediction)
        else:
            final_decision = 0
        record.append(final_decision)

    return csv_list
|
||
|
|
||
|
def get_decision_and_prediction_index(result_list):
    """Locate the prediction and decision columns in a result table.

    Args:
        result_list: List of rows whose first row is the header.

    Returns:
        Tuple ``(pred, dec)``: the header positions of
        'Remove from collection' and 'Do you agree?'.

    Raises:
        ValueError: If either heading is missing from the header row.
    """
    header = result_list[0]
    return header.index('Remove from collection'), header.index('Do you agree?')
|
||
|
|
||
|
def translate_final(csv_reader):
    """Translate the last column from 0/1 into 'keep'/'remove'.

    The last column is located from the width of the first (header) row.
    Rows whose last cell equals the header's last cell are left untouched.
    In every other row the cell becomes 'keep' for 0 or '0', 'remove' for
    1 or '1', and an empty string for anything else.

    Args:
        csv_reader: List of rows (lists); the first row is the header.

    Returns:
        A new list holding the same row objects. The rows themselves are
        modified in place. An empty input gives an empty list.
    """
    rows = list(csv_reader)
    if not rows:
        return []

    column_index = len(rows[0]) - 1
    header_label = rows[0][column_index]

    for row in rows:
        value = row[column_index]
        if value == header_label:
            continue
        # Accept both integers and their string forms so that data read back
        # from a CSV file ('0'/'1') is translated as well.
        if value in (0, '0'):
            row[column_index] = 'keep'
        elif value in (1, '1'):
            row[column_index] = 'remove'
        else:
            row[column_index] = ''

    return rows
|
||
|
|
||
|
def integrate_into_training_set(csv_wrapper):
    """Fold reviewed decision rows into the training set.

    For every data row the 'Withdrawn' cell is overwritten with the row's
    'final_decision' value and the 'final_decision', 'Do you agree?' and
    'Remove from collection' cells are removed. The row is appended to the
    training set when its 'Withdrawn' column then lines up with the training
    set's 'Withdrawn' column; otherwise a message is printed for that row.

    Args:
        csv_wrapper: List of rows whose first row is the header. The data
            rows are modified in place; the header row is left as it is.

    Returns:
        The training set with the accepted rows appended. Nothing is written
        back to disk here.

    Raises:
        ValueError: If a required heading is missing from either header.
    """
    ## training_set = read_csv_to_list_wrapper('./data/training_data/global_training_data.sv')
    ## temp training set
    training_set = read_csv_to_list_wrapper('./data/training_data/training_ALL_api_out.csv')

    header = csv_wrapper[0]
    final_index = header.index('final_decision')
    withdrawn_index_dec = header.index('Withdrawn')
    prediction_index = header.index('Remove from collection')
    decision_index = header.index('Do you agree?')
    withdrawn_index_train = training_set[0].index('Withdrawn')

    # Remove from the highest index down so that an earlier pop never shifts
    # a column that is still waiting to be removed.
    drop_indices = sorted((final_index, decision_index, prediction_index), reverse=True)

    # Position of 'Withdrawn' once the three columns are gone; this is the
    # position that has to match the training set.
    withdrawn_after_drop = withdrawn_index_dec - sum(
        1 for column in drop_indices if column < withdrawn_index_dec
    )
    columns_align = withdrawn_index_train == withdrawn_after_drop

    for row in csv_wrapper[1:]:
        row[withdrawn_index_dec] = row[final_index]
        for column in drop_indices:
            row.pop(column)

        if columns_align:
            training_set.append(row)
        else:
            print('mismatched Withdrawn indices')

    return training_set
|
||
|
|
||
|
|