You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
166 lines
7.0 KiB
166 lines
7.0 KiB
import email
import os.path
import re
import smtplib

from exchangelib import (
    Credentials,
    Account,
    DELEGATE,
    Configuration,
    FileAttachment,
    ItemAttachment,
    Message,
    CalendarItem,
    HTMLBody,
)

from cont_modules import *
from creds import creds
from config1 import acc, config
|
# test again |
|
|
|
|
|
def sendmail(attachment_filename, email_target):
    """Send a file from the current directory as an email attachment.

    A message is created on the module-level account ``acc`` with the fixed
    subject 'Library weeding' and the fixed body 'test email'. The file named
    by ``attachment_filename`` is read from './' and attached under the same
    name, then the message is sent.

    Args:
        attachment_filename: Name of the file to attach, looked up in './'.
        email_target: Recipient; used as the single entry of ``to_recipients``.

    Raises:
        OSError: If the attachment file cannot be opened or read.
    """
    body = 'test email'

    item = Message(account=acc, subject='Library weeding', body=body, to_recipients=[email_target])

    # Use a context manager so the file handle is closed even if reading fails;
    # the previous open(...).read() left the handle to the garbage collector.
    with open('./' + attachment_filename, 'rb') as attachment_file:
        binary_file_content = attachment_file.read()

    attachment = FileAttachment(name=attachment_filename, content=binary_file_content)
    item.attach(attachment)
    item.send()
|
|
|
|
|
def checkmail(collection_code):
    """Download the predictions attachment for a collection from the inbox.

    Walks the inbox of the module-level account ``acc`` and stops at the first
    unread message whose first attachment is named
    ``construct_ID('predictions', collection_code)``. That attachment's content
    is written to ``'./' + construct_ID('librarian_decisions', collection_code)``
    and the message is marked as read and saved. Only the first attachment of
    each message is inspected, and the scan gives up after the item at
    index 64.

    Args:
        collection_code: Collection identifier forwarded to ``construct_ID``.

    Returns:
        ``construct_ID('librarian_decisions', collection_code)``. The name is
        returned whether or not a matching message was found.
    """
    for position, message in enumerate(acc.inbox.all().iterator()):
        # Explicit comparison kept on purpose: a value of None must not count as unread.
        unread_with_files = message.is_read == False and len(message.attachments) >= 1  # noqa: E712
        if unread_with_files and message.attachments[0].name == construct_ID('predictions', collection_code):
            destination = './%s' % construct_ID('librarian_decisions', collection_code)
            with open(destination, 'wb') as out_file:
                out_file.write(message.attachments[0].content)
            message.is_read = True
            message.save()
            break
        # Hard stop so a large inbox is not scanned in full.
        if position >= 64:
            break
    return construct_ID('librarian_decisions', collection_code)
|
|
|
def check_if_new(item):
    """Tell whether a mailbox item is unread and carries an attachment.

    Args:
        item: Object exposing ``is_read`` and ``attachments``.

    Returns:
        True when ``item.is_read == False`` and ``item.attachments`` holds at
        least one entry, otherwise False. ``attachments`` is not inspected for
        items that are already read.
    """
    # Explicit comparison kept on purpose: a value of None must not count as unread.
    return bool(item.is_read == False and len(item.attachments) >= 1)  # noqa: E712
|
|
|
def get_attachment(attachment):
    """Extract the content and collection code of a predictions attachment.

    Args:
        attachment: Object exposing ``name`` and ``content``.

    Returns:
        ``(attachment.content, collection_code)`` when the attachment name
        contains 'predictions.csv', where ``collection_code`` is the second
        value returned by ``read_file_name``. Returns None for any other
        attachment.
    """
    # Check relevance before parsing: an unrelated attachment does not have to
    # follow the naming scheme, so it must not be handed to read_file_name.
    if 'predictions.csv' not in attachment.name:
        return None

    _date, collection_code, _step = read_file_name(attachment.name)
    return attachment.content, collection_code
|
|
|
def read_file_name(string_file_name):
    """Split a file name into its date, collection code and step name.

    The name is read positionally: the first ten characters are the date, one
    separator character is skipped, the collection code is whatever the
    pattern ``^[A-Z]{1,2}_[-A-Z]{2,12}`` matches from index 11 onward, another
    separator character is skipped, and the step is everything after that up
    to the last four characters (a four-character extension such as '.csv').

    Args:
        string_file_name: File name to parse.

    Returns:
        A ``(date, collection, step)`` tuple of strings.

    Raises:
        ValueError: If no collection code is found at index 11.
    """
    date = string_file_name[0:10]

    collection_match = re.match('^[A-Z]{1,2}_[-A-Z]{2,12}', string_file_name[11:])
    if collection_match is None:
        raise ValueError('no collection code found in file name: %r' % (string_file_name,))
    # Hand back the matched text, not the Match object, so callers can use the
    # code directly when building file names.
    collection = collection_match.group(0)

    # +2 accounts for the separators before and after the collection code.
    step_start = len(date) + len(collection) + 2
    step_end = len(string_file_name) - 4
    step = string_file_name[step_start:step_end]

    return date, collection, step
|
|
|
def loop_inbox():
    """Process new predictions attachments found in the inbox.

    Walks the inbox of the module-level account ``acc``. For every item that
    ``check_if_new`` accepts, the first attachment is passed to
    ``get_attachment``. When that yields a result, the content is stored with
    ``write_attachment``, the message is marked as read and saved, the stored
    file is loaded with ``read_csv_to_list_wrapper`` and the index of the
    prediction column is printed. Items for which ``get_attachment`` returns
    None are skipped and stay unread. The scan stops after the item at
    index 128.

    Duplicate detection is not implemented yet.
    """
    for index, item in enumerate(acc.inbox.all().iterator()):
        if check_if_new(item):
            # Only the first attachment is considered for now.
            extracted = get_attachment(item.attachments[0])
            # get_attachment gives None for attachments that are not
            # predictions files; unpacking that would raise TypeError.
            if extracted is not None:
                content, collection = extracted
                write_attachment(content, collection)
                item.is_read = True
                item.save()

                attachment = read_csv_to_list_wrapper(
                    './data/decisions/%s' % construct_ID('librarian_decisions', collection)
                )
                pred_index, _dec_index = get_decision_and_prediction_index(attachment)
                print(pred_index)

                # TODO: remaining pipeline steps, still disabled:
                #   translated = translate_csv_reader(attachment, pred_index)
                #   translated = translate_csv_reader(translated, dec_index)
                #   decision_list = compare_decisions(translated)
                #   integrate_into_training_set(decision_list)
                #   write_csv_from_list('data/decisions/%s' % construct_ID('librarian_final', collection), decision_list)
                #   translated_list = translate_final(decision_list)
                #   write_csv_from_list('data/decisions/%s' % construct_ID('librarian_translated_final', collection), translated_list)

        # Hard stop: only the first items of the inbox are ever checked.
        if index >= 128:
            break
|
|
|
def write_attachment(content, collection_code):
    """Store attachment bytes as the decisions file of a collection.

    Args:
        content: Bytes-like data; the target file is opened in binary mode.
        collection_code: Collection identifier forwarded to ``construct_ID``
            to build the file name under './data/decisions/'.
    """
    destination = './data/decisions/%s' % construct_ID('librarian_decisions', collection_code)
    with open(destination, 'wb') as out_file:
        out_file.write(content)
|
|
|
def compare_decisions(csv_list):
    """Append a 'final_decision' column combining prediction and answer.

    The header row (first row) gets the label 'final_decision'. For every
    other row the prediction cell is converted with ``int`` and combined with
    the 'Do you agree?' cell:

    * answer '1': the final decision is the prediction itself;
    * answer '0': the final decision is the prediction inverted (1 for a
      prediction of 0, otherwise 0);
    * any other answer: the final decision is 0.

    Args:
        csv_list: List of rows (lists) with the header row first. The rows
            are extended in place.

    Returns:
        The same ``csv_list`` object.
    """
    pred_index, dec_index = get_decision_and_prediction_index(csv_list)
    csv_list[0].append('final_decision')

    for record in csv_list[1:]:
        prediction = int(record[pred_index])
        answer = record[dec_index]
        if answer == '0':
            outcome = int(not prediction)
        elif answer == '1':
            outcome = prediction
        else:
            outcome = 0
        record.append(outcome)

    return csv_list
|
|
|
def get_decision_and_prediction_index(result_list):
    """Locate the prediction and decision columns in a header row.

    Args:
        result_list: List of rows whose first row is the header.

    Returns:
        ``(pred, dec)``: the positions of 'Remove from collection' and
        'Do you agree?' in the header row.

    Raises:
        ValueError: If either heading is missing from the header row.
    """
    header = result_list[0]
    return header.index('Remove from collection'), header.index('Do you agree?')
|
|
|
def translate_final(csv_reader):
    """Turn the numeric last column into the labels 'keep' and 'remove'.

    The last column of the header row determines which column is translated.
    In every row whose cell in that column differs from the header label, the
    cell is replaced: 0 (or the string '0') becomes 'keep', 1 (or the string
    '1') becomes 'remove', and anything else becomes an empty string. Rows
    whose cell equals the header label, including the header row itself, are
    passed through unchanged.

    Args:
        csv_reader: Iterable of rows (lists) with the header row first. The
            rows are modified in place.

    Returns:
        A new list holding the same row objects, or an empty list when there
        are no rows.
    """
    rows = list(csv_reader)
    if not rows:
        return []

    header = rows[0]
    column_index = len(header) - 1

    for row in rows:
        cell = row[column_index]
        if cell == header[column_index]:
            continue
        # String forms are accepted as well because cells read back from a CSV
        # file are text, which the integer-only comparison silently blanked.
        if cell in (0, '0'):
            row[column_index] = 'keep'
        elif cell in (1, '1'):
            row[column_index] = 'remove'
        else:
            row[column_index] = ''

    return rows
|
|
|
def integrate_into_training_set(csv_wrapper):
    """Merge reviewed decision rows into the training data rows.

    The training rows are loaded from
    './data/training_data/training_ALL_api_out.csv' (a temporary stand-in for
    the global training set). For every data row of ``csv_wrapper`` the
    'Withdrawn' cell is overwritten with the 'final_decision' cell, and the
    'final_decision', 'Do you agree?' and 'Remove from collection' cells are
    removed. The rows are appended to the training rows only when their
    'Withdrawn' column then sits at the same position as in the training
    header; otherwise 'mismatched Withdrawn indices' is printed once and
    nothing is appended.

    Args:
        csv_wrapper: List of rows (lists) with the header row first. The data
            rows are modified in place; the header row is left as it is.

    Returns:
        The training rows, including any appended rows. Nothing is written
        back to disk here.

    Raises:
        ValueError: If a required heading is missing from either header row.
    """
    # Temporary training set; the global file is meant to replace it later.
    training_set = read_csv_to_list_wrapper('./data/training_data/training_ALL_api_out.csv')

    header = csv_wrapper[0]
    final_index = header.index('final_decision')
    withdrawn_index_dec = header.index('Withdrawn')
    prediction_index = header.index('Remove from collection')
    decision_index = header.index('Do you agree?')
    withdrawn_index_train = training_set[0].index('Withdrawn')

    # Remove from the highest index down: popping a lower index first shifts
    # every later cell left and makes the next pop hit the wrong column.
    drop_indices = sorted((final_index, decision_index, prediction_index), reverse=True)

    # Where 'Withdrawn' ends up once the helper columns before it are gone;
    # this is the position that has to line up with the training header.
    columns_dropped_before = sum(1 for index in drop_indices if index < withdrawn_index_dec)
    aligned = withdrawn_index_train == withdrawn_index_dec - columns_dropped_before
    if not aligned:
        print('mismatched Withdrawn indices')

    for row in csv_wrapper[1:]:
        row[withdrawn_index_dec] = row[final_index]
        for index in drop_indices:
            row.pop(index)
        if aligned:
            training_set.append(row)

    return training_set
|
|
|
|
|
|