"""Exchange-mailbox helpers for the library weeding workflow.

Sends prediction spreadsheets to librarians as attachments, collects the
returned attachments from the inbox, and post-processes the decision CSVs
(combining the model prediction with the librarian's answer).
"""
import email
import os.path
import re
import smtplib

from exchangelib import Credentials, Account, DELEGATE, Configuration, FileAttachment, ItemAttachment, Message, CalendarItem, HTMLBody

from cont_modules import *
from creds import creds
from config1 import acc, config

# Highest inbox index (0-based) examined before the scans give up.
_CHECKMAIL_LAST_INDEX = 64
_LOOP_INBOX_LAST_INDEX = 128

# Collection code as it appears in attachment names, e.g. "AB_SOME-CODE".
_COLLECTION_CODE_RE = re.compile(r'^[A-Z]{1,2}_[-A-Z]{2,12}')


def sendmail(attachment_filename, email_target):
    """Send a file from the current directory as an email attachment.

    Args:
        attachment_filename: Name of the file, relative to the current
            working directory. It is also used as the attachment name.
        email_target: Recipient address.
    """
    body = 'test email'
    item = Message(account=acc, subject='Library weeding', body=body, to_recipients=[email_target])
    # Context manager so the file handle is closed even if reading fails.
    with open('./' + attachment_filename, 'rb') as attachment_file:
        binary_file_content = attachment_file.read()
    attachment = FileAttachment(name=attachment_filename, content=binary_file_content)
    item.attach(attachment)
    item.send()


def checkmail(collection_code):
    """Download the first unread predictions attachment for a collection.

    Scans the inbox for an unread message whose FIRST attachment is named
    ``construct_ID('predictions', collection_code)``. When found, the
    attachment is written to the current directory under the
    ``construct_ID('librarian_decisions', collection_code)`` name and the
    message is marked as read. Only one message is handled per call.

    Args:
        collection_code: Collection identifier passed to ``construct_ID``.

    Returns:
        The ``librarian_decisions`` file name, whether or not a matching
        message was found.
    """
    expected_name = construct_ID('predictions', collection_code)
    decisions_name = construct_ID('librarian_decisions', collection_code)
    for index, item in enumerate(acc.inbox.all().iterator()):
        if check_if_new(item) and item.attachments[0].name == expected_name:
            with open('./%s' % decisions_name, 'wb') as f:
                f.write(item.attachments[0].content)
            item.is_read = True
            item.save()
            break
        if index >= _CHECKMAIL_LAST_INDEX:
            break
    return decisions_name


def check_if_new(item):
    """Return True when ``item`` is unread and has at least one attachment."""
    # ``is False`` keeps a missing (None) read flag from counting as unread.
    return item.is_read is False and len(item.attachments) >= 1


def get_attachment(attachment):
    """Extract the content and collection code of a predictions attachment.

    Args:
        attachment: Attachment object exposing ``name`` and ``content``.

    Returns:
        ``(content, collection_code)`` when the attachment name contains
        ``'predictions.csv'`` and can be parsed, otherwise ``None``.
    """
    name = attachment.name or ''
    # Check the file type first so unrelated attachments are skipped
    # without attempting to parse their names.
    if 'predictions.csv' not in name:
        return None
    try:
        date, collection_code, step = read_file_name(name)
    except ValueError as err:
        print('skipping attachment: %s' % err)
        return None
    return attachment.content, collection_code


def read_file_name(string_file_name):
    """Split an attachment file name into date, collection code and step.

    The name is read as: 10 characters of date, one separator character,
    the collection code, one separator character, the step name, and a
    4-character extension such as ``.csv``.

    Args:
        string_file_name: The attachment file name.

    Returns:
        ``(date, collection, step)`` as strings.

    Raises:
        ValueError: If no collection code is found after the date.
    """
    date = string_file_name[0:10]
    match = _COLLECTION_CODE_RE.match(string_file_name[11:])
    if match is None:
        raise ValueError('no collection code found in file name %r' % string_file_name)
    # Return the matched text, not the Match object: callers use it to
    # build file names.
    collection = match.group(0)
    step_start = len(date) + len(collection) + 2
    step_end = len(string_file_name) - 4
    step = string_file_name[step_start:step_end]
    return date, collection, step


def _process_new_item(item):
    """Save the first attachment of ``item`` and load it back as a list."""
    ## only grabbing first attachment for now
    result = get_attachment(item.attachments[0])
    if result is None:
        # Not a usable predictions file; leave the message unread.
        return
    content, collection = result
    write_attachment(content, collection)
    item.is_read = True
    item.save()
    attachment = read_csv_to_list_wrapper('./data/decisions/%s' % construct_ID('librarian_decisions', collection))
    pred_index, dec_index = get_decision_and_prediction_index(attachment)
    print(pred_index)
    # TODO: remaining pipeline, not enabled yet:
    #   translate_csv_reader (prediction and decision columns)
    #   -> compare_decisions -> integrate_into_training_set
    #   -> write_csv_from_list(construct_ID('librarian_final', collection))
    #   -> translate_final
    #   -> write_csv_from_list(construct_ID('librarian_translated_final', collection))


def loop_inbox():
    """Process every new message among the first inbox items.

    A message is "new" when ``check_if_new`` accepts it. Messages whose
    first attachment is not a usable predictions file are skipped and
    left unread.
    """
    ## needs functionality to check for duplicates
    for index, item in enumerate(acc.inbox.all().iterator()):
        if check_if_new(item):
            _process_new_item(item)
        if index >= _LOOP_INBOX_LAST_INDEX:
            ## gotta stop sometime, this only checks up to the nth email
            break


def write_attachment(content, collection_code):
    """Write ``content`` (bytes) to the decisions folder for a collection.

    The target is ``./data/decisions/`` plus
    ``construct_ID('librarian_decisions', collection_code)``.
    """
    with open('./data/decisions/%s' % construct_ID('librarian_decisions', collection_code), 'wb') as f:
        f.write(content)


def compare_decisions(csv_list):
    """Append a ``final_decision`` column combining prediction and answer.

    For each data row the final value is the prediction when the
    "Do you agree?" cell is ``'1'``, the negated prediction when it is
    ``'0'``, and ``0`` for any other answer. ``csv_list`` is modified in
    place.

    Args:
        csv_list: List of rows; the first row is the header.

    Returns:
        The same ``csv_list`` object.
    """
    pred_index, dec_index = get_decision_and_prediction_index(csv_list)
    csv_list[0].append('final_decision')
    for row in csv_list[1:]:
        prediction = int(row[pred_index])
        answer = row[dec_index]
        if answer == '1':
            final = prediction
        elif answer == '0':
            final = int(not prediction)
        else:
            final = 0
        row.append(final)
    return csv_list


def get_decision_and_prediction_index(result_list):
    """Return the header positions of the prediction and decision columns.

    Args:
        result_list: List of rows; the first row is the header.

    Returns:
        ``(pred, dec)``: indices of 'Remove from collection' and
        'Do you agree?'.

    Raises:
        ValueError: If either heading is missing from the header.
    """
    header = result_list[0]
    pred = header.index('Remove from collection')
    dec = header.index('Do you agree?')
    return pred, dec


def translate_final(csv_reader):
    """Translate the last column from 0/1 to 'keep'/'remove'.

    Accepts both integers and the strings ``'0'`` / ``'1'``. Any other
    value becomes an empty string. A cell equal to the header's label is
    left untouched, which preserves the header row. Rows are modified in
    place.

    Args:
        csv_reader: Iterable of rows (lists); the first row is the header.

    Returns:
        A list of all rows, header included.
    """
    rows = list(csv_reader)
    if not rows:
        return []
    header = rows[0]
    column_index = len(header) - 1
    header_label = header[column_index]
    for row in rows:
        value = row[column_index]
        if value == header_label:
            continue
        if value in (0, '0'):
            row[column_index] = 'keep'
        elif value in (1, '1'):
            row[column_index] = 'remove'
        else:
            row[column_index] = ''
    return rows


def integrate_into_training_set(csv_wrapper):
    """Fold librarian decisions into the training data rows.

    For each data row, the 'Withdrawn' cell is overwritten with the
    'final_decision' value and the 'final_decision', 'Do you agree?' and
    'Remove from collection' columns are removed. The row is appended to
    the training set only when 'Withdrawn' then sits at the same index as
    in the training set header. Data rows of ``csv_wrapper`` are modified
    in place; its header row is left as is.

    Args:
        csv_wrapper: List of rows; the first row is the header.

    Returns:
        The training set rows with any accepted rows appended. Nothing is
        written back to disk here.
    """
    # Temporary training set. The original note names
    # './data/training_data/global_training_data.sv' as the intended source.
    training_set = read_csv_to_list_wrapper('./data/training_data/training_ALL_api_out.csv')
    header = csv_wrapper[0]
    final_index = header.index('final_decision')
    withdrawn_index_dec = header.index('Withdrawn')
    prediction_index = header.index('Remove from collection')
    decision_index = header.index('Do you agree?')
    withdrawn_index_train = training_set[0].index('Withdrawn')

    # Pop from the highest index down so earlier removals do not shift the
    # position of columns still waiting to be removed.
    drop_indices = sorted((final_index, decision_index, prediction_index), reverse=True)
    # Where 'Withdrawn' ends up once the helper columns are gone.
    withdrawn_after_drop = withdrawn_index_dec - sum(
        index < withdrawn_index_dec for index in drop_indices
    )
    layouts_match = withdrawn_index_train == withdrawn_after_drop
    if not layouts_match:
        print('mismatched Withdrawn indices')

    for row in csv_wrapper[1:]:
        row[withdrawn_index_dec] = row[final_index]
        for index in drop_indices:
            row.pop(index)
        if layouts_match:
            training_set.append(row)
    return training_set