Program to assist librarians with weeding by making predictions based on past decision data and integrating librarian-approved predictions into the data set

import email, smtplib, os.path
import re  # used by read_file_name below
from exchangelib import Credentials, Account, DELEGATE, Configuration, FileAttachment, ItemAttachment, Message, CalendarItem, HTMLBody
from cont_modules import *
from creds import creds
from config1 import acc, config


def sendmail(attachment_filename, email_target):
    ## takes a file name, attaches that file from the local folder to a new email, and sends it to email_target
    body = 'test email'
    item = Message(account=acc, subject='Library weeding', body=body, to_recipients=[email_target])
    with open('./' + attachment_filename, 'rb') as f:
        binary_file_content = f.read()
    attachment = FileAttachment(name=attachment_filename, content=binary_file_content)
    item.attach(attachment)
    item.send()


def checkmail(collection_code):
    ## takes a collection code, finds the first unread message whose first attachment is the
    ## predictions file for that collection, and downloads it to the local folder
    ## currently handles a single email and its first attachment only; the message must be marked as unread
    for index, item in enumerate(acc.inbox.all().iterator()):
        if not item.is_read and len(item.attachments) >= 1:
            if item.attachments[0].name == construct_ID('predictions', collection_code):
                with open('./%s' % construct_ID('librarian_decisions', collection_code), 'wb') as f:
                    f.write(item.attachments[0].content)
                item.is_read = True
                item.save()
                break
        if index >= 64:
            ## only check the first 64 messages
            break
    return construct_ID('librarian_decisions', collection_code)


def check_if_new(item):
    return not item.is_read and len(item.attachments) >= 1


def get_attachment(attachment):
    ## parses the collection code out of the attachment file name and, for prediction files,
    ## returns the attachment content along with that code
    date, collection_code, step = read_file_name(attachment.name)
    if 'predictions.csv' in attachment.name:
        return attachment.content, collection_code


def read_file_name(string_file_name):
    ## splits a file name of the form '<date>_<collection code>_<step>.csv' (date is the first
    ## 10 characters) into its date, collection code, and step name
    date = string_file_name[0:10]
    collection = re.match('^[A-Z]{1,2}_[-A-Z]{2,12}', string_file_name[11:])[0]
    step_start = len(date) + len(collection) + 2
    step_end = len(string_file_name) - 4
    step = string_file_name[step_start:step_end]
    return date, collection, step


def loop_inbox():
    ## checks new inbox messages, saves any returned predictions file, and loads it for processing
    ## needs functionality to check for duplicates
    for index, item in enumerate(acc.inbox.all().iterator()):
        if check_if_new(item):
            ## only grabbing the first attachment for now; skip messages that are not a predictions file
            result = get_attachment(item.attachments[0])
            if result is not None:
                content, collection = result
                write_attachment(content, collection)
                item.is_read = True
                item.save()
                attachment = read_csv_to_list_wrapper('./data/decisions/%s' % construct_ID('librarian_decisions', collection))
                pred_index, dec_index = get_decision_and_prediction_index(attachment)
                print(pred_index)
                # translated = translate_csv_reader(attachment, pred_index)
                # translated = translate_csv_reader(translated, dec_index)
                # decision_list = compare_decisions(translated)
                # integrate_into_training_set(decision_list)
                # write_csv_from_list('data/decisions/%s' % construct_ID('librarian_final', collection), decision_list)
                # translated_list = translate_final(decision_list)
                # write_csv_from_list('data/decisions/%s' % construct_ID('librarian_translated_final', collection), translated_list)
        if index >= 128:
            ## gotta stop sometime; this only checks up to the 128th email
            break


def write_attachment(content, collection_code):
    with open('./data/decisions/%s' % construct_ID('librarian_decisions', collection_code), 'wb') as f:
        f.write(content)


def compare_decisions(csv_list):
    ## appends a 'final_decision' column built from the model prediction and the librarian's answer
    pred_index, dec_index = get_decision_and_prediction_index(csv_list)
    csv_list[0].append('final_decision')
    for row in csv_list[1:]:
        ## boolean arithmetic: 1 when the prediction and the answer are both 1 or both 0, otherwise 0
        row.append(((not int(row[pred_index])) * (row[dec_index] == '0')) + (int(row[pred_index]) * (row[dec_index] == '1')))
    return csv_list
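
## Worked example of the final_decision arithmetic above. The reading of the values is an
## assumption (1 in 'Remove from collection' = model recommends withdrawal, '1' in
## 'Do you agree?' = the librarian agrees); the arithmetic itself follows from the code:
##   prediction 1, answer '1' -> 1  (confirmed removal)
##   prediction 0, answer '0' -> 1  (librarian overrides a keep)
##   prediction 1, answer '0' -> 0  (librarian overrides a removal)
##   prediction 0, answer '1' -> 0  (confirmed keep)

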
def get_decision_and_prediction_index(result_list):
    ## returns the column indices of the model prediction and the librarian decision
    pred = result_list[0].index('Remove from collection')
    dec = result_list[0].index('Do you agree?')
    return pred, dec


def translate_final(csv_list):
    ## translates the last column (final_decision) from 0/1 to the human-readable 'keep'/'remove';
    ## the header row is left unchanged
    wrapper_list = list(csv_list)
    column_index = len(wrapper_list[0]) - 1
    header = wrapper_list[0]
    translated = []
    for row in wrapper_list:
        if row[column_index] != header[column_index]:
            ## branchless translation: boolean multiplication picks the matching string
            row[column_index] = (row[column_index] == 0) * 'keep' + (row[column_index] == 1) * 'remove'
        translated.append(row)
    return translated


def integrate_into_training_set(csv_wrapper):
    ## copies each finalized decision into the 'Withdrawn' column, strips the prediction and
    ## decision columns, and appends the row to the training set
    ## training_set = read_csv_to_list_wrapper('./data/training_data/global_training_data.sv')
    ## temporary training set
    training_set = read_csv_to_list_wrapper('./data/training_data/training_ALL_api_out.csv')
    final_index = csv_wrapper[0].index('final_decision')
    withdrawn_index_dec = csv_wrapper[0].index('Withdrawn')
    prediction_index = csv_wrapper[0].index('Remove from collection')
    decision_index = csv_wrapper[0].index('Do you agree?')
    withdrawn_index_train = training_set[0].index('Withdrawn')
    for row in csv_wrapper[1:]:
        row[withdrawn_index_dec] = row[final_index]
        row.pop(final_index)
        row.pop(decision_index)
        row.pop(prediction_index)
        if withdrawn_index_train == withdrawn_index_dec:
            training_set.append(row)
        else:
            print('mismatched Withdrawn indices')
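

## Hypothetical usage sketch, not part of the original workflow: one way the helpers above might
## be chained for a single collection. The collection code 'AB_REFERENCE' and the email address
## are placeholders; construct_ID, acc, and the csv helpers come from the imports at the top, and
## the predictions file is assumed to already exist in the working directory.
if __name__ == '__main__':
    collection = 'AB_REFERENCE'  # placeholder collection code matching the pattern in read_file_name
    ## mail the predictions spreadsheet to the librarian for review
    sendmail(construct_ID('predictions', collection), 'librarian@example.org')
    ## later, pull replies back in and save the returned decision files for processing
    loop_inbox()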