12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- import os
- import pickle
def load_data(path):
    """
    Read the Dataset File and Return Its Contents

    :param path: location of the text file to read
    :return: the whole file as a single string
    """
    # Single-argument join kept from the original; there is nothing to join.
    source = os.path.join(path)
    with open(source, mode="r") as handle:
        return handle.read()
def preprocess_and_save_data(dataset_path, token_lookup, create_lookup_tables):
    """
    Preprocess Text Data and Save It to 'preprocess.p'

    :param dataset_path: path of the raw text file, read with load_data
    :param token_lookup: zero-argument callable returning a dict that maps
        each substring to replace (key) to its replacement token (value)
    :param create_lookup_tables: callable that takes the list of words and
        returns the pair (vocab_to_int, int_to_vocab)
    :return: None; the tuple (int_text, vocab_to_int, int_to_vocab,
        token_dict) is pickled to 'preprocess.p' in the working directory
    """
    text = load_data(dataset_path)

    # Skip the first 81 characters of the file.
    # NOTE(review): presumably a notice/header at the top of the dataset;
    # confirm against the actual data file before reusing with other inputs.
    text = text[81:]

    token_dict = token_lookup()
    for key, token in token_dict.items():
        # Pad with spaces so each token becomes its own word after split().
        text = text.replace(key, ' {} '.format(token))

    # Lowercasing happens after substitution, so inserted tokens are
    # lowercased along with the rest of the text.
    words = text.lower().split()

    vocab_to_int, int_to_vocab = create_lookup_tables(words)
    int_text = [vocab_to_int[word] for word in words]

    # Context manager guarantees the pickle is flushed and the handle closed.
    with open('preprocess.p', 'wb') as out_file:
        pickle.dump((int_text, vocab_to_int, int_to_vocab, token_dict), out_file)
def load_preprocess():
    """
    Load the Preprocessed Training Data from 'preprocess.p'

    :return: the object stored in 'preprocess.p' (read from the current
        working directory), exactly as it was pickled
    """
    # NOTE: unpickling can execute arbitrary code; only load a file you trust.
    # Context manager closes the handle instead of leaking it.
    with open('preprocess.p', mode='rb') as in_file:
        return pickle.load(in_file)
def save_params(params):
    """
    Save Parameters to 'params.p'

    :param params: any picklable object
    :return: None; the object is written to 'params.p' in the current
        working directory, replacing any existing file
    """
    # Context manager guarantees the data is flushed and the handle closed.
    with open('params.p', 'wb') as out_file:
        pickle.dump(params, out_file)
def load_params():
    """
    Load Parameters from 'params.p'

    :return: the object stored in 'params.p' (read from the current
        working directory), exactly as it was pickled
    """
    # NOTE: unpickling can execute arbitrary code; only load a file you trust.
    # Context manager closes the handle instead of leaking it.
    with open('params.p', mode='rb') as in_file:
        return pickle.load(in_file)
|