
Commit 48a77be

push source code
1 parent 328028c


51 files changed: +1316 -33 lines

.DS_Store

6 KB
Binary file not shown.

.gitignore

Lines changed: 8 additions & 32 deletions
@@ -1,32 +1,8 @@
-# Prerequisites
-*.d
-
-# Compiled Object files
-*.slo
-*.lo
-*.o
-*.obj
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Compiled Dynamic libraries
-*.so
-*.dylib
-*.dll
-
-# Fortran module files
-*.mod
-*.smod
-
-# Compiled Static libraries
-*.lai
-*.la
-*.a
-*.lib
-
-# Executables
-*.exe
-*.out
-*.app
+# Model files
+model.pkl.*
+
+# Dataset
+english_short_corpus_small_13to20_test.txt
+english_short_corpus_small_4to12_test.txt
+english_short_corpus_small_4to12_train.txt
+english_short_corpus_small_4to12_valid.txt

README.md

Lines changed: 55 additions & 1 deletion
@@ -1 +1,55 @@
-# autoencoder-interpretation
+# How to train
+1. Download the PubMed 20k/200k RCT dataset (the variant with numbers replaced by an at sign) from https://github.com/Franck-Dernoncourt/pubmed-rct .
+2. Put glove.6B.300d.txt, train.txt, dev.txt and test.txt into data/ .
+3. cd src/ and run the following command to preprocess and generate the training data.
+```
+python make_dataset.py ../data/
+```
+
+4. Run the following command to prepare a model folder. Feel free to open config.json to tune the hyperparameters.
+```
+mkdir ../models/your_model_folder
+cp ../models/hnn/config.json ../models/your_model_folder
+```
+
+5. Run the following command to start training.
+```
+python train.py ../models/your_model_folder/
+```
+
+6. The trained model will be saved in the folder 'your_model_folder/'.
+
+
+
+# How to predict
+## Download the pretrained model
+1. Run the following shell script.
+```
+./download.sh
+```
+2. cd src/
+
+## To predict test.txt
+1. Run the following command to predict test.txt. Change the x in '--epoch x' to the best epoch from your training run.
+```
+python predict.py ../models/hnn/ --epoch 7 --input_mode 1 --input_dir ../data/test.pkl --output_dir result.txt
+
+or
+
+python predict.py ../models/your_model_folder/ --epoch 7 --input_mode 1 --input_dir ../data/test.pkl --output_dir result.txt
+```
+
+## To predict an abstract
+1. Run the following command to predict interactive input.
+```
+python predict.py ../models/hnn/ --epoch 7
+
+or
+
+python predict.py ../models/your_model_folder/ --epoch 7
+```
+
+2. Type in the abstract and it will output the result. Try the following abstract as input.
+```
+To evaluate the performance ( efficacy , safety and acceptability ) of a new micro-adherent absorbent dressing ( UrgoClean ) compared with a hydrofiber dressing ( Aquacel ) in the local management of venous leg ulcers , in the debridement stage .$$$A non-inferiority European randomised controlled clinical trial ( RCT ) was conducted in @ centres , on patients presenting with venous or predominantly venous , mixed aetiology leg ulcers at their sloughy stage ( with more than @ % of the wound bed covered with slough at baseline ) .$$$Patients were followed over a @-week period and assessed weekly .$$$The primary judgement criteria was the relative regression of the wound surface area after the @-week treatment period .
+```

data/.DS_Store

6 KB
Binary file not shown.

data/config.json

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+{
+    "train_path": "../data/english_short_corpus_small_4to12_train.txt",
+    "test_path": "../data/english_short_corpus_small_4to12_test.txt",
+    "valid_path": "../data/english_short_corpus_small_4to12_valid.txt",
+    "long_test_path": "../data/english_short_corpus_small_13to20_test.txt",
+    "word_set_path": "../data/word_set.pkl",
+    "vocab_path": "../data/vocab.pkl"
+}
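
These paths hint at how the preprocessing stage is wired together. The script that consumes this file is not shown in rendered form in this commit, so the sketch below is only an assumption of how src/make_dataset.py might read it; the key names are the only part taken from the config itself.

```
import json
import pickle

# Hypothetical consumer of data/config.json; only the key names
# are taken from the commit -- the logic here is an assumed sketch.
with open('../data/config.json') as f:
    cfg = json.load(f)

# Collect the vocabulary from the raw training corpus.
word_set = set()
with open(cfg['train_path']) as f:
    for line in f:
        word_set.update(line.strip().split())

# Persist it where the rest of the pipeline expects to find it.
with open(cfg['word_set_path'], 'wb') as f:
    pickle.dump(word_set, f)
```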

data/long_test.pkl

1.29 MB
Binary file not shown.

data/test.pkl

722 KB
Binary file not shown.

data/train.pkl

13.3 MB
Binary file not shown.

data/valid.pkl

1.65 MB
Binary file not shown.

data/vocab.pkl

285 KB
Binary file not shown.

data/word_set.pkl

420 KB
Binary file not shown.

learning_graph.ipynb

Lines changed: 147 additions & 0 deletions
Large diffs are not rendered by default.
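
The notebook body is not rendered here, but since MetricsLogger in src/callbacks.py (below) writes per-epoch train/valid logs as JSON, a learning-curve plot of the kind this notebook presumably draws could look like the following sketch; the log path is an assumption.

```
import json
import matplotlib.pyplot as plt

# Assumed log location; the JSON layout matches what MetricsLogger writes:
# {"train": [{"epoch": 0, "loss": ...}, ...], "valid": [...]}
with open('../models/your_model_folder/log.json') as f:
    history = json.load(f)

for split in ('train', 'valid'):
    plt.plot([e['epoch'] for e in history[split]],
             [e['loss'] for e in history[split]],
             label=split)

plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()
```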

models/.DS_Store

6 KB
Binary file not shown.

models/seq2seq/.DS_Store

6 KB
Binary file not shown.

models/seq2seq/config.json

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+{
+    "arch": "Predictor",
+    "train": "../data/train.pkl",
+    "words_dict": "../data/vocab.pkl",
+    "model_parameters": {
+        "encoder_model_name": "EncoderRNN",
+        "decoder_model_name": "DecoderRNN",
+        "hidden_size": 50,
+        "batch_size": 32,
+        "max_epochs": 200,
+        "max_iters_in_epoch": 1e20,
+        "grad_accumulate_steps": 1,
+        "num_workers": 4,
+        "weight_decay": 1e-5,
+        "optimizer": "Adam",
+        "learning_rate": 1e-4,
+        "valid": "../data/valid.pkl"
+    }
+}
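
Several keys under model_parameters (batch_size, max_epochs, learning_rate, max_iters_in_epoch, grad_accumulate_steps, num_workers) line up with the keyword arguments of BasePredictor.__init__ in src/base_predictor.py below. train.py itself is not rendered in this commit, so the wiring in this sketch is an assumption:

```
import json
import pickle

# Assumed glue code; train.py is not shown in this commit.
with open('../models/seq2seq/config.json') as f:
    config = json.load(f)

with open(config['train'], 'rb') as f:   # ../data/train.pkl
    train_data = pickle.load(f)

params = config['model_parameters']
# Keep only the keys BasePredictor.__init__ actually accepts.
# Note: "valid" holds a path here; it would need unpickling before
# being passed as the valid dataset.
predictor_kwargs = {k: params[k]
                    for k in ('batch_size', 'max_epochs', 'learning_rate',
                              'max_iters_in_epoch', 'grad_accumulate_steps',
                              'num_workers')
                    if k in params}
```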

src/.DS_Store

6 KB
Binary file not shown.
12 additional binary files under src/ (2.07 KB to 5.14 KB each; file names not rendered in this view). Binary files not shown.

src/base_predictor.py

Lines changed: 202 additions & 0 deletions
@@ -0,0 +1,202 @@
+import torch
+import torch.utils.data.dataloader
+from torch.utils.data.dataloader import default_collate
+from torch.utils import data as Data
+from tqdm import tqdm
+
+
+class BasePredictor():
+    def __init__(self,
+                 batch_size=10,
+                 max_epochs=10,
+                 valid=None,
+                 device=None,
+                 metrics={},
+                 learning_rate=1e-3,
+                 max_iters_in_epoch=1e20,
+                 grad_accumulate_steps=1,
+                 num_workers=2):
+
+        self.batch_size = batch_size
+        self.max_epochs = max_epochs
+        self.valid = valid
+        self.metrics = metrics
+        self.learning_rate = learning_rate
+        self.max_iters_in_epoch = max_iters_in_epoch
+        self.grad_accumulate_steps = grad_accumulate_steps
+        self.num_workers = num_workers
+
+        if device is not None:
+            self.device = torch.device(device)
+        else:
+            self.device = torch.device('cuda:0' if torch.cuda.is_available()
+                                       else 'cpu')
+
+        self.epoch = 0
+
+    def fit_dataset(self, data, collate_fn=default_collate, callbacks=[]):
+        # Start the training loop.
+        while self.epoch < self.max_epochs:
+
+            # train and evaluate train score
+            print('training %i' % self.epoch)
+
+            dataloader = Data.DataLoader(dataset=data,
+                                         batch_size=self.batch_size,
+                                         shuffle=True,
+                                         collate_fn=collate_fn,
+                                         num_workers=self.num_workers)
+
+            # train epoch
+            log_train = self._run_epoch(dataloader, True)
+
+            # evaluate valid score
+            if self.valid is not None:
+                print('dev evaluating %i' % self.epoch)
+                dataloader = Data.DataLoader(dataset=self.valid,
+                                             batch_size=self.batch_size,
+                                             shuffle=False,
+                                             collate_fn=collate_fn,
+                                             num_workers=self.num_workers)
+
+                log_valid = self._run_epoch(dataloader, False)
+            else:
+                log_valid = None
+
+            for callback in callbacks:
+                callback.on_epoch_end(log_train, log_valid, self)
+
+            self.epoch += 1
+
+    def predict_dataset(self, data,
+                        collate_fn=default_collate,
+                        batch_size=None,
+                        predict_fn=None):
+        if batch_size is None:
+            batch_size = self.batch_size
+        if predict_fn is None:
+            predict_fn = self._predict_batch
+
+        # set model to eval mode
+        self.encoder.eval()
+        self.decoder.eval()
+
+        # make dataloader
+        dataloader = Data.DataLoader(dataset=data,
+                                     batch_size=batch_size,  # honor the override (was self.batch_size)
+                                     shuffle=False,
+                                     collate_fn=collate_fn,
+                                     num_workers=self.num_workers)
+
+        ys_ = []
+        with torch.no_grad():
+            for batch in tqdm(dataloader):
+                batch_y_ = predict_fn(batch)
+                ys_.append(batch_y_)
+
+        return ys_
+
+    def save(self, path):
+        torch.save({
+            'epoch': self.epoch + 1,
+            'encoder': self.encoder.state_dict(),
+            'decoder': self.decoder.state_dict(),
+            'optimizer': self.optimizer.state_dict()
+        }, path)
+
+    def load(self, path):
+        checkpoint = torch.load(path)  # load once rather than once per key
+        self.encoder.load_state_dict(checkpoint['encoder'])
+        self.decoder.load_state_dict(checkpoint['decoder'])
+        self.optimizer.load_state_dict(checkpoint['optimizer'])
+        self.epoch = checkpoint['epoch']
+
+    def _run_epoch(self, dataloader, training):
+        self.encoder.train(training)
+        self.decoder.train(training)
+
+        loss = 0
+
+        # reset metric accumulators
+        for metric in self.metrics:
+            metric.reset()
+
+        if training:
+            iter_in_epoch = min(len(dataloader), self.max_iters_in_epoch)
+            description = 'training'
+        else:
+            iter_in_epoch = len(dataloader)
+            description = 'evaluating'
+
+        # run batches
+        trange = tqdm(enumerate(dataloader),
+                      total=iter_in_epoch,
+                      desc=description)
+        for i, batch in trange:
+            if training and i >= iter_in_epoch:
+                break
+
+            if training:
+                output, batch_loss = \
+                    self._run_iter(batch, training)
+
+                # scale the loss so the accumulated gradient averages
+                # over grad_accumulate_steps batches
+                batch_loss /= self.grad_accumulate_steps
+
+                # accumulate gradient - zero_grad
+                if i % self.grad_accumulate_steps == 0:
+                    self.optimizer.zero_grad()
+
+                batch_loss.backward()
+
+                # accumulate gradient - step
+                if (i + 1) % self.grad_accumulate_steps == 0:
+                    self.optimizer.step()
+            else:
+                with torch.no_grad():
+                    output, batch_loss = \
+                        self._run_iter(batch, training)
+
+            # accumulate loss and metric scores
+            loss += batch_loss.item()
+
+            for metric in self.metrics:
+                metric.update(output, batch)
+            trange.set_postfix(
+                loss=loss / (i + 1),
+                **{m.name: m.print_score() for m in self.metrics if m.name == 'Accuracy'})
+
+        # calculate average loss and metrics
+        loss /= iter_in_epoch
+
+        epoch_log = {}
+        epoch_log['loss'] = float(loss)
+        for metric in self.metrics:
+            score = metric.get_score()
+            print('{}: {} '.format(metric.name, score))
+            epoch_log[metric.name] = score
+
+        print('loss=%f\n' % loss)
+        return epoch_log
+
+    def _run_iter(self, batch, training):
+        """Run one iteration for training.
+
+        Args:
+            batch (dict)
+            training (bool)
+
+        Returns:
+            predicts: Prediction of the batch.
+            loss (FloatTensor): Loss of the batch.
+        """
+        pass
+
+    def _predict_batch(self, batch):
+        """Run one iteration for predicting.
+
+        Args:
+            batch (dict)
+
+        Returns:
+            predicts: Prediction of the batch.
+        """
+        pass
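
BasePredictor leaves _run_iter and _predict_batch abstract and assumes a subclass sets self.encoder, self.decoder and self.optimizer before training starts. The commit's actual Predictor/EncoderRNN/DecoderRNN classes are not rendered above, so the following minimal subclass is a hypothetical sketch of the expected contract, not the repository's implementation:

```
import torch
import torch.nn as nn

from base_predictor import BasePredictor


class ToyPredictor(BasePredictor):
    """Hypothetical subclass illustrating the BasePredictor contract."""

    def __init__(self, vocab_size, hidden_size=50, **kwargs):
        super().__init__(**kwargs)
        # BasePredictor expects these three attributes to exist.
        self.encoder = nn.GRU(hidden_size, hidden_size,
                              batch_first=True).to(self.device)
        self.decoder = nn.Linear(hidden_size, vocab_size).to(self.device)
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(
            list(self.encoder.parameters()) + list(self.decoder.parameters()),
            lr=self.learning_rate)

    def _run_iter(self, batch, training):
        # The batch layout is an assumption: pre-embedded inputs and labels.
        x = batch['x'].to(self.device)       # (batch, seq_len, hidden_size)
        y = batch['y'].to(self.device)       # (batch,)
        _, h = self.encoder(x)               # h: (1, batch, hidden_size)
        logits = self.decoder(h.squeeze(0))  # (batch, vocab_size)
        return logits, self.loss_fn(logits, y)

    def _predict_batch(self, batch):
        x = batch['x'].to(self.device)
        _, h = self.encoder(x)
        return self.decoder(h.squeeze(0)).argmax(dim=-1)
```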

src/callbacks.py

Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
+import math
+import json
+import pickle
+
+
+class Callback:
+    def __init__(self):
+        pass
+
+    def on_epoch_end(self, log_train, log_valid, model):
+        pass
+
+
+class MetricsLogger(Callback):
+    def __init__(self, log_dest):
+        self.history = {
+            'train': [],
+            'valid': []
+        }
+        self.log_dest = log_dest
+
+    def on_epoch_end(self, log_train, log_valid, model):
+        log_train['epoch'] = model.epoch
+        log_valid['epoch'] = model.epoch
+        self.history['train'].append(log_train)
+        self.history['valid'].append(log_valid)
+        with open(self.log_dest, 'w') as f:
+            json.dump(self.history, f, indent=' ')
+
+    def load(self, epoch):
+        # truncate the history so a resumed run continues from `epoch`
+        with open(self.log_dest, 'r') as f:
+            self.history = json.loads(f.read())
+        self.history['train'] = self.history['train'][:epoch + 1]
+        self.history['valid'] = self.history['valid'][:epoch + 1]
+
+
+class ModelCheckpoint(Callback):
+    def __init__(self, filepath,
+                 monitor='loss',
+                 verbose=0,
+                 mode='min'):
+        self._filepath = filepath
+        self._verbose = verbose
+        self._monitor = monitor
+        self._best = math.inf if mode == 'min' else -math.inf
+        self._mode = mode
+
+    def on_epoch_end(self, log_train, log_valid, model):
+        # assumes a validation set is configured, so log_valid is not None
+        score = log_valid[self._monitor]
+        if self._mode == 'min':
+            if score < self._best:
+                self._best = score
+                model.save(self._filepath)
+                if self._verbose > 0:
+                    print('Best model saved (%f)' % score)
+
+        elif self._mode == 'max':
+            if score > self._best:
+                self._best = score
+                model.save(self._filepath)  # save only the best model; ignore the epoch
+                if self._verbose > 0:
+                    print('Best model saved (%f)' % score)
+
+        elif self._mode == 'all':
+            model.save('{}.{}'
+                       .format(self._filepath, model.epoch))
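
A usage sketch tying the two callbacks to BasePredictor.fit_dataset; predictor and train_data stand in for objects built elsewhere in the repo, and the paths are assumptions. Note that ModelCheckpoint reads log_valid, so a validation set must be configured, and that mode='all' writes model.pkl.<epoch>, which is exactly what the new .gitignore pattern model.pkl.* matches.

```
from callbacks import MetricsLogger, ModelCheckpoint

# predictor: any BasePredictor subclass; train_data: a torch Dataset.
callbacks = [
    MetricsLogger('../models/your_model_folder/log.json'),
    ModelCheckpoint('../models/your_model_folder/model.pkl',
                    monitor='loss', verbose=1, mode='all'),
]
predictor.fit_dataset(train_data, callbacks=callbacks)
```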
