Skip to content

Commit b7f00d9

Browse files
committed
Add preprocessing actions and replace \n\n with \c\c to meet the embeddings
1 parent a9c020d commit b7f00d9

14 files changed

+143
-7
lines changed

primer/run_bash/cloudflared

8.59 MB
Binary file not shown.

primer/run_bash/cloudflared.log

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{"level":"info","time":"2022-04-18T08:33:02Z","message":"Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps"}
2+
{"level":"info","time":"2022-04-18T08:33:02Z","message":"Requesting new quick Tunnel on trycloudflare.com..."}
3+
{"level":"info","time":"2022-04-18T08:33:04Z","message":"Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps"}
4+
{"level":"info","time":"2022-04-18T08:33:04Z","message":"Requesting new quick Tunnel on trycloudflare.com..."}
5+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"+--------------------------------------------------------------------------------------------+"}
6+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"| Your quick Tunnel has been created! Visit it at (it may take some time to be reachable): |"}
7+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"| https://counties-integrate-risk-basin.trycloudflare.com |"}
8+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"+--------------------------------------------------------------------------------------------+"}
9+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"Cannot determine default configuration path. No file [config.yml config.yaml] in [~/.cloudflared ~/.cloudflare-warp ~/cloudflare-warp /etc/cloudflared /usr/local/etc/cloudflared]"}
10+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"Version 2022.4.1"}
11+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"GOOS: linux, GOVersion: go1.17.5, GoArch: amd64"}
12+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"Settings: map[logfile:./cloudflared.log metrics:localhost:45678 protocol:quic url:ssh://localhost:22]"}
13+
{"level":"info","autoupdateFreq":86400000,"time":"2022-04-18T08:33:07Z","message":"Autoupdate frequency is set"}
14+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"Generated Connector ID: 6d7578f4-8393-4b89-ad9d-b07470488c00"}
15+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"Initial protocol quic"}
16+
{"level":"info","time":"2022-04-18T08:33:07Z","message":"Starting metrics server on 127.0.0.1:45678/metrics"}
17+
{"level":"info","connIndex":0,"location":"AMS","time":"2022-04-18T08:33:08Z","message":"Connection 9db178f8-c2a4-4578-a4d6-c571784c418b registered"}
18+
{"level":"info","connIndex":1,"location":"LHR","time":"2022-04-18T08:33:08Z","message":"Connection 3a1967a6-2407-49c5-9cfe-d06ac78588ef registered"}
19+
{"level":"info","connIndex":2,"location":"AMS","time":"2022-04-18T08:33:09Z","message":"Connection 22cef34e-9d3b-4c8b-bbb0-af452219ccd2 registered"}
20+
{"level":"info","connIndex":3,"location":"LHR","time":"2022-04-18T08:33:10Z","message":"Connection 6d4bbbf6-1180-4d29-918f-11cd26014df0 registered"}
21+
{"level":"info","time":"2022-04-18T09:06:02Z","message":"Initiating graceful shutdown due to signal interrupt ..."}
22+
{"level":"info","connIndex":0,"time":"2022-04-18T09:06:02Z","message":"Unregistered tunnel connection"}
23+
{"level":"info","connIndex":2,"time":"2022-04-18T09:06:02Z","message":"Unregistered tunnel connection"}
24+
{"level":"info","connIndex":1,"time":"2022-04-18T09:06:03Z","message":"Unregistered tunnel connection"}
25+
{"level":"info","connIndex":3,"time":"2022-04-18T09:06:03Z","message":"Unregistered tunnel connection"}
26+
{"level":"info","time":"2022-04-18T09:06:03Z","message":"Tunnel server stopped"}
27+
{"level":"info","time":"2022-04-18T09:06:03Z","message":"Metrics server stopped"}

primer/run_bash/fine_tune_primer.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,6 @@ CUDA_LAUNCH_BLOCKING=1 python primer_main_fs.py \
2929
--rand_seed 42 \
3030
--saveTopK 5 \
3131
--test_imediate \
32-
--test_batch_size 8 \
32+
--test_batch_size 4 \
3333
--grad_ckpt \
3434
> ../finetune_${DATA_NAME}_${MODEL_NAME}_${RAND_SEED}.out 2>&1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
{
2+
"gpus": 0,
3+
"accelerator": null,
4+
"mode": "train",
5+
"debug_mode": false,
6+
"compute_rouge": false,
7+
"saveRouge": false,
8+
"progress_bar_refresh_rate": 1,
9+
"model_path": "./longformer_summ_multinews/",
10+
"ckpt_path": null,
11+
"saveTopK": 3,
12+
"resume_ckpt": null,
13+
"data_path": "../dataset/multi_news",
14+
"dataset_name": "multi_news",
15+
"tokenizer": "facebook/bart-base",
16+
"num_workers": 1,
17+
"batch_size": 4,
18+
"max_length_input": 4096,
19+
"max_length_tgt": 1024,
20+
"min_length_tgt": 0,
21+
"join_method": "concat_start_wdoc_global",
22+
"attention_dropout": 0.1,
23+
"attention_mode": "sliding_chunks",
24+
"attention_window": 512,
25+
"label_smoothing": 0.0,
26+
"adafactor": false,
27+
"fp32": false,
28+
"grad_ckpt": false,
29+
"rand_seed": 0,
30+
"primer_path": "../PRIMER_model/",
31+
"limit_valid_batches": null,
32+
"lr": 3e-05,
33+
"warmup_steps": 1000,
34+
"report_steps": 50,
35+
"val_check_interval": 1.0,
36+
"accum_data_per_step": 16,
37+
"total_steps": 50000,
38+
"num_train_data": -1,
39+
"remove_masks": false,
40+
"fix_lr": false,
41+
"test_imediate": false,
42+
"fewshot": false,
43+
"limit_test_batches": null,
44+
"beam_size": 1,
45+
"length_penalty": 1.0,
46+
"mask_num": 0,
47+
"test_batch_size": -1,
48+
"applyTriblck": false,
49+
"acc_batch": 4
50+
}

primer/run_bash/nohup.out

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nohup: ignoring input
Binary file not shown.
Binary file not shown.

primer/script/check_cuda.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import torch
2+
# print(torch.rand(1, device="cuda"))
3+
# print(torch.rand(1, device="cpu"))
4+
print(torch.cuda.is_available())
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{
2+
"gpus": 0,
3+
"accelerator": null,
4+
"mode": "train",
5+
"debug_mode": false,
6+
"compute_rouge": false,
7+
"saveRouge": false,
8+
"progress_bar_refresh_rate": 1,
9+
"model_path": "./longformer_summ_multinews/",
10+
"ckpt_path": null,
11+
"saveTopK": 3,
12+
"resume_ckpt": null,
13+
"data_path": "../dataset/multi_news",
14+
"dataset_name": "multi_news",
15+
"tokenizer": "facebook/bart-base",
16+
"num_workers": 8,
17+
"batch_size": 4,
18+
"max_length_input": 4096,
19+
"max_length_tgt": 1024,
20+
"min_length_tgt": 0,
21+
"join_method": "concat_start_wdoc_global",
22+
"attention_dropout": 0.1,
23+
"attention_mode": "sliding_chunks",
24+
"attention_window": 512,
25+
"label_smoothing": 0.0,
26+
"adafactor": false,
27+
"fp32": false,
28+
"grad_ckpt": false,
29+
"rand_seed": 0,
30+
"primer_path": "../PRIMER/",
31+
"limit_valid_batches": null,
32+
"lr": 3e-05,
33+
"warmup_steps": 1000,
34+
"report_steps": 50,
35+
"val_check_interval": 1.0,
36+
"accum_data_per_step": 16,
37+
"total_steps": 50000,
38+
"num_train_data": -1,
39+
"remove_masks": false,
40+
"fix_lr": false,
41+
"test_imediate": false,
42+
"fewshot": false,
43+
"eval_steps": 2500,
44+
"limit_test_batches": null,
45+
"beam_size": 1,
46+
"length_penalty": 1.0,
47+
"mask_num": 0,
48+
"test_batch_size": -1,
49+
"applyTriblck": false,
50+
"acc_batch": 4
51+
}

primer/script/primer_main_fs.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,12 @@ def __init__(self, args):
7070

7171
self.use_ddp = args.accelerator == "ddp"
7272
self.docsep_token_id = self.tokenizer.convert_tokens_to_ids("<doc-sep>")
73-
# self.keep_token_id = self.tokenizer.convert_tokens_to_ids("<KEEP>")
74-
# self.add_token_id = self.tokenizer.convert_tokens_to_ids("<ADD>")
75-
# self.sub_token_id = self.tokenizer.convert_tokens_to_ids("<SUB")
7673
self.tokenizer.add_special_tokens({'additional_special_tokens': ["<KEEP>", "<ADD>", "<SUB>"]})
7774
self.model.resize_token_embeddings(len(self.tokenizer))
75+
76+
self.keep_token_id = self.tokenizer.convert_tokens_to_ids("<KEEP>")
77+
self.add_token_id = self.tokenizer.convert_tokens_to_ids("<ADD>")
78+
self.sub_token_id = self.tokenizer.convert_tokens_to_ids("<SUB>")
7879

7980
# self.special_tokens_dict = {'additional_special_tokens': ['<KEEP>', '<ADD>', '<SUB>']}
8081
# num_added_toks = self.tokenizer.add_special_tokens(special_tokens_dict)
@@ -102,9 +103,9 @@ def _prepare_input(self, input_ids):
102103

103104
if self.args.join_method == "concat_start_wdoc_global":
104105
attention_mask[input_ids == self.docsep_token_id] = 2
105-
# attention_mask[input_ids == self.keep_token_id] = 2
106-
# attention_mask[input_ids == self.add_token_id] = 2
107-
# attention_mask[input_ids == self.sub_token_id] = 2
106+
attention_mask[input_ids == self.keep_token_id] = 2
107+
attention_mask[input_ids == self.add_token_id] = 2
108+
attention_mask[input_ids == self.sub_token_id] = 2
108109

109110
if self.args.attention_mode == "sliding_chunks":
110111
half_padding_mod = self.model.config.attention_window[0]

0 commit comments

Comments
 (0)