import transformers
from transformers import AutoTokenizer
# Initialize the model
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Create pipeline
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    device_map="auto"
)
# Load the problems
english_1 = """
빈칸에 알맞는 것을 고르시오.
The founding population of our direct ancestors is not
thought to have been much larger than 2,000 individuals;
some think the group was as small as a few hundred. How,
then, did we go from such a fragile minority population to a
tide of humanity 7 billion strong and growing? There is only
one way, according to Richard Potts. You give up on
___________. You don't try to beat back the changes. You
begin to care about consistency within a given habitat,
because such consistency isn't an option. You adapt to
variation itself. It was a brilliant strategy. Instead of
learning how to survive in just one or two ecological
environments, we took on the entire globe.
1번 "stability" 2번 "morality" 3번 "fairness" 4번 "reputation" 5번 "challenges"
"""
english_2 = """대화의 빈 칸에 알맞은 것을 고르세요.
A : Happy birthday!
B : Oh, ____________
1번 "sorry" 2번 "thank you" 3번 "June first"
"""
english_3 = """주어진 단어에 포함되는 단어를 모두 고르시오.
주어진 단어 : "Pet"
1번 "cat" 2번 "picture" 3번 "rabbit" 4번 "toothpaste"
"""
korean_1 = """동물 친구들이 곰에게 글을 읽을 때의 바른 자세에 대하여 말하고 있습니다. 토끼는 의자를 당겨서 앉아야 한다고 하였습니다. 기린은 허리를 곧게 펴야 한다고 하였습니다. 생쥐는 책과 눈의 거리를 알맞게 해야 한다고 하였습니다.
1. 토끼가 곰에게 해 준 말은 무엇입니까?
1번 "허리를 곧게 펴야 한다." 2번 "의자를 당겨서 앉아야 한다." 3번 "다리를 쭉 펴고 읽어야 한다." 4번 "책상 위에 엎드려 읽어야 한다." 5번 "책과 팔의 거리를 알맞게 해야 한다."
"""
korean_2 = """다음 중 자음자를 고르세요.
1번 "ㅏ" 2번 "ㅣ" 3번 "ㅁ" 4번 "ㅜ" 5번 "ㅛ"
"""
korean_3 = """나무 나무 무슨 나무
가자 가자 감나무
배가 아파 배나무
바람 솔솔 _______
빈칸에 들어갈 말로 가장 알맞는 것은 무엇입니까?
1번 "소나무" 2번 "감나무" 3번 "배나무" 4번 "사과나무" 5번 "포도나무"
"""
Running the prompt
message = [
    {"role": "system", "content": "You are a helpful assistant chatbot."},
    {"role": "user", "content": english_1 + "Speak as a teacher speaks to a student. Please tell me the correct answer to the problem and provide an appropriate explanation. Please speak Korean."}
]
prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)
import time
start = time.time()
# Generate text
sequences = pipeline(
    prompt,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    num_return_sequences=1,
    max_length=2000,
)
print(sequences[0]['generated_text'])
end = time.time()
print(f"{end - start:.5f} sec")
Fine Tuning
from unsloth import FastLanguageModel, PatchFastRL
PatchFastRL("GRPO", FastLanguageModel)
from unsloth import is_bfloat16_supported
import torch
max_seq_length = 512 # Can increase for longer reasoning traces
lora_rank = 16 # Larger rank = smarter, but slower
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_id,
    max_seq_length = max_seq_length,
    load_in_4bit = True, # False for LoRA 16bit
    fast_inference = True, # Enable vLLM fast inference
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.7, # Reduce if out of memory
)
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none", # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False, # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)
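To verify how small the LoRA update actually is, the trainable-parameter count can be printed; this assumes the model returned by get_peft_model exposes the standard PEFT interface:
# Report trainable vs. total parameters after attaching the LoRA adapters
model.print_trainable_parameters()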
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass
from datasets import load_dataset
dataset = load_dataset("mlabonne/FineTome-100k", split = "train")
from unsloth.chat_templates import standardize_sharegpt
dataset = standardize_sharegpt(dataset)
dataset = dataset.map(formatting_prompts_func, batched = True,)
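Before training, it is worth printing one mapped example to confirm that the llama-3.1 chat template was applied as expected. A minimal sketch, using the dataset above:
# Sanity check: inspect a single formatted training example
print(dataset[5]["text"])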
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)
# Sanity check: decode the labels of one example, replacing masked (-100) positions with spaces,
# to confirm that only the assistant responses remain as training targets
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[5]["labels"]])
import time
start = time.time()
trainer_stats = trainer.train()
end = time.time()
print(f"{end - start:.5f} sec")