moelanoby
/

Kok-GPT

@@ -14,16 +14,16 @@ and to use it with text generation as a base model :3 (not recommended 3: needs
 from transformers import AutoTokenizer
 import torch
-# Load tokenizer (replace with your actual tokenizer)
 tokenizer = AutoTokenizer.from_pretrained("moelanoby/Kok-GPT")
-# Load your custom model (ensure trust_remote_code=True)
 model = BucketMemoryModel.from_pretrained(
     "moelanoby/Kok-GPT",
     trust_remote_code=True
 )
-# Generate text
 def generate_text(prompt, max_length=50):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
@@ -32,10 +32,10 @@ def generate_text(prompt, max_length=50):
     )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
-# Example usage
 prompt = "Hello"
 generated = generate_text(prompt)
-print(f"Generated: {generated}")
 ```
 Either way, it was trained on only 10K rows of the FineWeb dataset, which is considered insufficient. I did end up with an average loss of 2.3468, so yeah, you can still fine-tune the model, but by the time I get stronger GPUs I'll just target 7B or 14B parameters, etc.

 from transformers import AutoTokenizer
 import torch
+# Load tokenizer >:D
 tokenizer = AutoTokenizer.from_pretrained("moelanoby/Kok-GPT")
+# Load mi model :3 (ensure trust_remote_code=True)
 model = BucketMemoryModel.from_pretrained(
     "moelanoby/Kok-GPT",
     trust_remote_code=True
 )
+# Generate text with this function :D
 def generate_text(prompt, max_length=50):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
     )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+# change hello to anything you like :D
 prompt = "Hello"
 generated = generate_text(prompt)
+print(f"Generated text >:3: {generated}")
 ```
 Either way, it was trained on only 10K rows of the FineWeb dataset, which is considered insufficient. I did end up with an average loss of 2.3468, so yeah, you can still fine-tune the model, but by the time I get stronger GPUs I'll just target 7B or 14B parameters, etc.