import React from "react";
import { CopyBlock, anOldHope } from "react-code-blocks";
import downloadIcon from "../../../assets/images/download.png";
import launchImg from "../../../assets/images/fined-tunned-llm/image1.webp";
import dockerImg from "../../../assets/images/fined-tunned-llm/image2.webp";
import healthImg from "../../../assets/images/fined-tunned-llm/image3.webp";
import modelAPIPageImg from "../../../assets/images/fined-tunned-llm/image4.webp";
import rollbackImg from "../../../assets/images/fined-tunned-llm/image5.webp";
import customRuntimeImg from "../../../assets/images/fined-tunned-llm/image6.webp";
import createPageImg from "../../../assets/images/fined-tunned-llm/image7.webp";
import Navigation from "../../Navigation";
import { scrollToId } from "../../../utils/reusableFunctions";

function FineTunnedLlamaModel() {
    // Literal text of the code samples shown in this guide. Each entry is a
    // raw template string that is handed to <CopyBlock text={...} />, so the
    // content must be exactly what a reader should see and copy.
    const codeSnippets = {
        dependencyInstall: `pip install transformers datasets huggingface_hub mlflow kagglehub`,
        modelTokenizer: `from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from datasets import Dataset
import pandas as pd
import kagglehub
from huggingface_hub import login

# Authenticate with Hugging Face API
huggingface_token = "<your_hf_token>"
login(huggingface_token)

# Define model name
model_name = "meta-llama/Llama-3.2-1B"

# Load model configuration
config = AutoConfig.from_pretrained(model_name)
config.rope_scaling = {"type": "linear", "factor": 2.0}  # Adjust rotary position embeddings

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
model = AutoModelForCausalLM.from_pretrained(model_name, config=config)
model.resize_token_embeddings(len(tokenizer))`,
        loadDataset: `# Download dataset from Kaggle
path = kagglehub.dataset_download("jpmiller/layoutlm")
df = pd.read_csv(f"{path}/medquad.csv")
df = df.sample(frac=0.1)  # Use 10% of dataset for faster training

# Convert dataset to Hugging Face Dataset format
hf_dataset = Dataset.from_pandas(df[['text']])
hf_dataset = hf_dataset.filter(lambda example: example["text"] is not None)

# Tokenization
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=256)

# Apply tokenization
tokenized_dataset = hf_dataset.map(tokenize_function, batched=True, remove_columns=["text"])`,
        model: `from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Define training arguments
training_args = TrainingArguments(
    output_dir="./llama-retrained",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    fp16=True,
    save_total_limit=3,
)

# Train-test split
train_test_split = tokenized_dataset.train_test_split(test_size=0.1)
train_dataset, eval_dataset = train_test_split["train"], train_test_split["test"]

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)

trainer.train()`,
        saveUpload: `trainer.save_model("./model-retrained")
tokenizer.save_pretrained("./model-retrained")

trainer.push_to_hub("hugging-face-model-id")
tokenizer.push_to_hub("hugging-face-model-id")`,
        loadFunction: `from mlserver import MLModel
from transformers import AutoTokenizer, AutoModelForCausalLM

class CustomHuggingFaceRuntime(MLModel):
    async def load(self) -> None:
        # Retrieve model ID from MLServer settings
        model_id = self.settings.parameters.extra["model_id"]

        # Load tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(model_id).to("cpu")

        # Indicate the model is ready
        self.ready = True`,
        predict: `from mlserver.types import InferenceRequest, InferenceResponse, ResponseOutput

class CustomHuggingFaceRuntime(MLModel):
    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        # Extract prompt from request
        prompt = payload.inputs[0].data[0]

        # Tokenize input text and move it to CPU
        inputs = self.tokenizer(str(prompt), return_tensors="pt", truncation=True).to("cpu")

        # Generate output from the model
        output = self.model.generate(inputs["input_ids"], max_new_tokens=100, num_beams=5)

        # Decode output to readable text
        response_text = self.tokenizer.decode(output[0], skip_special_tokens=True)

        # Format response for MLServer
        response_output = ResponseOutput(
            name="generated_text",
            shape=[1],
            datatype="BYTES",
            data=[response_text]
        )

        return InferenceResponse(model_name=self.name, outputs=[response_output])`,
        modelSettings: `{
    "name": "<model service name>",
    "implementation": "custom_runtime.CustomHuggingFaceRuntime",
    "parameters": {
      "extra": {
        "model_id": "hugging-face-model-id"
      }
    }
  }`,
        dockerFile: `FROM python:3.11.2
  WORKDIR /app
  
  # Copy model configuration and runtime file
  COPY model-settings.json /app/model-settings.json
  COPY custom_runtime.py /app/custom_runtime.py
  
  # Copy dependencies
  COPY model/requirements.txt /app/model/requirements.txt
  
  # Install dependencies
  RUN pip install --no-cache-dir -r /app/model/requirements.txt
  RUN pip install --no-cache-dir mlserver-mlflow mlserver
  
  # Expose the port MLServer will run on
  EXPOSE 8080
  
  # Start MLServer
  CMD ["mlserver", "start", "/app"]`,
        requirement: `  datasets==2.14.5
  tokenizers==0.14.1
  transformers==4.34.0  # Change this to be compatible with tokenizers
  trl==0.4.4
  accelerate==0.22.0
  huggingface-hub==0.17.3  # Ensure compatibility with all dependencies`,
        dockerBuild: `docker build -t <docker_username>/<image_name>:<tag> .`,
        dockerPush: `docker push <docker_username>/<image_name>:<tag>`,
        dockerRun: `docker run -it -p 8080:8080 <docker_username>/<image_name>:<tag>`,
        curl: `curl http://localhost:8080/v2/health/ready`,
        inputPayload: `{
    "inputs": [
      {
        "name": "prompt",
        "shape": [1],
        "datatype": "BYTES",
        "data": ["What causes cancer?"]
      }
    ]
  }`,
        // The shell line-continuation backslashes are written as `\\`: inside
        // a template literal a lone `\` followed by a newline is a JavaScript
        // line continuation and would be dropped from the rendered command.
        curlPost: `curl -X POST http://localhost:8080/v2/models/{model_name}/infer \\
  -H "Content-Type: application/json" \\
  -d '{
    "inputs": [
      {
        "name": "prompt",
        "shape": [1],
        "datatype": "BYTES",
        "data": ["What causes cancer?"]
      }
    ]
  }'`,
        // Placeholders are quoted so the sample stays valid JSON.
        modelOutput: `{
    "model_name": "<model service name>",
    "id": "<id>",
    "parameters": {},
    "outputs": [
        {
            "name": "generated_text",
            "shape": [
                1
            ],
            "datatype": "BYTES",
            "data": [
                "What causes cancer? Cigarette smoking is the most common cause of cancer. Most people with cancer are smokers or have been smokers in the past. Breathing in other fumes and dusts over long periods of time can also lead to cancer, especially if the cancer risk is inhaled. Pipe, cigar, and other types of tobacco smoking can cause cancer if they come in contact with other people who smoke and are inhale pollutants such as dust and poor air quality. Some studies have shown an association between cancer and"
            ]
        }
    ]
}`,
        // The token placeholder is quoted so the file is valid JSON once the
        // reader substitutes a real value.
        mlconfig: `{
    "docker_image":"<docker_username>/<image_name>:<tag>",
    "docker_token":"<docker pat>"
}`,
        publish: `from vipas.model import ModelClient
from vipas.exceptions import UnauthorizedException, NotFoundException, ClientException


# Paths to MLflow config files
mlflow_config_path = "/path/to/mlflow_config.json"  # Optional, Path to the MLflow config file which contains details like the model docker image and a valid docker personal access token. Both are required.

# Unique model ID to identify the model in Vipas.AI
model_id = "your_model_id"

try:
    # Initialize the ModelClient
    model_client = ModelClient()

    # Publish the model
    model_client.publish(
        model_id=model_id,
        model_framework_type="mlflow", mlflow_config_path=mlflow_config_path,
        auto_launch=True,  
        override_model=True 
    )
except UnauthorizedException as e:
    print(f"UnauthorizedException: {e}")
except NotFoundException as e:
    print(f"NotFoundException: {e}")
except ClientException as e:
    print(f"ClientException: {e}")
except Exception as e:
    print(f"Exception: {e}")`,
        dockerImage: `<docker_username>/<image_name>:<tag>`,
        input: `{
    "inputs": [
      {
        "name": "prompt",
        "shape": [1],
        "datatype": "BYTES",
        "data": ["How to treat headaches?"]
      }
    ]
  }`,
        sdkPredict: `from vipas import model
from vipas.exceptions import ClientException
import os 
try: 
    os.environ["VPS_AUTH_TOKEN"] = "<vps_auth_token>"
    model_client = model.ModelClient()
    request_body = {
        "inputs": [
            {
                "name": "prompt",
                "shape": [1],
                "datatype": "BYTES",
                "data": ["How to treat headaches?"]
            }
        ]
    }
    api_response = model_client.predict(model_id="<model_id>", input_data=request_body)
    print("Model output", api_response)
except ClientException as e:
    print(e)`,
    };
    return (
        <>
            <div className="flex my-8 relative break-words">
                <div className="w-[100%] xl:w-[66.66%] px-[24px]">
                    <h1 className="mb-3 heading">
                        Deploying a Fine-Tuned LLaMA Model on
                        Vipas.AI
                    </h1>
                    <div className="border rounded-[5px] w-[250px] p-2 shadow-md my-2 hover:bg-gray-100">
                        <a
                            href="https://utils.vipas.ai/vps-ipynb/ml_flow_llama_health/disease_dataset_1b.zip"
                            target="_blank"
                            className="ml-3 flex items-center"
                        >
                            <img
                                src={downloadIcon}
                                width={32}
                                height={32}
                                alt="text-summarization-download"
                            />{" "}
                            <span className="ml-2">Download Notebook</span>
                        </a>
                    </div>
                    <div className="my-6" id="introduction"> 
                        <h2 className="sub-heading">Introduction</h2>

                        <p className="my-2">
                            Large Language Models (LLMs) have become a
                            cornerstone of modern AI applications, powering
                            various natural language processing (NLP) tasks,
                            including text generation, summarization, and
                            question answering. Meta's LLaMA (Large Language
                            Model Meta AI) is one of the leading LLM
                            architectures, designed for efficient inference and
                            fine-tuning.
                        </p>
                        <p>
                            This guide provides a comprehensive, step-by-step
                            technical approach to fine-tuning the Meta LLaMA
                            model (Llama-3.2-1B) using a domain-specific dataset
                            and deploying it on{" "}
                            <a
                                href="http://vipas.ai"
                                target="_blank"
                                className="links !text-[16px]"
                            >
                                Vipas.AI
                            </a>
                            . The workflow covers dataset preparation, model
                            fine-tuning, MLflow integration, Docker
                            containerization, and final deployment &
                            monetization on the{" "}
                            <a
                                href="http://vipas.ai"
                                target="_blank"
                                className="links !text-[16px]"
                            >
                                Vipas.AI
                            </a>{" "}
                            platform.
                        </p>
                    </div>

                    <div className="my-6" id="monetize">
                        <h2 className="sub-heading">
                            Monetize Your AI Expertise & Get Discovered by Top
                            Recruiters on Vipas.AI
                        </h2>
                        <p className="my-2">
                            The AI landscape is advancing at a rapid pace, and
                            domain-specific AI models are quickly becoming the
                            next frontier in AI innovation. As an AI creator,
                            the potential to create real-world impact in
                            specialized fields—like healthcare, finance, and
                            agriculture—has never been greater. However, the
                            traditional route to turning AI expertise into
                            revenue can be daunting. Building infrastructure,
                            securing GPUs, navigating complex cloud setups, and
                            dealing with the nuances of monetizing AI work can
                            take months, if not years.
                        </p>
                        <p className="font-semibold">
                            That’s where <strong>Vipas.AI</strong> steps in,
                            giving you the opportunity to skip the technical
                            heavy-lifting and go straight to monetizing your AI
                            models. By publishing your fine-tuned
                            domain-specific LLaMA models on Vipas.AI, you can
                            turn your AI knowledge into a tangible,
                            revenue-generating asset—without the headache of
                            managing infrastructure.
                        </p>

                        <ul className="list-inside list-disc my-2">
                            <li>
                                <strong>Get Paid for Every API Call</strong>:
                                With Vipas.AI, you don’t need to worry about
                                setting up your own cloud infrastructure or
                                managing resources. You simply upload your AI
                                model, set your pricing, and get paid every time
                                someone uses it via an API call. It’s that
                                simple—no upfront costs, no ongoing maintenance.
                                Just your AI expertise earning revenue.
                            </li>
                            <li>
                                <strong>
                                    Attract Industry Leaders & Recruiters
                                </strong>
                                : Your published AI model is much more than just
                                code—it’s a <strong>live portfolio</strong>.
                                When enterprises see your model solving
                                specific, real-world challenges, they’ll
                                recognize your capabilities and come knocking
                                with job offers or collaboration opportunities,
                                without you needing to submit a traditional
                                resume. Imagine showcasing your work in a way
                                that leads directly to high-value job prospects
                                and career advancement.
                            </li>
                            <li>
                                <strong>
                                    First-Mover Advantage in AI Monetization
                                </strong>{" "}
                                – While most AI platforms focus on hosting code
                                or providing model training environments,{" "}
                                <strong>Vipas.AI</strong> goes a step further by
                                enabling creators to publish live, runnable
                                models. This gives you an{" "}
                                <strong>early advantage</strong> in the rapidly
                                expanding AI-as-a-Service marketplace. By being
                                one of the first to monetize your specialized AI
                                models, you position yourself at the forefront
                                of a revolutionary shift in how AI will be
                                consumed across industries.
                            </li>
                        </ul>
                        <p>
                            With the powerful combination of AI expertise and
                            Vipas.AI’s platform, you not only gain access to a
                            new revenue stream but also ensure that your work is
                            recognized in a way that traditional methods can’t
                            match. Whether you’re looking to monetize your
                            health-focused AI model or expand into another
                            industry, Vipas.AI makes it easy to get started—and
                            start earning—today.
                        </p>
                        <p>
                            This guide will walk you step-by-step through how to
                            fine-tune and deploy your own{" "}
                            <strong>LLaMA model</strong> on Vipas.AI. By the
                            end, you’ll not only have an optimized model ready
                            for real-world applications, but also a valuable,
                            income-generating asset in the growing field of
                            domain-specific AI. Let’s get started! 🚀
                        </p>
                    </div>

                    <div className="my-6" id="prerequisites">
                        <h2 className="sub-heading">Prerequisites</h2>
                        <p>
                            Before proceeding, ensure that the following
                            dependencies and configurations are in place:
                        </p>
                        <ul className="my-2 list-inside list-disc">
                            <li>
                                <strong>Python Environment</strong>: Python
                                (&gt;=3.11)
                            </li>
                            <li>
                                <strong>Libraries</strong>:{" "}
                                <code>
                                    transformers, datasets, huggingface_hub,
                                    mlserver, mlflow, kagglehub
                                </code>
                            </li>
                            <li>
                                <strong>Infrastructure</strong>: Docker
                                installed and running
                            </li>
                            <li>
                                <strong>Accounts</strong>: Active Vipas.AI
                                account and access to Hugging Face and Kaggle
                            </li>
                        </ul>
                    </div>
                    <div className="my-6" id="layoutlm-dataset">
                        <h2 className="sub-heading">
                            Fine-Tuning LLaMA on JPMiller's LayoutLM Dataset
                        </h2>
                        <p>
                            Fine-tuning adapts a pre-trained LLM to a
                            domain-specific task by further training it on
                            curated datasets. In this example, we utilize the
                            Llama-3.2-1B model and fine-tune it on JPMiller’s
                            LayoutLM dataset, which contains structured medical
                            text data. Check out the deployed model on Vipas.AI{" "}
                            <a
                                href="http://vipas.ai/models/mdl-b1mxve8nrq9cj"
                                target="_blank"
                                className="links !text-[16px]"
                            >
                                here
                            </a>{" "}
                            and the files used in the model training and upload
                            can be downloaded{" "}
                            <a
                                href="https://utils.vipas.ai/vps-ipynb/ml_flow_llama_health/disease_dataset_1b.zip"
                                target="_blank"
                                className="links !text-[16px]"
                            >
                                here
                            </a>
                            .
                        </p>
                        <img
                            src={healthImg}
                            loading="lazy"
                            className="my-6 tw-w-full tw-h-auto tw-object-contain"
                            alt="health-img"
                        />

                        <p className="font-semibold">
                            Step 1: Setting Up the Environment
                        </p>
                        <p>
                            Before starting, install the required Python
                            packages:
                        </p>
                        <div className="w-full my-4">
                            <CopyBlock
                                text={codeSnippets.dependencyInstall}
                                language="python"
                                showLineNumbers={false}
                                theme={anOldHope}
                                wrapLines={true}
                                codeBlock
                            />
                        </div>
                        <p>
                            This ensures access to pre-trained models, dataset
                            handling, and MLflow logging for experiment
                            tracking.
                        </p>
                        <p className="font-semibold">
                            Step 2: Load Model and Tokenizer
                        </p>
                        <p>
                            The LLaMA model is loaded from Hugging Face's model
                            hub. If the tokenizer has no padding token, we
                            register one (reusing the end-of-sequence token) so
                            that sequences can be padded to a fixed length.
                        </p>
                        <div className="w-full my-4">
                            <CopyBlock
                                text={codeSnippets.modelTokenizer}
                                language="python"
                                showLineNumbers={false}
                                theme={anOldHope}
                                wrapLines={true}
                                codeBlock
                            />
                        </div>
                        <p className="my-2">Explanation:</p>
                        <ul className="my-2 list-disc list-inside">
                            <li>
                                <strong>
                                    Rotary Position Embeddings (RoPE) Scaling
                                </strong>
                                : This modifies the model's ability to process
                                longer sequences efficiently.
                            </li>
                            <li>
                                <strong>Tokenizer Handling</strong>: Ensures
                                padding token existence to avoid mismatches
                                during training.
                            </li>
                            <li>
                                <strong>Token Embeddings Resize</strong>:
                                Adapts the model’s token embeddings after
                                adding special tokens.
                            </li>
                        </ul>
                        <p className="font-semibold">
                            Step 3: Load and Prepare Dataset
                        </p>
                        <p>
                            We download and preprocess{" "}
                            <a
                                href="https://www.kaggle.com/datasets/jpmiller/layoutlm"
                                className="links !text-[16px]"
                                target="_blank"
                            >
                                JPMiller's LayoutLM
                            </a>{" "}
                            dataset using the Kaggle API.
                        </p>
                        <div className="w-full my-4">
                            <CopyBlock
                                text={codeSnippets.loadDataset}
                                language="python"
                                showLineNumbers={false}
                                theme={anOldHope}
                                wrapLines={true}
                                codeBlock
                            />
                        </div>
                        <p className="my-2">Explanation:</p>
                        <ul className="my-2 list-inside list-disc">
                            <li>
                                <strong>Data Sampling</strong>: Reduces dataset
                                size to 10% for efficient experimentation.
                            </li>
                            <li>
                                <strong>Filtering Null Texts</strong>: Ensures
                                clean data input to prevent processing errors.
                            </li>
                            <li>
                                <strong>Tokenization</strong>: Converts raw text
                                into numerical representations with padding and
                                truncation to <code>max_length=256</code>.
                            </li>
                        </ul>
                        <p className="font-semibold">
                            Step 4: Fine-Tune the Model
                        </p>
                        <p>
                            We use the Hugging Face{" "}
                            <a
                                href="https://huggingface.co/docs/transformers/en/main_classes/trainer"
                                target="_blank"
                                className="links !text-[16px]"
                            >
                                Trainer
                            </a>{" "}
                            API to fine-tune the LLaMA model.
                        </p>
                        <div className="w-full my-4">
                            <CopyBlock
                                text={codeSnippets.model}
                                language="python"
                                showLineNumbers={false}
                                theme={anOldHope}
                                wrapLines={true}
                                codeBlock
                            />
                        </div>
                        <p className="font-semibold">Explanation:</p>
                        <ul className="list-disc list-inside my-2">
                            <li>
                                <strong>Gradient Accumulation</strong>:
                                Compensates for small batch sizes by
                                accumulating gradients over multiple steps.
                            </li>
                            <li>
                                <strong>FP16 Precision</strong>: Optimizes
                                training speed and reduces memory usage using
                                half-precision floating-point calculations.
                            </li>
                            <li>
                                <strong>Train-Test Split</strong>: Reserves 10%
                                of the dataset for validation.
                            </li>
                        </ul>
                        <p className="font-semibold">
                            Step 5: Save and Upload Model
                        </p>
                        <p>
                            The trained model is saved locally and pushed to
                            Hugging Face for easy access.
                        </p>
                        <div className="w-full my-4">
                            <CopyBlock
                                text={codeSnippets.saveUpload}
                                language="python"
                                showLineNumbers={false}
                                theme={anOldHope}
                                wrapLines={true}
                                codeBlock
                            />
                        </div>
                        <p>Explanation:</p>
                        <ul className="my-2 list-disc list-inside">
                            <li>
                                <strong>Model Upload</strong>: Enables seamless
                                integration with Vipas.AI by hosting the
                                fine-tuned model on Hugging Face Hub.
                            </li>
                        </ul>
                        <p className="mini-heading">Next Steps:</p>
                        <ul className="list-inside list-disc my-2">
                            <li>
                                {" "}
                                <strong>Containerize the model</strong> using
                                MLServer for inference.
                            </li>
                            <li>
                                <strong>Deploy on Vipas.AI</strong> to enable
                                real-time AI applications.
                            </li>
                        </ul>
                        <p>
                            By following these steps, researchers and developers
                            can fine-tune and deploy robust LLaMA-based NLP
                            models for domain-specific tasks efficiently.
                        </p>
                        <h2 className="sub-heading my-2" id="deploying-model-on-vipas">
                            Deploying the model on Vipas.AI
                        </h2>
                        <p>
                            Deploying large language models (LLMs) efficiently
                            is critical for production use cases. This guide
                            walks through deploying a fine-tuned LLaMA model
                            using MLServer, MLflow, and Docker on Vipas.AI. By
                            leveraging these technologies, you can scale AI
                            applications efficiently and ensure high
                            reliability.
                        </p>
                    </div>

                    <div className="my-6" id="tech-stack">
                        <h2 className="sub-heading">
                            Understanding the Technology Stack
                        </h2>

                        <ul className="list-inside list-disc my-2">
                            <li>
                                <strong>MLflow</strong> for LLM
                                <p>
                                    <a
                                        href="https://mlflow.org/"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        MLflow
                                    </a>{" "}
                                    is a robust platform for managing the
                                    ML lifecycle, including tracking
                                    experiments, model packaging, and
                                    deployment.
                                </p>
                            </li>
                            <li>
                                <strong>MLServer</strong>
                                <p>
                                    <a
                                        href="https://mlserver.readthedocs.io/en/latest/getting-started/index.html"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        MLServer
                                    </a>{" "}
                                    is an inference server optimized for
                                    high-performance model serving, supporting:
                                </p>
                                <ul className="list-inside list-disc my-2">
                                    <li>Multi-model inference</li>
                                    <li>Adaptive batching</li>
                                    <li>Kubernetes integration</li>
                                </ul>
                            </li>
                            <li>
                                <strong>Vipas.AI</strong>
                                <p>
                                    <a
                                        href="http://Vipas.AI"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        Vipas.AI
                                    </a>{" "}
                                    is an AI deployment and management platform
                                    that allows users to:
                                </p>
                                <ul className="list-inside list-disc my-2">
                                    <li>Launch and manage AI models</li>
                                    <li>
                                        Scale deployments with autoscaling
                                    </li>
                                    <li>Monetize AI applications</li>
                                </ul>
                            </li>
                        </ul>
                    </div>
                    <div className="my-6" id="creating-model">
                        <h2 className="sub-heading">
                            Creating a Model on Vipas.AI
                        </h2>
                        <p>
                            To create a model on Vipas.AI, navigate to the{" "}
                            <a
                                href="https://vipas.ai/project/create/main"
                                className="links !text-[16px]"
                                target="_blank"
                            >
                                Create Model Page
                            </a>
                            . Provide the required details such as name,
                            description, category, and permissions. Once
                            completed, click <strong>Next</strong> to proceed.
                        </p>
                        <img
                            src={createPageImg}
                            loading="lazy"
                            className="my-6 tw-w-full tw-h-auto tw-object-contain"
                            alt="create-project"
                        />

                        <p>
                            In the <strong>Custom Runtime</strong> tab, download
                            the pre-configured <strong>Dockerfile</strong> and{" "}
                            <strong>model-settings.json</strong> files, which
                            will serve as the foundation for your custom runtime
                            deployment. For more information, refer to the{" "}
                            <a
                                href="https://docs.vipas.ai/developer-docs/mlflow-model"
                                target="_blank"
                                className="links !text-[16px]"
                            >
                                MLflow Model Documentation
                            </a>
                            ,{" "}
                            <a
                                href="https://docs.vipas.ai/developer-docs/steps-to-create-a-model"
                                className="links !text-[16px]"
                                target="_blank"
                            >
                                Creating a model
                            </a>
                            .
                        </p>
                        <img
                            src={customRuntimeImg}
                            loading="lazy"
                            className="my-6 tw-w-full tw-h-auto tw-object-contain"
                            alt="custom-runtime"
                        />
                    </div>
                    <div className="my-6" id="custom-runtime">
                        <h2 className="sub-heading">
                            Preparing the Model for Custom Runtime
                        </h2>
                        <div className="my-2">
                            <h3 className="mini-heading">
                                Defining a Custom MLServer Runtime
                            </h3>
                            <p>
                                In this section, we define a custom inference
                                runtime for serving the fine-tuned LLaMA model
                                using MLServer. To achieve this, we override the{" "}
                                <code>load</code> and <code>predict</code>{" "}
                                functions in the{" "}
                                <code>CustomHuggingFaceRuntime</code> class,
                                which extends <code>MLModel</code>.
                            </p>
                            <p>
                                This approach ensures that MLServer can properly{" "}
                                <strong>load the model</strong> and{" "}
                                <strong>handle inference requests</strong>,
                                making it suitable for deployment.
                            </p>
                            <p>
                                To learn more about overriding custom runtime
                                methods visit{" "}
                                <a
                                    href="https://docs.vipas.ai/developer-docs/mlflow-model"
                                    target="_blank"
                                    className="links !text-[16px]"
                                >
                                    MLflow Model Documentation
                                </a>
                                ,{" "}
                                <a
                                    href="https://mlserver.readthedocs.io/en/latest/user-guide/custom.html"
                                    className="links !text-[16px]"
                                    target="_blank"
                                >
                                    MLServer docs on custom inference runtimes
                                </a>
                                .
                            </p>
                            <h4 className="mini-heading">
                                Overriding the <code>load</code> Function
                            </h4>
                            <p className="my-2">
                                The <code>load</code> function is responsible
                                for loading the tokenizer and model when the
                                server starts. Here’s what happens inside{" "}
                                <code>load</code>:
                            </p>
                            <p className="font-semibold">Steps:</p>
                            <ol className="list-decimal list-inside ml-2 my-2">
                                <li>
                                    The model ID is retrieved from{" "}
                                    <code>
                                        self.settings.parameters.extra["model_id"]
                                    </code>
                                    .
                                </li>
                                <li>
                                    The <strong>tokenizer</strong> is loaded
                                    using{" "}
                                    <code>
                                        AutoTokenizer.from_pretrained(model_id)
                                    </code>
                                    .
                                </li>
                                <li>
                                    The fine-tuned <strong>LLaMA model</strong>{" "}
                                    is loaded using{" "}
                                    <code>
                                        AutoModelForCausalLM.from_pretrained(model_id)
                                    </code>
                                    , and it is moved to the CPU.
                                </li>
                                <li>
                                    The <code>self.ready = True</code> flag is
                                    set to indicate that the model is
                                    successfully loaded and ready for inference.
                                    <div className="w-full my-4">
                                        <CopyBlock
                                            text={codeSnippets.loadFunction}
                                            language="python"
                                            showLineNumbers={false}
                                            theme={anOldHope}
                                            wrapLines={true}
                                            codeBlock
                                        />
                                    </div>
                                </li>
                            </ol>
                            <p>
                                By overriding <code>load</code>, we ensure that
                                the model and tokenizer are{" "}
                                <strong>properly initialized</strong> before
                                handling any inference requests.
                            </p>

                            <h4 className="mini-heading">
                                Overriding the <code>predict</code> Function
                            </h4>
                            <p className="my-2">
                                The <code>predict</code> function{" "}
                                <strong>
                                    processes incoming inference requests
                                </strong>{" "}
                                and generates responses.
                            </p>
                            <p className="font-semibold">Steps:</p>
                            <ol className="list-decimal list-inside ml-2 my-2">
                                <li>
                                    The function receives an{" "}
                                    <code>InferenceRequest</code> object
                                    containing input data.
                                </li>
                                <li>
                                    The <strong>prompt</strong> text is
                                    extracted from{" "}
                                    <code>payload.inputs[0].data[0]</code>.
                                </li>
                                <li>
                                    The extracted text is{" "}
                                    <strong>tokenized</strong> into tensors for
                                    processing.
                                </li>
                                <li>
                                    The <strong>model generates output</strong>{" "}
                                    using <code>self.model.generate()</code>,
                                    specifying:
                                    <ul className="list-inside list-disc">
                                        <li>
                                            <code>max_new_tokens=100</code> to
                                            limit response length.
                                        </li>
                                        <li>
                                            <code>num_beams=5</code> for
                                            enhanced response quality.
                                        </li>
                                    </ul>
                                </li>
                                <li>
                                    The generated output is{" "}
                                    <strong>decoded</strong> back into
                                    human-readable text.
                                </li>
                                <li>
                                    The response is <strong>formatted</strong>{" "}
                                    as an <code>InferenceResponse</code> object,
                                    ensuring MLServer compatibility.
                                    <div className="w-full my-4">
                                        <CopyBlock
                                            text={codeSnippets.predict}
                                            language="python"
                                            showLineNumbers={false}
                                            theme={anOldHope}
                                            wrapLines={true}
                                            codeBlock
                                        />
                                    </div>
                                </li>
                            </ol>
                            <p>
                                By overriding <code>load</code>, we ensure that
                                the model and tokenizer are{" "}
                                <strong>properly initialized</strong> before
                                handling any inference requests.
                            </p>
                            <p>
                                By overriding <code>predict</code>, we ensure
                                that <strong>model inference</strong> is
                                efficiently handled, returning structured
                                responses in MLServer’s expected format.
                            </p>
                        </div>
                    </div>
                    <div className="my-6" id="configure-deployment">
                        <h2 className="sub-heading">
                            Configuring the Deployment
                        </h2>
                        <p>
                            To deploy the fine-tuned LLaMA model using{" "}
                            <strong>MLServer</strong>, we need to configure the{" "}
                            <strong>
                                model settings, Dockerfile, and dependencies
                            </strong>{" "}
                            before containerizing and deploying the model.
                        </p>
                        <ol className="my-2 list-decimal list-inside">
                            <li>
                                <span className="mini-heading">
                                    Defining <code>model-settings.json</code>
                                </span>
                                <p>
                                    The <code>model-settings.json</code> file
                                    defines how <strong>MLServer</strong> loads
                                    and configures the model. This file is
                                    crucial for specifying the{" "}
                                    <strong>
                                        model service name, runtime
                                        implementation, and model parameters
                                    </strong>
                                    .
                                </p>
                                <p className="font-semibold">
                                    Structure of{" "}
                                    <code>model-settings.json</code>
                                </p>
                                <div className="w-full my-4">
                                    <CopyBlock
                                        text={codeSnippets.modelSettings}
                                        language="json"
                                        showLineNumbers={false}
                                        theme={anOldHope}
                                        wrapLines={true}
                                        codeBlock
                                    />
                                </div>
                                <p className="font-semibold">Explanation:</p>
                                <ul className="list-inside list-disc my-2">
                                    <li>
                                        <code>name →</code> The model service
                                        name (Given by Vipas.AI).
                                    </li>
                                    <li>
                                        <code>implementation →</code> Defines
                                        the custom runtime class
                                        (CustomHuggingFaceRuntime), located in{" "}
                                        <strong>custom_runtime.py</strong>.
                                    </li>
                                    <li>
                                        <code>parameters.extra.model_id →</code>{" "}
                                        Specifies the{" "}
                                        <strong>
                                            Hugging Face model repository
                                        </strong>{" "}
                                        or <strong>local model path</strong>{" "}
                                        from where the model will be loaded.
                                    </li>
                                </ul>
                                <p>
                                    This configuration ensures that MLServer
                                    correctly initializes and serves the
                                    fine-tuned LLaMA model.
                                </p>
                            </li>
                            <li>
                                <span className="mini-heading my-2">
                                    Creating the Dockerfile
                                </span>
                                <p className="">
                                    The <strong>Dockerfile</strong> is
                                    responsible for creating a{" "}
                                    <strong>self-contained</strong>,{" "}
                                    <strong>reproducible environment</strong>{" "}
                                    for running the model inside a container.
                                </p>
                                <p className="font-semibold">Dockerfile</p>
                                <div className="w-full my-4">
                                    <CopyBlock
                                        text={codeSnippets.dockerFile}
                                        language="python"
                                        showLineNumbers={false}
                                        theme={anOldHope}
                                        wrapLines={true}
                                        codeBlock
                                    />
                                </div>
                            </li>

                            <li>
                                <span className="mini-heading my-2">
                                    Defining <code>requirements.txt</code>{" "}
                                    (Model Dependencies){" "}
                                </span>
                                <p className="">
                                    The <code>requirements.txt</code> file
                                    specifies all required Python libraries to
                                    run the model:
                                </p>
                                <div className="w-full my-4">
                                    <CopyBlock
                                        text={codeSnippets.requirement}
                                        language="text"
                                        showLineNumbers={false}
                                        theme={anOldHope}
                                        wrapLines={true}
                                        codeBlock
                                    />
                                </div>
                                <p>
                                    By installing these dependencies, we ensure
                                    the model runs{" "}
                                    <strong>
                                        smoothly inside the container
                                    </strong>
                                    .
                                </p>
                            </li>
                            <li>
                                <span className="mini-heading my-2">
                                    Building and Pushing the Docker Image
                                </span>
                                <p>
                                    After defining the <code>Dockerfile</code>,{" "}
                                    <code>model-settings.json</code> and{" "}
                                    <code>requirements.txt</code>, we need to
                                    build the <strong>Docker image</strong> and
                                    push it to a{" "}
                                    <strong>Docker registry</strong>.
                                </p>
                                <p className="font-semibold">
                                    Building the Docker Image
                                </p>
                                <div className="w-full my-4">
                                    <CopyBlock
                                        text={codeSnippets.dockerBuild}
                                        language="text"
                                        showLineNumbers={false}
                                        theme={anOldHope}
                                        wrapLines={true}
                                        codeBlock
                                    />
                                </div>
                                <p className="font-semibold">
                                    Pushing the Docker Image to a Registry
                                </p>

                                <div className="w-full my-4">
                                    <CopyBlock
                                        text={codeSnippets.dockerPush}
                                        language="text"
                                        showLineNumbers={false}
                                        theme={anOldHope}
                                        wrapLines={true}
                                        codeBlock
                                    />
                                </div>
                                <p>
                                    By following these steps, the model is
                                    packaged into a production-ready Docker
                                    container that can be deployed on Vipas.AI.
                                    For additional guidance, visit the{" "}
                                    <a
                                        href="https://docs.vipas.ai/developer-docs/mlflow-model"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        Vipas.AI MLflow Documentation
                                    </a>
                                    .
                                </p>
                            </li>
                        </ol>
                    </div>

                    <div className="my-6" id="docker-container">
                        <h2 className="sub-heading">
                            Testing the Docker Container
                        </h2>
                        <ol className="list-decimal list-inside my-2">
                            <li>
                                <span className="my-2 mini-heading">
                                    Running the Docker Container
                                </span>
                                <p>
                                    After building the Docker image, testing the
                                    container interactively ensures that the
                                    model loads correctly and the environment is
                                    set up properly. Use the following command
                                    to run the Docker container interactively:
                                </p>
                                <div className="w-full my-4">
                                    <CopyBlock
                                        text={codeSnippets.dockerRun}
                                        language="shell"
                                        showLineNumbers={false}
                                        theme={anOldHope}
                                        wrapLines={true}
                                        codeBlock
                                    />
                                </div>
                                <p className="font-semibold">
                                    Explanation of the Command:
                                </p>
                                <ul className="list-disc list-inside my-2">
                                    <li>
                                        <code>-it</code>: Run the container in
                                        interactive mode, allowing you to see
                                        logs and interact with the process.
                                    </li>
                                    <li>
                                        <code>-p 8080:8080</code>: Maps the
                                        default MLServer port inside the
                                        container to your local machine,
                                        enabling external API calls.
                                    </li>
                                </ul>

                                <p>
                                    When the container starts, MLServer will
                                    initialize and load the fine-tuned LLaMA
                                    model. You should see logs indicating that
                                    the model and tokenizer are being loaded
                                    from Hugging Face. If there are any issues
                                    during initialization, they will be
                                    displayed in the terminal for debugging.
                                </p>
                            </li>
                            <li>
                                <span className="my-2 mini-heading">
                                    Making a Prediction
                                </span>
                                <p>
                                    Once the container is running and the model
                                    is loaded, you can test predictions by
                                    making an API call to the MLServer endpoint.
                                    Follow these steps:
                                </p>
                                <ol className="my-2 list-decimal list-inside ml-2">
                                    <li>
                                        <span className="font-semibold">
                                            Verify the Endpoint is Running:
                                        </span>
                                        <p>
                                            Use a tool like <code>curl</code> or
                                            Postman to confirm the server is
                                            live:
                                        </p>
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.curl}
                                                language="shell"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                        <p>
                                            A successful response will indicate
                                            that the server is ready to accept
                                            requests.
                                        </p>
                                    </li>
                                    <li>
                                        <span className="font-semibold">
                                            Prepare the Input Payload:
                                        </span>
                                        <p>
                                            Create a JSON payload to send a
                                            request to the model. The payload
                                            should follow the KFServing V2
                                            inference protocol:
                                        </p>
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.inputPayload}
                                                language="json"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                    </li>
                                    <li>
                                        <span className="font-semibold">
                                            Make an Inference Request:
                                        </span>
                                        <p>
                                            Use <code>curl</code> to send a POST
                                            request to the{" "}
                                            <code>
                                                /v2/models/{`{model_name}`}
                                                /infer
                                            </code>{" "}
                                            endpoint:
                                        </p>
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.curlPost}
                                                language="json"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                        <p>
                                            Replace{" "}
                                            <code>{"{model_name}"}</code> with
                                            the name of your model as defined in{" "}
                                            <code>model-settings.json</code>.
                                        </p>
                                        <p className="font-semibold my-2">
                                            Inspect the Response:
                                        </p>
                                        <p>
                                            The response will contain the
                                            model's prediction. For example:
                                        </p>
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.modelOutput}
                                                language="json"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                    </li>
                                </ol>
                            </li>
                        </ol>
                    </div>

                    <div className="my-6" id="hosting-model">
                        <h2 className="sub-heading">
                            Hosting the Model on Vipas.AI
                        </h2>
                        <div className="">
                            <h3 className="mini-heading">
                                Publishing the Model via Vipas.AI SDK
                            </h3>
                            <p>MLflow config JSON file content:</p>
                            <div className="w-full my-4">
                                <CopyBlock
                                    text={codeSnippets.mlconfig}
                                    language="json"
                                    showLineNumbers={false}
                                    theme={anOldHope}
                                    wrapLines={true}
                                    codeBlock
                                />
                            </div>
                            <p>
                                You can publish the model using Vipas.AI SDK{" "}
                                <a
                                    href="https://docs.vipas.ai/developer-docs/sdk-publish"
                                    target="_blank"
                                    className="links !text-[16px]"
                                >
                                    publish
                                </a>{" "}
                                method.
                            </p>
                            <div className="w-full my-4">
                                <CopyBlock
                                    text={codeSnippets.publish}
                                    language="python"
                                    showLineNumbers={false}
                                    theme={anOldHope}
                                    wrapLines={true}
                                    codeBlock
                                />
                            </div>
                        </div>
                        <div className="my-6">
                            <h3 className="mini-heading">
                                Deploying via Vipas.AI UI
                            </h3>
                            <p>
                                After building and pushing the Docker image for
                                the fine-tuned LLaMA model to Docker Hub, follow
                                these steps to deploy it on Vipas.AI:
                            </p>
                            <ol className="list-decimal list-inside my-2">
                                <li>
                                    <span className="font-semibold">
                                        Provide the Docker Image:
                                    </span>
                                    <p>
                                        Enter the <strong>Docker image</strong>{" "}
                                        in the following format:
                                    </p>
                                    <div className="w-full my-4">
                                        <CopyBlock
                                            text={codeSnippets.dockerImage}
                                            language="python"
                                            showLineNumbers={false}
                                            theme={anOldHope}
                                            wrapLines={true}
                                            codeBlock
                                        />
                                    </div>
                                    <p>
                                        This image should include everything
                                        required to serve the fine-tuned LLaMA
                                        model, such as dependencies and runtime
                                        specifications.
                                    </p>
                                </li>
                                <li>
                                    <span className="font-semibold">
                                        Enter Docker Personal Access Token
                                        (PAT):
                                    </span>
                                    <p>
                                        Along with the Docker image, provide
                                        your{" "}
                                        <strong>
                                            Docker Personal Access Token (PAT)
                                        </strong>{" "}
                                        to authenticate the pull operation.
                                    </p>
                                    <a
                                        href="https://docs.docker.com/security/for-developers/access-tokens/"
                                        className="links !text-[16px]"
                                        target="_blank"
                                    >
                                        Learn more about generating a Docker
                                        PAT.
                                    </a>
                                    <img
                                        src={dockerImg}
                                        loading="lazy"
                                        className="my-6 tw-w-full tw-h-auto tw-object-contain"
                                        alt="model-docker"
                                    />
                                </li>
                                <li>
                                    <span className="font-semibold">
                                        Proceed to Staging:
                                    </span>
                                    <p>
                                        After entering the required details,
                                        click the "Next" button to stage your
                                        model. Staging ensures the Docker image
                                        is validated and ready for deployment.
                                    </p>
                                    <a
                                        href="https://docs.vipas.ai/developer-docs/stage-the-model"
                                        className="links !text-[16px]"
                                        target="_blank"
                                    >
                                        Learn more about staging the model on
                                        Vipas.AI.
                                    </a>
                                </li>
                                <li>
                                    <span className="font-semibold">
                                        Launch from the Project Table:
                                    </span>
                                    <p>
                                        Once staged, your model will appear in
                                        the{" "}
                                        <a
                                            href="https://vipas.ai/projects"
                                            target="_blank"
                                            className="links !text-[16px]"
                                        >
                                            Project Table
                                        </a>
                                        , which displays all your models and
                                        apps, including their status,
                                        accessibility, and more. You can launch
                                        the model directly from here.
                                    </p>
                                    <a
                                        href="https://docs.vipas.ai/developer-docs/launching-an-app-or-model"
                                        className="links !text-[16px]"
                                        target="_blank"
                                    >
                                        Learn more about the Project Table.
                                    </a>
                                    <img
                                        src={launchImg}
                                        loading="lazy"
                                        className="my-6 tw-w-full tw-h-auto tw-object-contain"
                                        alt="launch-project"
                                    />
                                </li>
                                <li>
                                    <span className="font-semibold">
                                        Deploy and Monitor:
                                    </span>
                                    <p>
                                        After launching, the model will be
                                        deployed and available for use. Vipas.AI
                                        automatically handles autoscaling and
                                        traffic management.
                                    </p>
                                    <a
                                        href="https://docs.vipas.ai/developer-docs/mlflow-model"
                                        className="links !text-[16px]"
                                    >
                                        Detailed guide on deploying an ML model
                                        on Vipas.AI.
                                    </a>
                                    <p>
                                        Check out the Vipas.AI hosted model{" "}
                                        <a
                                            href="http://vipas.ai/models/mdl-b1mxve8nrq9cj"
                                            className="links !text-[16px]"
                                        >
                                            here
                                        </a>
                                        .
                                    </p>
                                </li>
                            </ol>
                        </div>
                    </div>

                    <div className="my-6" id="testing-deployment">
                        <h2 className="sub-heading">
                            Testing the Deployment of Your Model
                        </h2>
                        <p>
                            Once your model is deployed on Vipas.AI, you can
                            test its functionality by following these steps:
                        </p>
                        <div className="">
                            <h4 className="mini-heading">Prediction from UI</h4>
                            <ol className="list-inside list-decimal my-2 ml-2">
                                <li>
                                    <span className="font-semibold">
                                        Access the Project Table:
                                    </span>
                                    <p>
                                        Navigate to the{" "}
                                        <a
                                            href="https://vipas.ai/projects"
                                            target="_blank"
                                            className="links !text-[16px]"
                                        >
                                            Project Table
                                        </a>
                                        , where all your models and apps are
                                        listed. Use the{" "}
                                        <strong>Model ID</strong> to search for
                                        the model you just deployed.
                                    </p>
                                </li>
                                <li>
                                    <span className="font-semibold">
                                        Open the Model Page:
                                    </span>
                                    <p>
                                        Locate your model in the Project Table.
                                        From the Actions Menu, select the Open
                                        option to access the model page.
                                    </p>
                                    <a
                                        href="https://docs.vipas.ai/developer-docs/detailed-actions-and-usage"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        Learn more about the Actions Menu.
                                    </a>
                                </li>
                                <li>
                                    <span className="font-semibold">
                                        Run the Model:
                                    </span>
                                    <p>
                                        On the model page, you will find the{" "}
                                        <strong>Run</strong> button. Click this
                                        button to open the{" "}
                                        <strong>Model API Page</strong>, where
                                        you can test the model.
                                    </p>
                                    <img
                                        src={rollbackImg}
                                        loading="lazy"
                                        className="my-6 tw-w-full tw-h-auto tw-object-contain"
                                        alt="rollback"
                                    />

                                    <p>
                                        On this page, users can click on the '
                                        <strong>Run</strong>' button to enter
                                        their prompt and receive a prediction.
                                        Below is an example of the input body
                                        format:
                                    </p>
                                    <div className="w-full my-4">
                                        <CopyBlock
                                            text={codeSnippets.input}
                                            language="python"
                                            showLineNumbers={false}
                                            theme={anOldHope}
                                            wrapLines={true}
                                            codeBlock
                                        />
                                    </div>
                                </li>
                                <li>
                                    <span className="font-semibold">
                                        Test the Prediction:
                                    </span>
                                    <p>
                                        On the <strong>Model API Page</strong>,
                                        enter the <strong>prediction body</strong>{" "}
                                        (input data) into the provided input box.
                                        Click <strong>Predict</strong> to submit the
                                        request and view the model's response.
                                    </p>
                                    <img
                                        src={modelAPIPageImg}
                                        loading="lazy"
                                        className="my-6 tw-w-full tw-h-auto tw-object-contain"
                                        alt="model-API-page"
                                    />
                                </li>
                            </ol>
                        </div>
                        <div className="">
                            <h4 className="mini-heading">
                                Prediction from SDK
                            </h4>
                            <p>
                                Users can predict using the{" "}
                                <strong>Vipas SDK</strong>, which allows
                                seamless integration of model inference in
                                Python scripts. Refer to the{" "}
                                <a
                                    href="https://docs.vipas.ai/developer-docs/async-and-sync"
                                    className="links !text-[16px]"
                                >
                                    Vipas.AI SDK documentation
                                </a>{" "}
                                for detailed information.
                            </p>
                            <p className="font-semibold">
                                Prediction Snippet using Vipas SDK:
                            </p>
                            <div className="w-full my-4">
                                <CopyBlock
                                    text={codeSnippets.sdkPredict}
                                    language="python"
                                    showLineNumbers={false}
                                    theme={anOldHope}
                                    wrapLines={true}
                                    codeBlock
                                />
                            </div>
                        </div>
                    </div>

                    <div className="my-6" id="publishing-on-vipas">
                        <h2 className="sub-heading">
                            Why Publishing on Vipas.AI Can Be Life-Changing
                        </h2>
                        <ul className="my-2 list-disc list-inside">
                            <li>
                                You’ve now learned how to fine-tune and deploy a
                                powerful LLaMA model on Vipas.AI. But what’s
                                next?{" "}
                                <strong>
                                    This is where your AI journey truly begins
                                </strong>
                                .
                            </li>
                            <li>
                                💰{" "}
                                <strong>
                                    Turn Your AI Knowledge into Passive Income
                                </strong>{" "}
                                – Every time someone uses your model, you earn.
                                No need to chase clients or projects—
                                <strong>your AI works for you 24/7.</strong>
                            </li>
                            <li>
                                💼{" "}
                                <strong>
                                    Get Hired Without Job Applications
                                </strong>{" "}
                                – AI recruiters are scouting top talent{" "}
                                <strong>
                                    not just from resumes but from deployed
                                    models
                                </strong>
                                . When enterprises use your AI,{" "}
                                <strong>
                                    you become a proven, high-value hire
                                </strong>
                                .
                            </li>
                            <li>
                                🏆{" "}
                                <strong>
                                    Build Your AI Reputation & Thought
                                    Leadership
                                </strong>{" "}
                                – Vipas.AI isn’t just a platform—it’s an{" "}
                                <strong>
                                    ecosystem where AI innovators are recognized
                                </strong>
                                . Your profile, model usage stats, and
                                performance benchmarks help you stand out in the
                                global AI community.
                            </li>
                            <li>
                                🔗{" "}
                                <strong>
                                    Network with AI Investors & Enterprises
                                </strong>{" "}
                                – As a top AI creator, you’ll get access to
                                industry events, funding opportunities, and
                                partnerships with businesses that want to
                                integrate cutting-edge AI.
                            </li>
                        </ul>
                        <p>
                            So don’t just code AI—monetize it, build your
                            career, and take control of your AI future. 🚀 Sign
                            up on Vipas.AI today and turn your AI expertise into
                            real-world success!
                        </p>
                    </div>
                    <div className="my-6" id="references">
                        <h2 className="font-semibold">References</h2>
                        <ul className="list-disc list-inside my-2">
                            <li>
                                <a
                                    href="https://docs.vipas.ai/developer-docs/mlflow-model"
                                    target="_blank"
                                    className="links !text-[16px]"
                                >
                                    Vipas.AI MLflow documentation
                                </a>
                            </li>
                            <li>
                                <a
                                    href="https://mlflow.org/"
                                    target="_blank"
                                    className="links !text-[16px]"
                                >
                                    MLflow Documentation
                                </a>
                            </li>
                            <li>
                                <a
                                    href="https://github.com/SeldonIO/MLServer"
                                    target="_blank"
                                    className="links !text-[16px]"
                                >
                                    MLServer Documentation
                                </a>
                            </li>
                            <li>
                                <a
                                    href="http://vipas.ai/models/mdl-b1mxve8nrq9cj"
                                    target="_blank"
                                    className="links !text-[16px]"
                                >
                                    Vipas.AI Hosted model.
                                </a>
                            </li>
                            <li>
                                <a
                                    href="https://utils.vipas.ai/vps-ipynb/ml_flow_llama_health/disease_dataset_1b.zip"
                                    target="_blank"
                                    className="links !text-[16px]"
                                >
                                    Model Directory Download
                                </a>
                            </li>
                        </ul>
                    </div>
                    
                    <Navigation
                       
                        next={"Next: Deploying LLaMA Model Using MLflow on Vipas.AI: A Comprehensive Guide"}
                        nextNav={"deploying-llama-using-mlflow"}
                    />
                </div>
                <div className="h-full hidden xl:block xl:w-[33.33%] px-[24px] sticky top-16">
                    <p className="font-semibold my-8 ">On this page</p>
    <ul className="border-t-2 py-4">
        <li className="link-list links" onClick={()=>scrollToId('introduction')}>
                Introduction
        </li>
        <li className="link-list links" onClick={()=>scrollToId('monetize')}>
            Monetize Your AI Expertise & Get Discovered by Top Recruiters on Vipas.AI
        </li>
        <li className="link-list links" onClick={()=>scrollToId('prerequisites')}>
            Prerequisites
        </li>
        <li className="link-list links" onClick={()=>scrollToId('layoutlm-dataset')}>
            Fine-Tuning LLaMA on JPMiller's LayoutLM Dataset
        </li>
        <li className="link-list links" onClick={()=>scrollToId('deploying-model-on-vipas')}>
            Deploying the model on Vipas.AI
        </li>
        <li className="link-list links" onClick={()=>scrollToId('tech-stack')}>
            Understanding the Technology Stack
        </li>
        <li className="link-list links" onClick={()=>scrollToId('creating-model')}>
            Creating a Model on Vipas.AI
        </li>
        <li className="link-list links" onClick={()=>scrollToId('custom-runtime')}>
            Preparing the Model for Custom Runtime
        </li>
        <li className="link-list links" onClick={()=>scrollToId('configure-deployment')}>
            Configuring the Deployment
        </li>
        <li className="link-list links" onClick={()=>scrollToId('docker-container')}>
            Testing the Docker Container
        </li>
        <li className="link-list links" onClick={()=>scrollToId('hosting-model')}>
            Hosting the Model on Vipas.AI
        </li>
<li className="link-list links" onClick={()=>scrollToId('testing-deployment')}>
            Testing the Deployment of Your Model
        </li>
        <li className="link-list links" onClick={()=>scrollToId('publishing-on-vipas')}>
            Why Publishing on Vipas.AI Can Be Life-Changing
        </li>
        <li className="link-list links" onClick={()=>scrollToId('references')}>
            References
        </li>

    </ul>
                </div>
            </div>
        </>
    );
}

// Default export of the FineTunnedLlamaModel documentation page component.
export default FineTunnedLlamaModel;
