import React from "react";
import { CopyBlock, anOldHope } from "react-code-blocks";
import downloadIcon from "../../../assets/images/download.png";
import modelAPIImage from "../../../assets/images/llama-using-mlflow/image1.webp"
import launchMenuImage from "../../../assets/images/llama-using-mlflow/image2.webp"
import customRuntimeImage2 from "../../../assets/images/llama-using-mlflow/image3.webp"
import rollbackImage from "../../../assets/images/llama-using-mlflow/image4.webp"
import createImage from "../../../assets/images/llama-using-mlflow/image5.webp"
import customRuntimeImage1 from "../../../assets/images/llama-using-mlflow/image6.webp"
import llamaImage from "../../../assets/images/llama-using-mlflow/image7.webp"
import dockerImage from "../../../assets/images/llama-using-mlflow/image8.webp"
import Navigation from "../../Navigation";
import { scrollToId } from "../../../utils/reusableFunctions";

function DeployingLlamaUsingMLflow() {
    /**
     * Code samples shown on this page, keyed by the step they illustrate
     * (for example, `codeSnippets.load` is passed to a `CopyBlock` as its
     * `text`). Each value is a template literal that is displayed verbatim.
     *
     * Authoring notes:
     * - Placeholders such as `{model_name}` and `<tag>` are literal text, not
     *   interpolations; never prefix them with `$`.
     * - A shell line-continuation backslash must be written as `\\`. A single
     *   backslash directly before a newline is a JavaScript LineContinuation
     *   and is silently removed from the string, together with the newline.
     */
    const codeSnippets = {
        // Python: custom MLServer runtime, load() override.
        load: `
from mlserver import MLModel
from transformers import AutoTokenizer, AutoModelForCausalLM
        
class CustomHuggingFaceRuntime(MLModel):
    async def load(self) -> bool:
        hf_token = "<your_hugging_face_token>"
        model_name = self.settings.parameters.extra.get("pretrained_model", "meta-llama/Llama-3.2-3B")
        print(f"Loading model {model_name} with Hugging Face token...")
        
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
        self.model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=hf_token)
        print("Model and tokenizer loaded successfully!")
        return True`,

        // Python: custom MLServer runtime, predict() override.
        predict: `
async def predict(self, payload: InferenceRequest) -> InferenceResponse:
    inputs = payload.inputs[0].data[0]
    print(f"Received prompt: {inputs}")

    tokens = self.tokenizer(inputs, return_tensors="pt")
    output_tokens = self.model.generate(tokens["input_ids"], max_length=50)
    response = self.tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    print(f"Generated response: {response}")

    response_output = ResponseOutput(name="response", shape=[1], datatype="BYTES", data=[response])
    return InferenceResponse(model_name=self.name, outputs=[response_output])`,
        // Dockerfile that packages the runtime and starts MLServer.
        dockerfile: `FROM python:3.11.2-slim

WORKDIR /app

# Copy configuration and application files
COPY requirements.txt /app/requirements.txt
COPY custom_runtime.py /app/custom_runtime.py
COPY model-settings.json /app/model-settings.json

# Install dependencies
RUN pip install -r /app/requirements.txt
RUN pip install mlserver mlserver-mlflow

# Expose the default MLServer port
EXPOSE 8080

# Start MLServer
CMD ["mlserver", "start", "/app"]`,
        // model-settings.json sample pointing MLServer at the custom runtime.
        modelSettingJson: `{
    "name": "<vipas_model_service_name>",//provided by vipas
    "implementation": "custom_runtime.CustomHuggingFaceRuntime",
    "parameters": {
      "extra": {
        "pretrained_model": "meta-llama/Llama-3.2-3B"
      }
    }
}`,
        // Docker CLI commands for building, pushing and running the image.
        dockerBuild: `docker build -t <docker_username>/<image_name>:<tag> .`,
        dockerPush: `docker push <docker_username>/<image_name>:<tag>`,
        dockerRun: `docker run -it -p 8080:8080 <docker_username>/<image_name>:<tag>`,
        // Readiness probe against the locally running container.
        curl: `curl http://localhost:8080/v2/health/ready`,
        // Sample inference request body.
        inputPayload: `{
    "inputs": [
      {
        "name": "prompt",
        "shape": [1],
        "datatype": "BYTES",
        "data": ["What is the capital of France?"]
      }
    ]
  }`,
        // Multi-line curl command. The trailing backslashes are escaped (\\)
        // so they survive in the rendered snippet instead of being consumed
        // as template-literal line continuations.
        inferenceRequest: `curl -X POST http://localhost:8080/v2/models/{model_name}/infer \\
  -H "Content-Type: application/json" \\
  -d '{
    "inputs": [
      {
        "name": "prompt",
        "shape": [1],
        "datatype": "BYTES",
        "data": ["What causes cancer?"]
      }
    ]
  }'`,
        // Sample inference response body.
        output: `{
    "model_name": {model_name},
    "outputs": [
      {
        "name": "response",
        "shape": [1],
        "datatype": "BYTES",
        "data": ["The capital of France is Paris."]
      }
    ]
  }`,
        // MLflow config file consumed by the Vipas SDK publish call below.
        configJson: `{
    "docker_image":"<docker_username>/<image_name>:<tag>",
    "docker_token":<docker pat>
}`,
        // Python: publishing the model through the Vipas SDK.
        publish: `from vipas.model import ModelClient
from vipas.exceptions import UnauthorizedException, NotFoundException, ClientException


# Paths to MLflow config files
mlflow_config_path = "/path/to/mlflow_config.json"  # Optional, Path to the MLflow config file which contains details like the model docker image and a valid docker personal access token. Both are required.

# Unique model ID to identify the model in Vipas.AI
model_id = "your_model_id"

try:
    # Initialize the ModelClient
    model_client = ModelClient()

    # Publish the model
    model_client.publish(
        model_id=model_id,
        model_framework_type="mlflow", mlflow_config_path=mlflow_config_path,
        auto_launch=True,  
        override_model=True 
    )
except UnauthorizedException as e:
    print(f"UnauthorizedException: {e}")
except NotFoundException as e:
    print(f"NotFoundException: {e}")
except ClientException as e:
    print(f"ClientException: {e}")
except Exception as e:
    print(f"Exception: {e}")`,
        // Docker image reference placeholder.
        dockerImage: `<docker_username>/<image_name>:<tag>`,
        // Sample prediction input; the prompt matches inputPayload and
        // vipasPredict so the examples on the page agree with each other.
        predictInput: `{
    "inputs": [
      {
        "name": "prompt",
        "shape": [1],
        "datatype": "BYTES",
        "data": ["What is the capital of France?"]
      }
    ]
}`,
        // Python: calling the deployed model through the Vipas SDK.
        vipasPredict: `from vipas import model
from vipas.exceptions import ClientException
import os 
try: 
    os.environ["VPS_AUTH_TOKEN"] = "<vps_auth_token>"
    model_client = model.ModelClient()
    request_body = {
        "inputs": [
            {
                "name": "prompt",
                "shape": [1],
                "datatype": "BYTES",
                "data": ["What is the capital of France?"]
            }
        ]
    }
    api_response = model_client.predict(model_id="<model_id>", input_data=request_body)
    print("Model output", api_response)
except ClientException as e:
    print(e)
`,
    };
    return (
        <>
            <div className="flex my-8 relative break-words ">
                <div className="w-[100%] xl:w-[66.66%] px-[24px]">
                    <h1 className="mb-3 heading">
                        Deploying LLaMA Model Using MLflow on Vipas.AI: A
                        Comprehensive Guide
                    </h1>
                    <div className="border rounded-[5px] w-[250px] p-2 shadow-md my-2 hover:bg-gray-100">
                               <a
                                   href="https://utils.vipas.ai/vps-ipynb/mlflow_llama/hugging_face_llama.zip"
                                   target="_blank"
                                   className="ml-3 flex items-center"
                               >
                                   <img
                                       src={downloadIcon}
                                       width={32}
                                       height={32}
                                       alt="text-summarization-download"
                                   />{" "}
                                   <span className="ml-2">
                                       Download Notebook
                                   </span>
                               </a>
                           </div>
                    <div className="my-6" id="introduction">
                        <h2 className="sub-heading">Introduction</h2>
                        <p className="my-2">
                            The LLaMA 3.2 model, developed by Meta, represents a
                            cutting-edge advancement in the field of natural
                            language processing. With its multilingual
                            capabilities and optimization for tasks like
                            summarization and agentic retrieval, LLaMA 3.2’s
                            instruction-tuned versions deliver exceptional
                            performance, making it a preferred choice for
                            real-world AI applications.
                        </p>
                        <img src={llamaImage} loading="lazy" className='my-6 tw-w-full tw-h-auto tw-object-contain' alt="llama"/>

                        <p className="my-2">
                            Deploying such advanced models effectively is
                            crucial for maximizing their impact in production
                            scenarios. This blog explores the use of MLflow to streamline the deployment of the LLaMA
                            3.2 model on the Vipas.AI platform, ensuring
                            scalability, reliability, and ease of management.
                        </p>
                        <ul className="my-4 list-disc">
                            <li>
                                <a
                                    href="https://mlflow.org/"
                                    target="_blank"
                                    className="links !text-[16px]"
                                >
                                    MLflow
                                </a>
                                : A comprehensive platform for managing the ML
                                lifecycle, including tracking experiments,
                                managing models, and ensuring seamless
                                deployment.
                            </li>
                            <li>
                                <a
                                    href="https://mlserver.readthedocs.io/en/latest/getting-started/index.html"
                                    target="_blank"
                                    className="links !text-[16px]"
                                >
                                    MLServer
                                </a>
                                : A robust serving framework that supports
                                multi-model inference, adaptive batching, and
                                scalability, making it an ideal choice for
                                deploying high-performance models like LLaMA
                                3.2.
                            </li>
                            <li>
                                <a
                                    href="http://Vipas.AI"
                                    target="_blank"
                                    className="links !text-[16px]"
                                >
                                    Vipas.AI
                                </a>
                                : A state-of-the-art platform designed to
                                revolutionize AI model deployment and
                                management, offering intuitive tools to launch,
                                scale, and monetize AI applications in dynamic,
                                real-world environments.
                            </li>
                        </ul>
                        <p>
                            This blog provides a detailed walkthrough of
                            deploying the LLaMA 3.2 model using these
                            technologies, highlighting best practices and
                            technical insights to optimize the deployment
                            process. You can check out the model deployed{" "}
                            <a
                                href="https://vipas.ai/models/mdl-s5cdasend8dza"
                                className="links !text-[16px]"
                            >
                                here
                            </a>{" "}
                            and you can download the files for the model{" "}
                            <a
                                href="https://utils.vipas.ai/vps-ipynb/mlflow_llama/hugging_face_llama.zip"
                                className="links !text-[16px]"
                            >
                                here
                            </a>
                            .
                        </p>
                    </div>

                    <div className="my-6" id="monetize">
                        <h2 className="sub-heading">
                            Monetize Your AI Expertise & Get Noticed by
                            Enterprises on Vipas.AI!
                        </h2>
                        <p className="my-2">
                            Building AI models is just the beginning —{" "}
                            <strong>
                                turning them into a scalable business is where
                                real success lies!
                            </strong>{" "}
                            Imagine a world where every time someone uses your
                            model, <strong>you earn money</strong> while your AI
                            solutions gain <strong>global visibility</strong>.
                        </p>
                        <ul className="my-4 list-disc">
                            <li>
                                {" "}
                                <strong>Earn Passive Income</strong> – Deploy
                                your LLaMA-powered models and charge per API
                                call.{" "}
                                <strong>
                                    No complex setup, no infrastructure
                                    costs—just pure AI monetization
                                </strong>
                                .
                            </li>
                            <li>
                                <strong>
                                    Attract Global Enterprises & Recruiters
                                </strong>{" "}
                                – AI startups, SMBs, and enterprises are looking
                                for <strong>ready-to-use AI solutions</strong>.
                                If your model solves a real-world problem, it{" "}
                                <strong>becomes an industry asset</strong>,
                                increasing your credibility and job
                                opportunities.
                            </li>
                            <li>
                                <strong>
                                    Deploy AI Models Without Engineering
                                    Overhead
                                </strong>{" "}
                                – No need to worry about{" "}
                                <strong>scaling</strong>,{" "}
                                <strong>
                                    inference optimizations, or cloud costs
                                </strong>
                                . Vipas.AI <strong>automates everything</strong>{" "}
                                while you focus on AI development.
                            </li>
                            <li>
                                <strong>Your AI, Your Revenue</strong> – Unlike
                                traditional platforms where AI research remains
                                locked in code repositories, Vipas.AI ensures
                                that{" "}
                                <strong>
                                    your AI solutions work instantly and
                                    generate revenue
                                </strong>{" "}
                                from users worldwide.
                            </li>
                        </ul>
                        <p className="my-4">
                            This guide will walk you through{" "}
                            <strong>
                                deploying the LLaMA model using MLflow on
                                Vipas.AI
                            </strong>{" "}
                            — from training to deployment to{" "}
                            <strong>real-time monetization</strong>.{" "}
                            <strong>
                                Start now, scale effortlessly, and take control
                                of your AI career!{" "}
                            </strong>
                            🚀
                        </p>
                    </div>

                    <div className="my-6" id="tech-stack">
                        <h2 className="sub-heading">
                            Understanding the Technology Stack
                        </h2>
                        <ol className="my-4 list-decimal list-inside">
                            <li>
                                <span className="mini-heading">
                                    LLaMA Model
                                </span>
                                <p className="my-4">
                                    The LLaMA 3.2 model, developed by Meta, is a
                                    multilingual large language model optimized
                                    for tasks such as text summarization,
                                    language translation, and dialogue
                                    generation. Its instruction-tuned variants
                                    leverage supervised fine-tuning (SFT) and
                                    reinforcement learning with human feedback
                                    (RLHF) to enhance alignment with human
                                    preferences for safety and helpfulness. Key
                                    architectural features include:
                                </p>
                                <ul className="list-disc list-inside">
                                    <li>
                                        {" "}
                                        <strong>
                                            Transformer Architecture
                                        </strong>
                                        : Optimized for scalability and
                                        performance.
                                    </li>
                                    <li>
                                        <strong>Multilingual Support</strong>:
                                        Trained on a diverse set of languages,
                                        making it suitable for global use cases.
                                    </li>
                                    <li>
                                        <strong>Enhanced Context Length</strong>
                                        : Capable of processing up to 128k
                                        tokens, supporting extensive input data.
                                    </li>
                                </ul>
                                <p className="my-2">
                                    Use cases include agentic retrieval, code
                                    generation, and multilingual summarization,
                                    making LLaMA a versatile choice for various
                                    industries.
                                </p>
                            </li>
                            <li>
                                <span className="mini-heading">
                                    MLflow and MLServer
                                </span>
                                <p className="my-2">
                                    MLflow and MLServer together create a robust
                                    and scalable solution for managing and
                                    deploying machine learning models. While
                                    MLflow focuses on the lifecycle management
                                    of models, MLServer specializes in efficient
                                    and high-performance serving of these models
                                    in production environments.
                                </p>
                                <p className="font-semibold">
                                    Key Features of MLflow
                                </p>
                                <ul className="list-disc list-inside">
                                    <li>
                                        <strong>
                                            <a
                                                href="https://mlflow.org/docs/latest/tracking.html#quickstart"
                                                className="links !text-[16px]"
                                                target="_blank"
                                            >
                                                Tracking
                                            </a>
                                        </strong>
                                        : Logs parameters, metrics, and
                                        artifacts from training runs.
                                    </li>
                                    <li>
                                        <strong>Projects</strong>: Standardized
                                        packaging of ML code for
                                        reproducibility.
                                    </li>
                                    <li>
                                        <strong>Models</strong>:
                                        Framework-agnostic model packaging and
                                        deployment.
                                    </li>
                                    <li>
                                        <strong>Model Registry</strong>:
                                        Centralized storage for model versions,
                                        enabling lifecycle management.
                                    </li>
                                </ul>
                                <p className="my-2">
                                    For this deployment, Hugging Face’s{" "}
                                    <code>AutoTokenizer</code> and{" "}
                                    <code>AutoModelForCausalLM</code> were used
                                    to load a pretrained LLaMA model. The
                                    artifacts were logged with MLflow to ensure
                                    seamless reproducibility and version
                                    control. MLflow’s integration capabilities
                                    allowed us to package and deploy the model
                                    efficiently.
                                </p>
                                <p className="my-2 font-semibold">
                                    Key Features of MLServer
                                </p>
                                <ul className="list-disc list-inside">
                                    <li>
                                        <strong>Multi-Model Serving</strong>:
                                        Run multiple models in a single process.
                                    </li>
                                    <li>
                                        <strong>Adaptive Batching</strong>:
                                        Dynamically group inference requests for
                                        enhanced throughput.
                                    </li>
                                    <li>
                                        <strong>Kubernetes Integration</strong>:
                                        Deploy models on Kubernetes using
                                        frameworks like KServe and Seldon Core.
                                    </li>
                                </ul>
                                <p>
                                    By integrating Dockerized models with
                                    MLServer, Vipas.AI provides an optimized
                                    environment for real-time inference at
                                    scale. Developers can leverage its intuitive
                                    dashboard to manage endpoints, monitor
                                    usage, and ensure high availability. Learn
                                    more about deploying an MLflow model on Vipas.AI
                                    at{" "}
                                    <a
                                        href="https://docs.vipas.ai/developer-docs/mlflow-model"
                                        className="links !text-[16px]"
                                        target="_blank"
                                    >
                                        Vipas.AI Documentation
                                    </a>
                                    ,{" "}
                                    <a
                                        href="https://mlflow.org/docs/latest/models.html#storage-format"
                                        className="links !text-[16px]"
                                        target="_blank"
                                    >
                                        MLflow Documentation
                                    </a>
                                    ,{" "}
                                    <a
                                        href="https://mlserver.readthedocs.io/en/stable/user-guide/custom.html"
                                        className="links !text-[16px]"
                                        target="_blank"
                                    >
                                        MLServer Documentation
                                    </a>
                                    .
                                </p>
                            </li>
                        </ol>
                    </div>

                    <div className="my-6" id="custom-runtime">
                        <h2 className="sub-heading">
                            Preparing the LLaMA Model in Custom Runtime
                        </h2>
                        <ol className="list-decimal list-inside">
                            <li>
                                <span className="mini-heading">
                                    Creating a Model on Vipas.AI
                                </span>
                                <p className="my-4">
                                    To create a model on Vipas.AI, navigate to
                                    the{" "}
                                    <a
                                        href="https://vipas.ai/project/create/main"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        Create Model Page
                                    </a>
                                    . Provide the required details such as name,
                                    description, category, and permissions. Once
                                    completed, click <strong>Next</strong> to
                                    proceed.
                                </p>
                                <img src={createImage} loading="lazy" className='my-6 tw-w-full tw-h-auto tw-object-contain' alt="create-project"/>

                                <p className="my-4">
                                    {" "}
                                    In the <strong>Custom Runtime</strong> tab,
                                    download the pre-configured{" "}
                                    <strong>Dockerfile </strong>and{" "}
                                    <strong>model-settings.json</strong> files,
                                    which will serve as the foundation for your
                                    custom runtime deployment. For more
                                    information, refer to the{" "}
                                    <a
                                        href="https://docs.vipas.ai/developer-docs/mlflow-model"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        MLflow Model Documentation
                                    </a>
                                    ,{" "}
                                    <a
                                        href="https://docs.vipas.ai/developer-docs/steps-to-create-a-model"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        Creating a model
                                    </a>
                                    .
                                </p>
                                <img src={customRuntimeImage1} loading="lazy" className='my-6 tw-w-full tw-h-auto tw-object-contain' alt="custom-runtime"/>
                                <img src={customRuntimeImage2} loading="lazy" className='my-6 tw-w-full tw-h-auto tw-object-contain' alt="custom-runtime"/>

                            </li>
                            <li>
                                <span className="mini-heading">
                                    Making Custom Runtime Changes
                                </span>
                                <p className="my-4">
                                    After obtaining the configuration files, you
                                    can modify the runtime to suit your model’s
                                    requirements. This involves overriding the{" "}
                                    <code>load</code> and <code>predict</code>{" "}
                                    functions in the{" "}
                                    <code>custom_runtime.py</code> file to
                                    handle model initialization and inference.
                                    To learn more about overriding custom
                                    runtime methods visit{" "}
                                    <a
                                        href="https://docs.vipas.ai/developer-docs/mlflow-model"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        MLflow Model Documentation
                                    </a>
                                    ,{" "}
                                    <a
                                        href="https://mlserver.readthedocs.io/en/latest/user-guide/custom.html"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        MLServer Docs
                                    </a>{" "}
                                    on{" "}
                                    <a
                                        href="https://mlserver.readthedocs.io/en/latest/user-guide/custom.html"
                                        target="_blank"
                                        className="links !text-[16px]"
                                    >
                                        custom inference runtimes
                                    </a>
                                    .
                                </p>
                                <p className="font-semibold">
                                    Overriding the Load Function
                                </p>
                                <p className="">
                                    The <code>load</code> function ensures the
                                    LLaMA model and tokenizer are downloaded
                                    from Hugging Face and initialized correctly.
                                </p>
                                <ul className="list-inside list-disc">
                                    <li>
                                        {" "}
                                        Extracts the model name from
                                        model-settings.json.
                                    </li>
                                    <li>
                                        Uses the Hugging Face token to
                                        authenticate and securely download the
                                        model and tokenizer.
                                    </li>
                                </ul>
                                <p className="font-semibold">Implementation</p>
                                <div className="w-full my-4">
                                    <CopyBlock
                                        text={codeSnippets.load}
                                        language="python"
                                        showLineNumbers={false}
                                        theme={anOldHope}
                                        wrapLines={true}
                                        codeBlock
                                    />
                                </div>
                                <p className="my-2 font-semibold">
                                    Overriding the Predict Function
                                </p>
                                <p>
                                    The <code>predict</code> function processes
                                    inference requests by tokenizing input text,
                                    generating predictions, and decoding the
                                    output into human-readable text.
                                </p>
                                <p className="font-semibold">
                                    Steps in <code>predict</code>:
                                </p>
                                <ol className="list-decimal list-inside">
                                    <li>
                                        Extract input data from the{" "}
                                        <code>InferenceRequest</code> object.
                                    </li>
                                    <li>
                                        {" "}
                                        Tokenize input using{" "}
                                        <code>AutoTokenizer</code>.
                                    </li>
                                    <li>
                                        Generate predictions using the model’s{" "}
                                        <code>generate</code> method.
                                    </li>
                                    <li>
                                        Decode the output tokens into a readable
                                        response.
                                    </li>
                                </ol>
                                <p className="font-semibold">Implementation</p>
                                <div className="w-full my-4">
                                    <CopyBlock
                                        text={codeSnippets.predict}
                                        language="python"
                                        showLineNumbers={false}
                                        theme={anOldHope}
                                        wrapLines={true}
                                        codeBlock
                                    />
                                </div>
                            </li>
                            <li>
                                <span className="mini-heading">
                                    Packaging the Model
                                </span>
                                <p>
                                    The final step involves packaging the model
                                    into a Docker container for deployment. This
                                    ensures portability and simplifies the
                                    deployment process on Vipas.AI.
                                </p>
                                <p className="font-semibold">
                                    Creating a Dockerfile
                                </p>
                                <p>
                                    The Dockerfile defines the runtime
                                    environment, dependencies, and entry point
                                    for the application.
                                </p>
                                <p className="font-semibold">
                                    Example Dockerfile
                                </p>
                                <div className="w-full my-4">
                                    <CopyBlock
                                        text={codeSnippets.dockerfile}
                                        language="text"
                                        showLineNumbers={false}
                                        theme={anOldHope}
                                        wrapLines={true}
                                        codeBlock
                                    />
                                </div>
                                <p className="font-semibold">
                                    Defining Model Settings
                                </p>
                                <p>
                                    The <code>model-settings.json</code> file
                                    specifies the configuration for the runtime
                                    and model implementation.
                                </p>
                                <p className="font-semibold">
                                    Example model-settings.json:
                                </p>
                                <div className="w-full my-4">
                                    <CopyBlock
                                        text={codeSnippets.modelSettingJson}
                                        language="json"
                                        showLineNumbers={false}
                                        theme={anOldHope}
                                        wrapLines={true}
                                        codeBlock
                                    />
                                </div>
                                <p className="font-semibold">
                                    Building and Pushing the Docker Image
                                </p>
                                <ol className="list-decimal list-inside">
                                    <li>
                                        Build the Docker image:
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.dockerBuild}
                                                language="text"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                    </li>

                                    <li>
                                        Push the Docker image to a container
                                        registry:
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.dockerPush}
                                                language="text"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                    </li>
                                </ol>
                                <p>
                                    {" "}
                                    By following these steps, the LLaMA model is
                                    packaged into a production-ready Docker
                                    container that can be deployed on Vipas.AI.
                                    For additional guidance, visit the{" "}
                                    <a
                                        href="https://docs.vipas.ai/developer-docs/mlflow-model"
                                        className="links !text-[16px]"
                                    >
                                        Vipas.AI MLflow Documentation
                                    </a>
                                    .
                                </p>
                            </li>
                        </ol>                       
                    </div>
                    <div className="my-6" id="testing-docker">
                                <span className="mini-heading ">
                                    Testing the Docker Container
                                </span>
                                <ol className="list-decimal list-inside ml-4">
                                    <li>
                                        <strong>
                                            Running the Docker Container
                                        </strong>
                                        <p>
                                            After building the Docker image,
                                            testing the container interactively
                                            ensures that the model loads
                                            correctly and the environment is set
                                            up properly. Use the following
                                            command to run the Docker container
                                            interactively:
                                        </p>
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.dockerRun}
                                                language="text"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                        <p className="font-semibold">Explanation of the Command:</p>
                                        <ul className="my-4 list-inside list-disc">
                                            <li><code>-it</code>: Run the container in interactive mode, allowing you to see logs and interact with the process.</li>
                                            <li><code>-p 8080:8080</code>: Maps the default MLServer port inside the container to your local machine, enabling external API calls.</li>
                                            <li><code>llama-mlserver</code>: Specifies the name of the Docker image.</li>
                                        </ul>
                                        <p>When the container starts, MLServer will initialize and load the LLaMA model. You should see logs indicating that the model and tokenizer are being loaded from Hugging Face. If there are any issues during initialization, they will be displayed in the terminal for debugging.</p>
                                    </li>
                                    <li><strong>Making a Prediction</strong>
                                    <p>Once the container is running and the model is loaded, you can test predictions by making an API call to the MLServer endpoint. Follow these steps:</p>
                                    <ol className="ml-4 list-inside list-decimal">
                                        <li><strong>Verify the Endpoint is Running:</strong>
                                        <p>Use a tool like curl or Postman to confirm the server is live:</p>
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.curl}
                                                language="text"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                        <p>A successful response will indicate that the server is ready to accept requests.</p>
                                        </li>
                                        <li> <strong>Prepare the Input Payload:</strong>
                                            <p>Create a JSON payload to send a request to the model. The payload should follow the KFServing V2 inference protocol:</p>
                                            <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.inputPayload}
                                                language="json"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                        </li>
                                        <li>
                                            <strong>Make an Inference Request:</strong>
                                            <p>Use <code>curl</code> to send a POST request to the <code>/v2/models/{'{model_name}'}/infer</code> endpoint:</p>
                                            <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.inferenceRequest}
                                                language="json"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                            <p>Replace <code>{'{model_name}'}</code> with the name of your model as defined in <code>model-settings.json</code>.</p>
                                        </li>
                                        <li>
                                            <strong>Inspect the Response:</strong>
                                            <p>The response will contain the model's prediction. For example:</p>
                                            <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.output}
                                                language="json"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                        </li>
                                    </ol>
                                    </li>


                                </ol>
                            </div>
                            <div className="my-6" id="llama-model">
                                <span className="mini-heading">
                                Deploying the LLaMA Model on Vipas.AI
                                </span>
                                <ol className="ml-4 list-decimal list-inside my-4">
                                    <li><strong>Publishing the Model via Vipas.AI SDK</strong>
                                        <p>MLflow config JSON file content:</p>
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.configJson}
                                                language="json"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                        <p>You can publish the model using the Vipas.AI SDK <a href="https://docs.vipas.ai/developer-docs/sdk-publish" className="links !text-[16px]">publish</a> method.</p>
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.publish}
                                                language="python"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                    </li>
                                    <li>
                                        <strong>Deploying via Vipas.AI UI</strong>
                                        <p>After building and pushing the Docker image for the LLaMA model to Docker Hub, follow these steps to deploy it on Vipas.AI:</p>
                                        <ol className="list-inside list-decimal ml-4">
                                            <li><strong>Provide the Docker Image:</strong>
                                            <p>Enter the <strong>Docker image</strong> in the following format:</p>
                                            <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.dockerImage}
                                                language="text"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                        <p>This image should include everything required to serve the LLaMA model, such as dependencies and runtime specifications.</p>
                                            </li>
                                            <li><strong>Enter Docker Personal Access Token (PAT):</strong>
                                            <p>Along with the Docker image, provide your <strong>Docker Personal Access Token (PAT)</strong> to authenticate the pull operation.</p>
                                            <a href="https://docs.docker.com/security/for-developers/access-tokens/" className="links !text-[16px]" target="_blank">Learn more about generating a Docker PAT.</a>
                                            <img src={dockerImage} loading="lazy" className='my-6 tw-w-full tw-h-auto tw-object-contain' alt="docker-image"/>

                                            </li>

                                            <li><strong>Proceed to Staging:</strong>
                                            <p>After entering the required details, click the "Next" button to stage your model. Staging ensures the Docker image is validated and ready for deployment.</p>
                                            <a href="https://docs.vipas.ai/developer-docs/stage-the-model" className="links !text-[16px]" target="_blank">Learn more about staging the model on Vipas.AI.</a>
                                            </li>

                                            <li><strong>Launch from the Project Table:</strong>
                                            <p>Once staged, your model will appear in the <a href="https://vipas.ai/projects" className="links !text-[16px]" target="_blank">Project Table</a>, which displays all your models and apps, including their status, accessibility, and more. You can launch the model directly from here.</p>
                                            <a href="https://docs.vipas.ai/developer-docs/launching-an-app-or-model" className="links !text-[16px]" target="_blank">Learn more about the Project Table.</a>
                                            <img src={launchMenuImage} loading="lazy" className='my-6 tw-w-full tw-h-auto tw-object-contain' alt="launch-project"/>

                                            </li>

                                            <li><strong>Deploy and Monitor:</strong>
                                            <p>After launching, the model will be deployed and available for use. Vipas.AI automatically handles autoscaling and traffic management.</p>
                                            <a href="https://docs.vipas.ai/developer-docs/mlflow-model" className="links !text-[16px]" target="_blank">Detailed guide on deploying an ML model on Vipas.AI</a>
                                            </li>
                                        </ol>
                                    </li>
                                </ol>
                            </div>
                            <div className="my-6" id="test-model-deployment">
                                <h3 className="sub-heading">Testing the Deployment of Your Model</h3>
                                <p>Once your model is deployed on Vipas.AI, you can test its functionality by following these steps:</p>
                                <h4 className="mini-heading">Prediction from UI</h4>
                                <ol className="list-inside list-decimal my-4">
                                    <li><strong>Access the Project Table:</strong>
                                    <p>Navigate to the <a href="https://vipas.ai/projects" target="_blank" className="links !text-[16px]">
                                    Project Table</a>, where all your models and apps are listed. Use the <strong>Model ID</strong> to search for the model you just deployed.</p>
                                    </li>
                                    <li>
                                        <strong>Open the Model Page:</strong>
                                        <p>Locate your model in the Project Table. From the <strong>Actions Menu</strong>, select the <strong>Open</strong> option to access the model page.</p>
                                        <a href="" className="">🔗 Learn more about the Actions Menu.</a>
                                    </li>
                                    <li>
                                        <strong>Run the Model:</strong>
                                        <p>On the model page, you will find the <strong>Run</strong> button. Click this button to open the <strong>Model API Page</strong>, where you can test the model.</p>
                                        <img src={rollbackImage} loading="lazy" className='my-6 tw-w-full tw-h-auto tw-object-contain' alt="rollback-project"/>

                                        <p>On this page, users can click on the 'Run' button to enter their prompt and receive a prediction. Below is an example of the input body format:</p>
                                        <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.predictInput}
                                                language="json"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                                        <p>Ensure that you do not change the "<code>name</code>" tag in the request body, as it is required for proper model inference.</p>
                                    </li>
                                    <li>
                                        <strong>Test the Prediction:</strong>
                                        <p>On the <strong>Model API Page</strong>, enter the <strong>prediction body</strong> (input data) into the provided input box. Click <strong>Predict</strong> to submit the request and view the model's response. You can check the deployed model <a href="https://vipas.ai/models/mdl-s5cdasend8dza" className="links !text-[16px]" target="_blank">here</a>.</p>
                                        <img src={modelAPIImage} loading="lazy" className='my-6 tw-w-full tw-h-auto tw-object-contain' alt="model-api"/>

                                    </li>
                                </ol>
                                <h3 className="mini-heading my-2">Prediction from SDK</h3>
                                <p>Users can predict using the Vipas SDK, which allows seamless integration of model inference in Python scripts. Refer to the <a href="https://docs.vipas.ai/developer-docs/async-and-sync" className="links !text-[16px]">Vipas.AI SDK documentation</a> for detailed information.</p>
                                <p className="font-semibold">Prediction Snippet using Vipas SDK:</p>
                                <div className="w-full my-4">
                                            <CopyBlock
                                                text={codeSnippets.vipasPredict}
                                                language="text"
                                                showLineNumbers={false}
                                                theme={anOldHope}
                                                wrapLines={true}
                                                codeBlock
                                            />
                                        </div>
                            </div>
                            <div className="my-6" id="vipas-advanatges">
                                <h2 className="sub-heading">Advantages of Using Vipas.AI</h2>
                                <p className="font-semibold">Why You Should Monetize Your AI Models on Vipas.AI</p>
                                <p>If you’re a researcher, developer, or AI enthusiast, <strong>you don’t need to wait for a job or funding to profit from your skills</strong>. Vipas.AI provides a <strong>direct path to monetization and industry recognition</strong>.
</p>
<ul className="list-inside list-disc">
    <li><strong>💰 Revenue Without Clients or Contracts</strong> – Once your model is deployed, <strong>anyone using it generates income for you</strong> — passive AI earnings at scale.</li>
    <li><strong>💼 AI Hiring is Changing—Show, Don’t Tell</strong> – Recruiters don’t just read resumes. They <strong>test AI models in real-world scenarios</strong>. Having a published AI model on Vipas.AI <strong>automatically sets you apart</strong>.
</li>
<li><strong>🌍 AI for Enterprises & SMBs</strong> – Businesses are willing to pay for <strong>domain-specific AI solutions</strong> without the hassle of hiring a team, setting up cloud servers, or managing training pipelines. <strong>This is your opportunity to sell AI directly</strong>.</li>
<li><strong>🏆 The AI Gold Rush Has Started—Be an Early Mover!</strong> – While most platforms only let developers share code, Vipas.AI is a marketplace where <strong>fully operational AI models generate income. If you’re here early, you gain an edge.</strong>
</li>
</ul>
<p className="font-semibold">Monetize. Get Hired. Scale. Start now on Vipas.AI and turn your AI skills into a thriving business!
</p>
                            </div>
                            <div className="font-semibold my-2" id="reference">
                                <h2 className="sub-heading">
                                References
                                </h2>
                                <ul className="list-inside list-disc">
                                    <li>
                                        <a href="https://docs.vipas.ai/developer-docs/mlflow-model" className="links !text-[16px]" target="_blank">Deploying MLflow model on Vipas.AI.
</a>
                                    </li>
                                    <li>
                                        <a href="https://mlflow.org/docs/latest/index.html" className="links !text-[16px]" target="_blank">MLflow</a> and  <a href="https://mlserver.readthedocs.io/en/latest/user-guide/custom.html" className="links !text-[16px]" target="_blank">MLServer</a> official documentation.
                                    </li>
                                    <li>
                                        <a href="https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/" className="links !text-[16px]" target="_blank">LLaMA</a> documentation.
                                    </li>
                                    <li>
                                    Hugging Face <a href="https://huggingface.co/meta-llama/Llama-3.2-3B" className="links !text-[16px]" target="_blank">LLaMA</a> documentation.
                                    </li>
                                    <li>
                                        <a href="https://vipas.ai/models/mdl-s5cdasend8dza" className="links !text-[16px]" target="_blank">Deployed Model in Vipas.AI</a>
                                    </li>
                                    <li>
                                    Download files for model building <a href="https://utils.vipas.ai/vps-ipynb/mlflow_llama/hugging_face_llama.zip" className="links !text-[16px]" target="_blank">here</a>.
                                    </li>

                                </ul>
                            </div>

                            <Navigation
                        previous={"Previous: Deploying a Fine-Tuned LLaMA Model on Vipas.AI"}
                        previousNav={"fine-tuned-llama"}
                       next={"Next: Deploying a Fine-Tuned YOLO Model for Bone Fracture Detection on Vipas.AI"}
                       nextNav={"fine-tuned-yolo-model-bone-fracture-detection"}
                   />
                </div>
                <div className="h-full hidden xl:block xl:w-[33.33%] px-[24px] sticky top-16">
                    <p className="font-semibold my-8 ">On this page</p>
                    <ul className="border-t-2 py-4">
                        <li className="link-list links" onClick={()=>scrollToId('introduction')}>
                            Introduction
                        </li>
                        <li className="link-list links" onClick={()=>scrollToId('monetize')}>
                        Monetize Your AI Expertise & Get Noticed by Enterprises on Vipas.AI!
                        </li>
                        <li className="link-list links" onClick={()=>scrollToId('tech-stack')}>
                        Understanding the Technology Stack
                        </li>
                        <li className="link-list links" onClick={()=>scrollToId('custom-runtime')}>
                        Preparing the LLaMA Model in Custom Runtime
                        </li>
                        <li className="link-list links" onClick={()=>scrollToId('testing-docker')}>
                        Testing the Docker Container
                        </li>
                        <li className="link-list links" onClick={()=>scrollToId('llama-model')}>
                        Deploying the LLaMA Model on Vipas.AI
                        </li>
                        <li className="link-list links" onClick={()=>scrollToId('test-model-deployment')}>
                        Testing the Deployment of Your Model
                        </li>
                        <li className="link-list links" onClick={()=>scrollToId('vipas-advanatges')}>
                        Advantages of Using Vipas.AI
                        </li>
                        <li className="link-list links" onClick={()=>scrollToId('reference')}>
                        References
                        </li> 

                    </ul>

                </div>
            </div>
        </>
    );
}

export default DeployingLlamaUsingMLflow;
