{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Multilabel Classification Problem" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import zipfile\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import requests\n", "from aisee import DatasetFromDataFrame, Trainer, VisionClassifier\n", "from PIL import Image" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get the data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this section of code, we are **downloading and extracting a dataset of poster images** from a .zip file hosted on **Hugging Face**. \n", "\n", "First, the current working directory of the notebook is obtained and stored in the variable `notebook_dir`. \n", "\n", "Next, the URL of the .zip file is defined and stored in the `url` variable. \n", "\n", "The dataset is then downloaded using the `requests` library, and the content of the response is written to a file named `data.zip` in the current working directory. \n", "\n", "Finally, the `zipfile` library is utilized to extract the contents of the downloaded .zip file into the current working directory, after which the .zip file is closed." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Get the current working directory of the notebook\n", "notebook_dir = os.getcwd()\n", "\n", "# Define the URL to download the dataset\n", "url = 'https://huggingface.co/IIC/aisee_resources/resolve/main/poster.zip'\n", "\n", "# Full path of the archive inside the current working directory, shared by the\n", "# write and extract steps so both always refer to the same file\n", "zip_path = os.path.join(notebook_dir, 'data.zip')\n", "\n", "# Download the dataset; the timeout keeps a stalled connection from hanging the notebook,\n", "# and raise_for_status() stops on an HTTP error instead of saving an error page as \"data.zip\"\n", "response = requests.get(url, timeout=60)\n", "response.raise_for_status()\n", "\n", "# Save the downloaded content as \"data.zip\" in the current working directory\n", "with open(zip_path, 'wb') as file:\n", "    file.write(response.content)\n", "\n", "# Extract the contents of the downloaded zip file into the current working directory;\n", "# the context manager closes the archive even if extraction fails\n", "with zipfile.ZipFile(zip_path) as archivo_zip:\n", "    archivo_zip.extractall(notebook_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we have a folder called **multi_label_dataset** with a subfolder with all the images and a .csv file with the label information.\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Id | \n", "Genre | \n", "Action | \n", "Adventure | \n", "Animation | \n", "Biography | \n", "Comedy | \n", "Crime | \n", "Documentary | \n", "Drama | \n", "... | \n", "N/A | \n", "News | \n", "Reality-TV | \n", "Romance | \n", "Sci-Fi | \n", "Short | \n", "Sport | \n", "Thriller | \n", "War | \n", "Western | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "tt0086425 | \n", "['Comedy', 'Drama'] | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
1 | \n", "tt0085549 | \n", "['Drama', 'Romance', 'Music'] | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
2 | \n", "tt0086465 | \n", "['Comedy'] | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
3 | \n", "tt0086567 | \n", "['Sci-Fi', 'Thriller'] | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
4 | \n", "tt0086034 | \n", "['Action', 'Adventure', 'Thriller'] | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
5 rows × 27 columns
\n", "\n", " | Action | \n", "Adventure | \n", "Animation | \n", "Biography | \n", "Comedy | \n", "Crime | \n", "Documentary | \n", "Drama | \n", "Family | \n", "Fantasy | \n", "... | \n", "Reality-TV | \n", "Romance | \n", "Sci-Fi | \n", "Short | \n", "Sport | \n", "Thriller | \n", "War | \n", "Western | \n", "path | \n", "fold | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "multi_label_dataset/Images/tt0086425.jpg | \n", "train | \n", "
1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "multi_label_dataset/Images/tt0085549.jpg | \n", "train | \n", "
2 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "multi_label_dataset/Images/tt0086465.jpg | \n", "train | \n", "
3 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "multi_label_dataset/Images/tt0086567.jpg | \n", "test | \n", "
4 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "multi_label_dataset/Images/tt0086034.jpg | \n", "test | \n", "
5 rows × 27 columns
\n", "