id query code labels metrics f1 trajectory
184 681 "Using the information from the audio file '1284-1180-0003.flac', can you answer this: what place did the person from the audio wish to explore?"

GT

output0 = automatic_speech_recognition(audio="1284-1180-0003.flac")
output1 = question_answering(text=output0['text'], question="what place did the person from the audio wish to explore?")

PRED

from mnm.tool_api import automatic_speech_recognition, question_answering
audio_file_path = '/root/input_data/1284-1180-0003.flac'
transcription = automatic_speech_recognition(audio=audio_file_path)
transcribed_text = transcription['text']
question = "What place did the person from the audio wish to explore?"
answer = question_answering(question=question, text=transcribed_text)

GT

automatic_speech_recognition
question_answering

PRED

automatic_speech_recognition
question_answering

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
188 2183 "I have an image COCO_train2014_000000256855.jpg. Can you help me understand the text presented in the image, find related information on Wikipedia and determine the overall sentiment conveyed?"

GT

output0 = optical_character_recognition(image="COCO_train2014_000000256855.jpg")
output1 = wikipedia_simple_search(text=output0['text'])
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import optical_character_recognition, wikipedia_simple_search, text_classification
output0 = optical_character_recognition('/root/input_data/COCO_train2014_000000256855.jpg')
output1 = wikipedia_simple_search(output0['text'])
output2 = text_classification(output1['text'])

GT

optical_character_recognition
wikipedia_simple_search
text_classification

PRED

optical_character_recognition
wikipedia_simple_search
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
191 4356 You are an educator trying to create compelling lesson plans around a recently trending topic of Monkeypox. You are interested in pulling information about Monkeypox, creating a storyline around the information, to make the lessons more engaging. Additionally, you want to determine if the text is leaning towards positive or negative narrative depending on the severity of Monkeypox.

GT

output0 = wikipedia_simple_search(text="What is monkeypox")
output1 = text_generation(text=f"create a compelling storyline about Monkeypox based on {output0['text']}")
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import wikipedia_simple_search, text_generation, text_classification
output0 = wikipedia_simple_search("Monkeypox")
output1 = text_generation(output0['text'])
output2 = text_classification(output0['text'])

GT

wikipedia_simple_search
text_generation
text_classification

PRED

wikipedia_simple_search
text_generation
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
9 2216 I have this file named '14021.jpg'. Could you take a look and determine what's inside? After identifying, could you create a fun story relating to whatever object or creature is shown in this image? And then, based on the story, answer the question What adventure did the main character of the story go on?

GT

output0 = image_classification(image="14021.jpg")
output1 = text_generation(text=f"a fun story relating to the object or creature identified in {output0['text']}")
output2 = question_answering(text=output1['text'], question="What adventure did the main character of the story go on?")

PRED

from PIL import Image
from mnm.tool_api import image_classification, text_generation, question_answering

# Load the image using PIL
image_path = '/root/input_data/14021.jpg'
image = Image.open(image_path)

# Classify the image
classification_result = image_classification(image)

# Generate a fun story related to the classified object 'gasmask, respirator, gas helmet'
prompt = "In a post-apocalyptic world, a lone survivor finds an old gasmask. This gasmask isn't ordinary; it has a story to tell. The survivor puts it on and suddenly, "
story_result = text_generation(prompt)

# The question about the main character's adventure
question = "What adventure did the main character of the story go on?"

# Answer the question based on the story text
adventure_result = question_answering(question, story_result['text'])

GT

image_classification
text_generation
question_answering

PRED

image_classification
text_generation
question_answering

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
10 3221 "I'm prepping for a trivia night and I want to blow everyone's mind with some unknown facts about July 20. Can you find out some engaging ones and spin them into a fascinating anecdote for me? After, could you then condense it all down to a brief, snappy version as well?"

GT

output0 = get_date_fact(date="20-Jul")
output1 = text_generation(text=f"an engaging anecdote based on {output0['text']}")
output2 = text_summarization(text=output1['text'])

PRED

from mnm.tool_api import get_date_fact, text_generation, text_summarization
facts_about_july_20 = get_date_fact("July 20")
anecdote = text_generation(f"On July 20, {facts_about_july_20['year']}, a significant event unfolded: {facts_about_july_20['text']}. Let me tell you a story about that day.")
brief_anecdote = text_summarization(anecdote['text'])

GT

get_date_fact
text_generation
text_summarization

PRED

get_date_fact
text_generation
text_summarization

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
11 3299 "Can you tell me something interesting about July 11th, then condense the information into a concise and readable format, and finally, provide a related engaging visual representation?"

GT

output0 = get_date_fact(date="11-Jul")
output1 = text_summarization(text=output0['text'])
output2 = image_generation(text=f"an engaging visual representation related to {output1['text']}")

PRED

from mnm.tool_api import get_date_fact, text_summarization, image_generation
output0 = get_date_fact("July 11")
output1 = text_summarization(output0['text'])
output2 = image_generation(output1['text'])

GT

get_date_fact
text_summarization
image_generation

PRED

get_date_fact
text_summarization
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
13 1837 I have an image 2336649.jpg. I need to understand the key points of this scene in a more condensed form first and then I need a continuation of the scene's narrative. Could you please analyze the image, provide a summary of what's happening, and then generate a follow-up scenario?

GT

output0 = image_captioning(image="2336649.jpg")
output1 = text_generation(text=f"Summarize the text: {output0['text']}")
output2 = text_generation(text=f"a narrative continuation of {output1['text']}")

PRED

from mnm.tool_api import image_captioning, text_generation
from PIL import Image
import io

# Load the image and convert it to a PIL image
image_path = '/root/input_data/2336649.jpg'
with open(image_path, 'rb') as image_file:
image_data = image_file.read()
image = Image.open(io.BytesIO(image_data))

# Generate a caption for the image
output0 = image_captioning(image)

# Use the summary from the image captioning as the prompt for text generation
summary_text = output0['text']

# Generate a continuation of the scene's narrative
output1 = text_generation(summary_text)

GT

image_captioning
text_generation
text_generation

PRED

image_captioning
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
15 754 "I would like to find out an interesting fact about May 7th and then analyze the sentiment of the fact presented. Is the fact generally positive or negative?"

GT

output0 = get_date_fact(date="7-May")
output1 = text_classification(text=output0['text'])

PRED

from mnm.tool_api import get_date_fact, text_classification
fact_result = get_date_fact("05/07")
sentiment_result = text_classification(fact_result['text'])

GT

get_date_fact
text_classification

PRED

get_date_fact
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
176 1089 I'm working on a project based on a 'School programme aimed at creating a generation of high-tech entrepreneurs.' Need to create a brochure cover for it. Can we generate a write-up that captures the spirit of the program, then create a vibrant image to represent it? After that, can we edit the image - add a group of diverse young people working together on a high-tech project please, to personify the target group?

GT

output0 = text_generation(text="a brochure cover write-up that captures the spirit of the School programme aimed at creating a generation of high-tech entrepreneurs.")
output1 = image_generation(text=f"a vibrant image to represent {output0['text']}")
output2 = image_editing(image=output1['image'], prompt="add a group of diverse young people working together on a high-tech project")

PRED

from mnm.tool_api import text_generation, image_generation, image_editing
output0 = text_generation(text="Create a generation of high-tech entrepreneurs through a dedicated school programme that fosters innovation, collaboration, and technological expertise among young students.")
output1 = image_generation(text=output0['text'])
output2 = image_editing(image=output1['image'], prompt="Add a group of diverse young people working together on a high-tech project to the image.")

GT

text_generation
image_generation
image_editing

PRED

text_generation
image_generation
image_editing

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
177 307 I'm creating a presentation slide on gender equality at workplaces. I require an image depicting a "Women worker looking at rear view mirror smiling". Once the image is ready, I need an accurate and succinct description of the image that would make the presentation more informative and easily understandable to the audience.

GT

output0 = image_generation(text="an image depicting a woman worker looking at rear view mirror smiling")
output1 = image_captioning(image=output0['image'])

PRED

from mnm.tool_api import image_generation, image_captioning
output0 = image_generation(text='Women worker looking at rear view mirror smiling')
output1 = image_captioning(image=output0['image'])

GT

image_generation
image_captioning

PRED

image_generation
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
178 3533 "I'd like to visualize an event from the year 2014. Could you find an interesting fact from this year and then provide an image representation of it? Afterwards, can you identify what the primary subject of that image is?"

GT

output0 = get_year_fact(year="2014")
output1 = image_generation(text=f"an image representation of {output0['text']}")
output2 = object_detection(image=output1['image'])

PRED

from mnm.tool_api import get_year_fact, image_generation, object_detection
year_fact = get_year_fact("2014")
generated_image = image_generation(year_fact['text'])
detected_objects = object_detection(generated_image['image'])

GT

get_year_fact
image_generation
object_detection

PRED

get_year_fact
image_generation
object_detection

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
179 182 "I recently watched this amazing movie called 'Moonlight' that was released in 2016. I want to know more about its genre and who directed it. Can you help me find that information?"

GT

output0 = search_movie(movie_title="Moonlight", movie_year="2016")

PRED

from mnm.tool_api import search_movie
movie_info = search_movie(movie_title="Moonlight", movie_year="2016")

GT

search_movie

PRED

search_movie

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
181 4037 "Hey, I recently watched Twilight and wondered what's the compatibility between Edward Cullen and Bella Swan in a love relationship. Also, entertain me with an intriguing trivia fact related to their love score. Then, could you analyze the sentiment of the fact you just shared?"

GT

output0 = love_calculator(first_name="Edward Cullen", second_name="Bella Swan")
output1 = get_trivia_fact(number=output0['number'])
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import love_calculator, get_trivia_fact, text_classification
output0 = love_calculator('Edward Cullen', 'Bella Swan')
output1 = get_trivia_fact(output0['number'])
output2 = text_classification(output1['text'])

GT

love_calculator
get_trivia_fact
text_classification

PRED

love_calculator
get_trivia_fact
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
182 3222 "I'm working on a historical fiction novel set around July 20. Could you find some amazing real-life occurrences that happened on this date and then generate a vivid, elaborate narrative based on those facts? Afterwards, could you create a concise summary of the narrative that I could use as an outline?"

GT

output0 = get_date_fact(date="20-Jul")
output1 = text_generation(text=f"a vivid, elaborate narrative based on {output0['text']}")
output2 = text_summarization(text=output1['text'])

PRED

from mnm.tool_api import get_date_fact, text_generation, text_summarization
output0 = get_date_fact(date="July 20")
output1 = text_generation(text=output0['text'])
output2 = text_summarization(text=output1['text'])

GT

get_date_fact
text_generation
text_summarization

PRED

get_date_fact
text_generation
text_summarization

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
16 629 "Could you analyze image 2353266.jpg and provide me with the total number of sections or distinctive parts that make up the image?"

GT

output0 = image_segmentation(image="2353266.jpg")
output1 = count(objects=output0['objects'])

PRED

from mnm.tool_api import image_segmentation, count
segmentation_result = image_segmentation(image_path='/root/input_data/2353266.jpg')
number_of_sections = count(segmentation_result['objects'])

GT

image_segmentation
count

PRED

image_segmentation
count

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
18 3293 "I'm curious about the historical significance of May 4th. Could you find out an interesting fact about this date, provide a summarized version of it, and then use that information to find a related page on Wikipedia? I'd appreciate the most critical details."

GT

output0 = get_date_fact(date="4-May")
output1 = text_summarization(text=output0['text'])
output2 = wikipedia_simple_search(text=output1['text'])

PRED

from mnm.tool_api import get_date_fact, text_summarization, wikipedia_simple_search
output0 = get_date_fact("May 4")
output1 = text_summarization(output0['text'])
output2 = wikipedia_simple_search(output1['text'])

GT

get_date_fact
text_summarization
wikipedia_simple_search

PRED

get_date_fact
text_summarization
wikipedia_simple_search

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
19 1686 "For a digital art project, I need an image of a woman wearing glasses posing behind her birthday cake. Could you produce such an image, focus on the upper portion to capture her expression, and also provide me with the positions of the objects that are detected in the cropped image?"

GT

output0 = image_generation(text="An image focused on the upper portion of a woman wearing glasses posing behind her birthday cake.")
output1 = image_crop_top(image=output0['image'])
output2 = object_detection(image=output1['image'])

PRED

from mnm.tool_api import image_generation, image_crop_top, object_detection
output0 = image_generation(text="a woman wearing glasses posing behind her birthday cake")
output1 = image_crop_top(image=output0['image'])
output2 = object_detection(image=output1['image'])

GT

image_generation
image_crop_top
object_detection

PRED

image_generation
image_crop_top
object_detection

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
20 3826 I am writing a novel which involves the significance of numbers, specifically the number 2. Can you compile an enlightening bite-sized fact about the number 2 and then generate a textual elaboration about it that I can weave into my storyline?

GT

output0 = get_trivia_fact(number="2")
output1 = text_generation(text=f"Summarize the text: {output0['text']}")
output2 = text_generation(text=f"an enlightening and detailed elaboration on {output1['text']}")

PRED

from mnm.tool_api import get_trivia_fact, text_generation
fact_about_number_2 = get_trivia_fact("2")
elaboration_about_number_2 = text_generation(fact_about_number_2['text'])

GT

get_trivia_fact
text_generation
text_generation

PRED

get_trivia_fact
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
21 1131 As we're navigating through the holiday season, could you please use the prompt 'Poll: How are you preparing for Christmas?' to generate a text that outlines common Christmas preparations? After that, I'd love if you could visually depict these preparations as an image. Lastly, could you specifically segment the image so I can clearly see the different parts of the Christmas preparations - such as decoration, meal preparation, gift wrapping, etc.?

GT

output0 = text_generation(text="a text that outlines common Christmas preparations based on the prompt \"Poll: How are you preparing for Christmas?\"")
output1 = image_generation(text=f"a visual depiction of {output0['text']}")
output2 = image_segmentation(image=output1['image'])

PRED

from mnm.tool_api import text_generation, image_generation, image_segmentation
output0 = text_generation(text='Poll: How are you preparing for Christmas?')
output1 = image_generation(text='Decorating the house for Christmas with lights and a tree')
output2 = image_segmentation(image=output1['image'])

GT

text_generation
image_generation
image_segmentation

PRED

text_generation
image_generation
image_segmentation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
22 765 "I'm curious to see a visual representation of an interesting historical fact that took place on the 19th of October. Can you help me with this?"

GT

output0 = get_date_fact(date="19-Oct")
output1 = image_generation(text=f"a visual representation of {output0['text']}")

PRED

from mnm.tool_api import get_date_fact, image_generation
output0 = get_date_fact("October 19")
output1 = image_generation(text=output0['text'])

GT

get_date_fact
image_generation

PRED

get_date_fact
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
23 14 "I've got this review for a movie: 'it is amusing , and that 's all it needs to be . '. Could you analyze this review and let me know if the movie sentiment is positive or negative?"

GT

output0 = text_classification(text="it is amusing , and that 's all it needs to be . ")

PRED

from mnm.tool_api import text_classification
sentiment_result = text_classification("it is amusing , and that 's all it needs to be .")

GT

text_classification

PRED

text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
24 2531 "For social media marketing, could you manipulate the 111376-input.png to depict the cow wearing a hat, then retain only the left part of this modified image to showcase the full appearance of the cow with the hat, and then generate a catchy caption that would attract viewers' interest?"

GT

output0 = image_editing(image="111376-input.png", prompt="Have the cow wear a hat.")
output1 = image_crop_left(image=output0['image'])
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import image_editing, image_crop_left, image_captioning
output0 = image_editing(image='/root/input_data/111376-input.png', prompt="a cow wearing a hat")
output1 = image_crop_left(image=output0['image'])
output2 = image_captioning(image=output1['image'])

GT

image_editing
image_crop_left
image_captioning

PRED

image_editing
image_crop_left
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
25 4070 I'm writing a review for a movie, 'CODA', that was released in 2021. Could you pull up the necessary details about this movie, generate a comprehensive review using those details, and then condense it to a brief, critical overview that I could use for my blog post?

GT

output0 = search_movie(movie_title="CODA", movie_year="2021")
output1 = text_generation(text=f"a comprehensive review of the movie, CODA, based on {output0['text']}")
output2 = text_generation(text=f"Summarize the text: {output1['text']}")

PRED

from mnm.tool_api import search_movie, text_generation
output0 = search_movie(movie_title='CODA', movie_year='2021')
output1 = text_generation(text=f"Write a comprehensive review for the movie titled 'CODA', released in 2021. Here are some details: {output0['text']}")
output2 = text_generation(text=f"Condense the following comprehensive review into a brief, critical overview suitable for a blog post: {output1['text']}")

GT

search_movie
text_generation
text_generation

PRED

search_movie
text_generation
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
27 3443 "I am planning to write a blog post about the significant events anticipated in the year 2023. Could you help me by first sharing some interesting historical facts for the year, then come up with a captivating introduction for my post based on those facts? Additionally, I'd need a creative illustration that matches the introduction to grab the reader's attention."

GT

output0 = get_year_fact(year="2023")
output1 = text_generation(text=f"a captivating introduction for a blog post based on {output0['text']}")
output2 = image_generation(text=f"a creative illustration that matches {output1['text']}")

PRED

from mnm.tool_api import get_year_fact, text_generation, image_generation
year_facts = get_year_fact('2023')
intro_prompt = "In 2023, a remarkable engineering feat was accomplished as the Istanbul Canal reached completion. This event not only reshapes the geography but also marks a significant milestone in the annals of history. Let's delve into the year 2023, a year of innovation and progress, and explore the significant events that are anticipated to unfold."
introduction = text_generation(intro_prompt)
illustration = image_generation(intro_prompt)

GT

get_year_fact
text_generation
image_generation

PRED

get_year_fact
text_generation
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
28 2203 I have an image, specifically '11665.jpg' and I want to create a story from it. After determining the main subject of the photo, I would like to generate a comprehensive narrative, then classify it to see if the overall sentiment of the story is positive or negative.

GT

output0 = image_captioning(image="11665.jpg")
output1 = text_generation(text=f"a comprehensive narrative based on {output0['text']}")
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import image_captioning, text_generation, text_classification
caption_result = image_captioning(image='/root/input_data/11665.jpg')
narrative_result = text_generation(text=caption_result['text'])
sentiment_result = text_classification(text=narrative_result['text'])

GT

image_captioning
text_generation
text_classification

PRED

image_captioning
text_generation
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
29 219 Based on the phrase "You've got to ask yourself one question: Do I feel lucky?" could you generate a possible dialogue or continuation that could come naturally after this? And then, could you give me a short summary of what the generated text primarily conveys?

GT

output0 = text_generation(text="a possible dialogue or continuation that could come naturally after \"You've got to ask yourself one question: Do I feel lucky?\"")
output1 = text_summarization(text=output0['text'])

PRED

from mnm.tool_api import text_generation, text_summarization
generated_dialogue = text_generation(text="You've got to ask yourself one question: Do I feel lucky?")
summary = text_summarization(text=generated_dialogue['text'])

GT

text_generation
text_summarization

PRED

text_generation
text_summarization

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
30 28 "I am currently studying plant biology and came across a passage that mentions different types of chloroplasts and something called a 'plastid.' The passage reads: 'These chloroplasts, which can be traced back directly to a cyanobacterial ancestor, are known as primary plastids ("plastid" in this context means almost the same thing as chloroplast). All primary chloroplasts belong to one of three chloroplast lineages—the glaucophyte chloroplast lineage, the rhodophyte, or red algal chloroplast lineage, or the chloroplastidan, or green chloroplast lineage. The second two are the largest, and the green chloroplast lineage is the one that contains the land plants.' Can you help out and tell me 'What does 'plastid' mean?' based on this passage?"

GT

output0 = question_answering(text="These chloroplasts, which can be traced back directly to a cyanobacterial ancestor, are known as primary plastids (\"plastid\" in this context means almost the same thing as chloroplast). All primary chloroplasts belong to one of three chloroplast lineages—the glaucophyte chloroplast lineage, the rhodophyte, or red algal chloroplast lineage, or the chloroplastidan, or green chloroplast lineage. The second two are the largest, and the green chloroplast lineage is the one that contains the land plants.", question="What does 'plastid' mean?")

PRED

from mnm.tool_api import question_answering
output0 = question_answering(question="What does 'plastid' mean?", text="These chloroplasts, which can be traced back directly to a cyanobacterial ancestor, are known as primary plastids ('plastid' in this context means almost the same thing as chloroplast). All primary chloroplasts belong to one of three chloroplast lineages—the glaucophyte chloroplast lineage, the rhodophyte, or red algal chloroplast lineage, or the chloroplastidan, or green chloroplast lineage. The second two are the largest, and the green chloroplast lineage is the one that contains the land plants.")

GT

question_answering

PRED

question_answering

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
31 1564 I am designing a game and want to create an environment based on 'a bathroom with a sink and television in it'. I need to generate this scene and specifically focus on the left side of the image, to extract any text possibly appearing there - such as signboards, mirrors with text imprint, tv screen text etc, for further translation into different languages in my game.

GT

output0 = image_generation(text="a game environment based on a bathroom with a sink and television in it with a specific focus on the left side")
output1 = image_crop_left(image=output0['image'])
output2 = optical_character_recognition(image=output1['image'])

PRED

from mnm.tool_api import image_generation, image_crop_left, optical_character_recognition
output0 = image_generation("a bathroom with a sink and television in it")
output1 = image_crop_left(output0['image'])
output2 = optical_character_recognition(output1['image'])

GT

image_generation
image_crop_left
optical_character_recognition

PRED

image_generation
image_crop_left
optical_character_recognition

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
32 3913 "I'm working on a classroom project about the number 3. Could you help me by providing an interesting fact about the number 3 and then creating an image that visually symbolizes that fact? After it is created, I would like for the image to be broken down into its segmented components for a more thorough analysis."

GT

output0 = get_trivia_fact(number="3")
output1 = image_generation(text=f"an image that visually symbolizes the interesting fact about the number 3 from {output0['text']}")
output2 = image_segmentation(image=output1['image'])

PRED

from mnm.tool_api import get_trivia_fact, image_generation, image_segmentation
fact_about_number_3 = get_trivia_fact("3")
generated_image = image_generation(fact_about_number_3['text'])
segmented_components = image_segmentation(generated_image['image'])

GT

get_trivia_fact
image_generation
image_segmentation

PRED

get_trivia_fact
image_generation
image_segmentation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
33 2683 "I have a picture identified as '270650-input.png'. I wish to add skis to the wheel of the vehicle in the photo. After modifying the image, I would like to obtain the lower half for use in a promotional material. Please also provide a suitable caption that can give context to the modified image."

GT

output0 = image_editing(image="270650-input.png", prompt="put skis on the wheel")
output1 = image_crop_bottom(image=output0['image'])
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import image_editing, image_crop_bottom, image_captioning
edited_image_result = image_editing(image='/root/input_data/270650-input.png', prompt='Add skis to the wheel of the vehicle in the photo.')
cropped_image_result = image_crop_bottom(image=edited_image_result['image'])
caption_result = image_captioning(image=cropped_image_result['image'])

GT

image_editing
image_crop_bottom
image_captioning

PRED

image_editing
image_crop_bottom
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
34 493 "With the specified file titled '12633.jpg', I'd like you to determine what the image is of. After that, could you generate a new stylized image of the same subject to use in my upcoming presentation?"

GT

output0 = image_classification(image="12633.jpg")
output1 = image_generation(text=f"a new stylized image of {output0['text']} for an upcoming presentation")

PRED

from mnm.tool_api import image_classification, image_generation
classification_result = image_classification(image='/root/input_data/12633.jpg')
stylized_image_result = image_generation(text=classification_result['text'])

GT

image_classification
image_generation

PRED

image_classification
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
35 150 "I'm preparing for a trivia night and need to impress my friends. Could you provide me with an interesting fact about the number 8?"

GT

output0 = get_trivia_fact(number="8")

PRED

from mnm.tool_api import get_trivia_fact
fact_about_number_8 = get_trivia_fact("8")

GT

get_trivia_fact

PRED

get_trivia_fact

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
36 380 "Hey, I need to create an invitation card for my friend. It should feature an image of a woman playing with a video game controller. Once you've got that, could you please transcribe the text from the generated image to help me build the message for the invitation?"

GT

output0 = image_generation(text="An image of a woman playing with a video game controller for an invitation card.")
output1 = optical_character_recognition(image=output0['image'])

PRED

from mnm.tool_api import image_generation, optical_character_recognition
output0 = image_generation(text="a woman playing with a video game controller")
output1 = optical_character_recognition(image=output0['image'])

GT

image_generation
optical_character_recognition

PRED

image_generation
optical_character_recognition

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
37 3694 "My son is turning seven and I want to make a unique birthday card for him. Get me a math fact focused on the number 7. Then, let's make a fitting image using this fact, something that screams birthday fun and numbers! Finally, modify the image by overlaying a big, bold 'Happy 7th Birthday!' text at the top. Use Overlay big, bold, and colourful 'Happy 7th Birthday!' text at the top of the image. for the image modification instructions."

GT

output0 = get_math_fact(number="7")
output1 = image_generation(text=f"a fun and festive birthday image using {output0['text']}")
output2 = image_editing(image=output1['image'], prompt="Overlay big, bold, and colourful 'Happy 7th Birthday!' text at the top of the image.")

PRED

from mnm.tool_api import get_math_fact, image_generation, image_editing
math_fact = get_math_fact('7')
description = "A birthday celebration with balloons, cake, and the number 7, which is the only dimension, besides the familiar 3, in which a vector cross product can be defined."
birthday_image = image_generation(description)
overlay_prompt = "Overlay big, bold, and colourful 'Happy 7th Birthday!' text at the top of the image."
final_image = image_editing(birthday_image['image'], overlay_prompt)

GT

get_math_fact
image_generation
image_editing

PRED

get_math_fact
image_generation
image_editing

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
38 650 I have an audio file titled '237-134500-0000.flac'. Could you transcribe the audio and then generate a story that could naturally follow this conversation?

GT

output0 = automatic_speech_recognition(audio="237-134500-0000.flac")
output1 = text_generation(text=f"a story that could naturally follow the conversation in {output0['text']}")

PRED

from mnm.tool_api import automatic_speech_recognition, text_generation
transcription = automatic_speech_recognition(audio='/root/input_data/237-134500-0000.flac')
story = text_generation(text=transcription['text'])

GT

automatic_speech_recognition
text_generation

PRED

automatic_speech_recognition
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
39 590 "I have a photo stored as '270650-input.png'. I need to create a picture where there are skis on the wheel. Once you make that, just keep the bottom part of the photo, where the wheel is located."

GT

output0 = image_editing(image="270650-input.png", prompt="put skis on the wheel")
output1 = image_crop_bottom(image=output0['image'])

PRED

from mnm.tool_api import image_editing, image_crop_bottom
edited_image_result = image_editing(image='/root/input_data/270650-input.png', prompt="Add skis on the wheel.")
cropped_image_result = image_crop_bottom(image=edited_image_result['image'])

GT

image_editing
image_crop_bottom

PRED

image_editing
image_crop_bottom

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
40 4098 I am looking for information on a movie titled "Everything Everywhere All at Once" released in 2022. Following this, could you create a text description of an imaginary sequel based on the initial movie's info? Also, I'd love if you could generate an accompanying promotional poster design for this hypothetical sequel.

GT

output0 = search_movie(movie_title="Everything Everywhere All at Once", movie_year="2022")
output1 = text_generation(text=f"a text description of an imaginary sequel based on {output0['text']}")
output2 = image_generation(text=f"a promotional poster design for the hypothetical sequel described in {output1['text']}")

PRED

from mnm.tool_api import search_movie, text_generation, image_generation
output0 = search_movie(movie_title="Everything Everywhere All at Once", movie_year="2022")
output1 = text_generation(text=output0['text'])
output2 = image_generation(text=output1['text'])

GT

search_movie
text_generation
image_generation

PRED

search_movie
text_generation
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
41 82 "Could you analyze the image 1592452.jpg and let me know which different objects are contained within it? I am trying to identify key elements in this night time cityscape."

GT

output0 = object_detection(image="1592452.jpg")

PRED

from PIL import Image
from mnm.tool_api import object_detection
image = Image.open('/root/input_data/1592452.jpg')
output0 = object_detection(image)

GT

object_detection

PRED

object_detection

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
44 3337 "I'm having a special event on May 4th and I want to make a unique visual connection with it. Can you tell me an interesting historical event that occurred on this date, create a visual representation of it, and then confirm what the main subject of the image is?"

GT

output0 = get_date_fact(date="4-May")
output1 = image_generation(text=f"a unique visual representation of {output0['text']}")
output2 = image_classification(image=output1['image'])

PRED

from mnm.tool_api import get_date_fact, image_generation, image_classification
output0 = get_date_fact("May 4th")
output1 = image_generation(output0['text'])
output2 = image_classification(output1['image'])

GT

get_date_fact
image_generation
image_classification

PRED

get_date_fact
image_generation
image_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
168 3596 "I'm working on a 2020 year-in-review project. Can you give me a fun fact about the year 2020, create a striking representation of that fact as an image, and then transcribe any text present in the image?"

GT

output0 = get_year_fact(year="2020")
output1 = image_generation(text=f"a striking representation of {output0['text']}")
output2 = optical_character_recognition(image=output1['image'])

PRED

from mnm.tool_api import get_year_fact, image_generation, optical_character_recognition
output0 = get_year_fact('2020')
output1 = image_generation(output0['text'])
output2 = optical_character_recognition(output1['image'])

GT

get_year_fact
image_generation
optical_character_recognition

PRED

get_year_fact
image_generation
optical_character_recognition

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
170 4095 I just finished watching '12 Years a Slave' from 2013 and I'm curious to dig a little deeper into it. Could you help me find more details about this film like the genre and who directed it? Then, could you compose a brief summary based on this information? To top it off, I want to see an intriguing image representation inspired by the summary you provided.

GT

output0 = search_movie(movie_title="12 Years a Slave", movie_year="2013")
output1 = text_generation(text=f"a brief summary based on the detailed information of the film from {output0['text']}")
output2 = image_generation(text=f"an intriguing image representation inspired by the summary from {output1['text']}")

PRED

from mnm.tool_api import search_movie, text_generation, image_generation
output0 = search_movie(movie_title='12 Years a Slave', movie_year='2013')
output1 = text_generation(text=output0['text'])
output2 = image_generation(text=output1['text'])

GT

search_movie
text_generation
image_generation

PRED

search_movie
text_generation
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
171 196 "Can you tell me what the weather forecast is for the coordinates 17.14 longitude and 32.81 latitude? I am planning my week and need to know if it's best to carry an umbrella or don a sunhat."

GT

output0 = get_weather(lon="17.14", lat="32.81")

PRED

from mnm.tool_api import get_weather
weather_forecast = get_weather(lon="17.14", lat="32.81")

GT

get_weather

PRED

get_weather

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
173 674 "I have a lengthy recording, 2300-131720-0000.flac. Can you help me provide a short and concise summary of what was discussed, so I can easily review the main points?"

GT

output0 = automatic_speech_recognition(audio="2300-131720-0000.flac")
output1 = text_summarization(text=output0['text'])

PRED

from mnm.tool_api import automatic_speech_recognition, text_summarization
transcription = automatic_speech_recognition(audio='/root/input_data/2300-131720-0000.flac')
summary = text_summarization(text=transcription['text'])
summary_text = summary['text']

GT

automatic_speech_recognition
text_summarization

PRED

automatic_speech_recognition
text_summarization

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
174 2192 Suppose I have '17464.jpg' and I want to create a social media post about what is in the image, but I'm not entirely sure what to write. Could you analyze the image, generate a captivating text based on it, and provide a succinct version that would suit as a caption?

GT

output0 = image_captioning(image="17464.jpg")
output1 = text_generation(text=f"a captivating text about {output0['text']}")
output2 = text_summarization(text=output1['text'])

PRED

from mnm.tool_api import image_captioning, text_generation, text_summarization
output0 = image_captioning(image='/root/input_data/17464.jpg')
output1 = text_generation(text=output0['text'])
output2 = text_summarization(text=output1['text'])

GT

image_captioning
text_generation
text_summarization

PRED

image_captioning
text_generation
text_summarization

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
175 4096 I'm trying to remember more about the movie '12 Years a Slave' that I watched in 2013. Could you provide me with a few key details like director and genre? After that, I'd love if you could generate a text that highlights the main plot twists, essentially creating a retrospective of the story. For a visual aid, could you create an image that evokes the mood and elements from this retrospective text?

GT

output0 = search_movie(movie_title="12 Years a Slave", movie_year="2013")
output1 = text_generation(text=f"a retrospective highlighting the main plot twists of {output0['text']}")
output2 = image_generation(text=f"an image that evokes the mood and elements from {output1['text']}")

PRED

from mnm.tool_api import search_movie, text_generation, image_generation
output0 = search_movie(movie_title='12 Years a Slave', movie_year='2013')
output1 = text_generation(text="Retrospective of the movie '12 Years a Slave':")
output2 = image_generation(text=output1['text'])

GT

search_movie
text_generation
image_generation

PRED

search_movie
text_generation
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
48 136 "I'm preparing for a 2023-themed trivia night. Can you find any interesting or notable facts about the year 2023 to feature in the game?"

GT

output0 = get_year_fact(year="2023")

PRED

from mnm.tool_api import get_year_fact
output0 = get_year_fact("2023")

GT

get_year_fact

PRED

get_year_fact

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
51 1871 "Hey, can you help me understand what's happening in this image 2350021.jpg by describing it? Also, could you condense this description into a crisp summary and finally, could you find the most relevant Wikipedia page for this context?"

GT

output0 = image_captioning(image="2350021.jpg")
output1 = text_summarization(text=output0['text'])
output2 = wikipedia_simple_search(text=output1['text'])

PRED

from PIL import Image
from mnm.tool_api import image_captioning, text_summarization, wikipedia_simple_search

# Load the image and generate a description
image = Image.open('/root/input_data/2350021.jpg')
description = image_captioning(image)

# Summarize the description
summary = text_summarization(description['text'])

# Find the most relevant Wikipedia page for the context
wikipedia_result = wikipedia_simple_search(summary['text'])

GT

image_captioning
text_summarization
wikipedia_simple_search

PRED

image_captioning
text_summarization
wikipedia_simple_search

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
53 4150 I'm about to start an essay on the 2012 movie 'Argo' but can't recall all the details. Can you bring up the key information, condense the plotline into a crisp summary, and create a representative image that illustrates the film's essence?

GT

output0 = search_movie(movie_title="Argo", movie_year="2012")
output1 = text_summarization(text=output0['text'])
output2 = image_generation(text=f"a representative image that illustrates the essence of {output1['text']}")

PRED

from mnm.tool_api import search_movie, text_summarization, image_generation
output0 = search_movie(movie_title="Argo", movie_year="2012")
output1 = text_summarization(text=output0['text'].split('Plot: ')[1].strip())
output2 = image_generation(text=output1['text'])

GT

search_movie
text_summarization
image_generation

PRED

search_movie
text_summarization
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
160 2771 I have a quirky image 2382602.jpg. I want to make it a bit funnier. Could you please find the motorcycle in the image and replace it with a goblin emoji?

GT

output0 = object_detection(image="2382602.jpg")
output1 = select_object(objects=output0['objects'], object_name="motorcycle")
output2 = emoji(image="2382602.jpg", object=output1['object'], emoji="goblin")

PRED

from mnm.tool_api import object_detection, select_object, emoji
detection_result = object_detection(image='/root/input_data/2382602.jpg')
motorcycle_object = select_object(objects=detection_result['objects'], object_name='motorcycle')
result_image = emoji(image='/root/input_data/2382602.jpg', object=motorcycle_object['object'], emoji='goblin')

GT

object_detection
select_object
emoji

PRED

object_detection
select_object
emoji

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
163 4076 "Can you get me some information about the movie 'Argo' released in 2012? After obtaining it, use that to create a convincing movie review as if you personally watched it. Then, analyze the tone of that review and let me know if it's essentially positive or negative in sentiment?"

GT

output0 = search_movie(movie_title="Argo", movie_year="2012")
output1 = text_generation(text=f"a convincing movie review based on {output0['text']} as if you personally watched it")
output2 = text_generation(text=output1['text'])

PRED

from mnm.tool_api import search_movie, text_generation
movie_info = search_movie(movie_title='Argo', movie_year='2012')
movie_review = text_generation(text=f"I just watched {movie_info['text']} It was an amazing experience!")

GT

search_movie
text_generation
text_generation

PRED

search_movie
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
164 2410 I'm planning to revamp my home interior, using "417814-input.png" as reference. I want to see how the space would appear if the carpet was replaced with a wooden floor. Also, could you determine the total number of objects present in the image once the modifications are made?

GT

output0 = image_editing(image="417814-input.png", prompt="Let the carpet be changed to wooden floor.")
output1 = object_detection(image=output0['image'])
output2 = count(objects=output1['objects'])

PRED

from PIL import Image
from mnm.tool_api import image_editing, object_detection, count

# Open the image using PIL
image_path = '/root/input_data/417814-input.png'
image = Image.open(image_path)

# Edit the image to replace the carpet with a wooden floor
output0 = image_editing(image, "Replace the carpet with a wooden floor.")

# Detect objects in the edited image
output1 = object_detection(output0['image'])

# Count the total number of objects in the edited image
output2 = count(output1['objects'])

GT

image_editing
object_detection
count

PRED

image_editing
object_detection
count

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
166 94 "I have an image 2385634.jpg. Could you help me analyze the different components of this scene for my blog on waste management? It would be great if I could visually separate the oven from the rest of the image to demonstrate how much space it occupies."

GT

output0 = image_segmentation(image="2385634.jpg")

PRED

from PIL import Image
from mnm.tool_api import image_segmentation
import numpy as np
import os

# Load the image using PIL
image_path = '/root/input_data/2385634.jpg'
image = Image.open(image_path)

# Perform image segmentation to get masks for all objects
segmentation_result = image_segmentation(image)

# Find the segmentation mask for the oven
oven_mask = None
for obj in segmentation_result['objects']:
if obj['label'] == 'oven':
oven_mask = obj['mask']
break

# Create a new image that highlights the oven by masking out the rest
oven_mask_image = np.repeat(oven_mask[:, :, np.newaxis], 3, axis=2)
oven_only_image = np.where(oven_mask_image, np.array(image), 0)

# Convert the oven_only_image numpy array to a PIL image
oven_only_pil_image = Image.fromarray(oven_only_image.astype(np.uint8))

# Define the output path for the oven-only image
output_image_path = '/root/output_data/oven_only_image.jpg'

# Create the output directory if it does not exist
os.makedirs(os.path.dirname(output_image_path), exist_ok=True)

# Save the oven-only image to the output path
oven_only_pil_image.save(output_image_path)

GT

image_segmentation

PRED

image_segmentation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
167 3997 "I'd like to know how romantically compatible William Thacker and Anna Scott are. Based on that, feed me a math fact that can lighten the mood, and then tell me if the fact is intriguing based on its sentiments."

GT

output0 = love_calculator(first_name="William Thacker", second_name="Anna Scott")
output1 = get_math_fact(number=output0['number'])
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import love_calculator, get_math_fact, text_classification
output0 = love_calculator(first_name="William Thacker", second_name="Anna Scott")
output1 = get_math_fact(number="random")
output2 = text_classification(text=output1['text'])

GT

love_calculator
get_math_fact
text_classification

PRED

love_calculator
get_math_fact
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
57 3601 "I'm doing a presentation for my students about the number 3. Can you first provide some fascinating mathematical details about '3', and then, based on those details, create an engaging and comprehensive lesson plan? After that, please condense this lesson plan into a brief, succinct overview for my reference during the presentation."

GT

output0 = get_math_fact(number="3")
output1 = text_generation(text=f"an engaging and comprehensive lesson plan based on mathematical details of {output0['text']}")
output2 = text_generation(text=f"Summarize the text: {output1['text']}")

PRED

from mnm.tool_api import get_math_fact, text_generation
output0 = get_math_fact('3')
output1 = text_generation("Create a comprehensive and engaging lesson plan for students about the number 3. Start with the fact that a triangle is the only figure which, if all endpoints have hinges, will never change its shape unless the sides themselves are bent. Include activities and discussion points that help students explore the significance of the number 3 in mathematics.")

GT

get_math_fact
text_generation
text_generation

PRED

get_math_fact
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
58 3132 "Looking at the photo labelled as image 2375698.jpg, I am curious to know which place it is. Once we identify the location, it would be great if you could create a new image that represents the cultural aspects or landmarks of that city. Could you also produce a captivating caption for the new image that encompasses the essence of the city?"

GT

output0 = visual_question_answering(image="2375698.jpg", question="Which place is it?")
output1 = image_generation(text=f"an image that represents the cultural aspects or landmarks of {output0['text']}")
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import image_captioning, image_generation, visual_question_answering
image_path = '/root/input_data/2375698.jpg'
caption_result = image_captioning(image_path)
question_about_location = "Which city is this train station located in?"
location_result = visual_question_answering(image_path, question_about_location)
generated_image_result = image_generation('Cultural landmarks of London')
new_image_caption_result = image_captioning(generated_image_result['image'])

GT

visual_question_answering
image_generation
image_captioning

PRED

image_captioning
visual_question_answering
image_generation
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
59 3316 "For a school project, I need to illustrate a memorable event that occurred on the 22nd of March. Could you provide me with a fascinating occurrence on that date? Afterwards, could you create an image that accurately portrays this event? For the final touch, please include a small calendar on the top right corner, highlighting the date '22-Mar'."

GT

output0 = get_date_fact(date="22-Mar")
output1 = image_generation(text=f"an image that accurately portrays the fascinating occurrence of {output0['text']}")
output2 = image_editing(image=output1['image'], prompt="include a small calendar on the top right corner, highlighting the date '22-Mar'")

PRED

from mnm.tool_api import get_date_fact, image_generation, image_editing
date_fact = get_date_fact("2023-03-22")
event_image = image_generation(date_fact['text'])
final_image = image_editing(event_image['image'], "Add a small calendar on the top right corner, highlighting the date '22-Mar'.")

GT

get_date_fact
image_generation
image_editing

PRED

get_date_fact
image_generation
image_editing

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
60 1556 "I have a specific layout in my mind for a room interior - 'A hotel room with a bed and a chair and white walls'. Could you help me visualize just the left portion of this image, so I could get a detailed understanding? Also, a textual picture of the same would be greatly helpful."

GT

output0 = image_generation(text="A detailed image of the left portion of a hotel room with a bed and a chair and white walls.")
output1 = image_crop_left(image=output0['image'])
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import image_generation, image_crop_left, image_captioning
output0 = image_generation(text="A hotel room with a bed and a chair and white walls")
output1 = image_crop_left(output0['image'])
output2 = image_captioning(output1['image'])

GT

image_generation
image_crop_left
image_captioning

PRED

image_generation
image_crop_left
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
61 50 "Could you identify and extract the graffiti text from the image 'COCO_train2014_000000028742.jpg'? I would appreciate it if you could directly translate the words written there."

GT

output0 = optical_character_recognition(image="COCO_train2014_000000028742.jpg")

PRED

from mnm.tool_api import optical_character_recognition
output0 = optical_character_recognition(image='/root/input_data/COCO_train2014_000000028742.jpg')

GT

optical_character_recognition

PRED

optical_character_recognition

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
62 2352 I came across an image named '11899.jpg' in an online archive. It really piqued my interest and I wish to know what it depicts. Could you also tell me something about it from a reliable source? Additionally, I'd like to gauge if the general ambiance of the description is positive or negative.

GT

output0 = image_classification(image="11899.jpg")
output1 = wikipedia_simple_search(text=output0['text'])
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import image_classification, wikipedia_simple_search, text_classification
output0 = image_classification(image='/root/input_data/11899.jpg')
output1 = wikipedia_simple_search(text=output0['text'])
output2 = text_classification(text=output1['text'])

GT

image_classification
wikipedia_simple_search
text_classification

PRED

image_classification
wikipedia_simple_search
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
65 2976 "From an audio file named '237-134500-0000.flac', Is it possible to create a visual depiction of the narration and identify the key objects present in the generated image?"

GT

output0 = automatic_speech_recognition(audio="237-134500-0000.flac")
output1 = image_generation(text=f"a visual depiction of Frank talking about a divorce case that really angers him with reference to {output0['text']}")
output2 = object_detection(image=output1['image'])

PRED

from mnm.tool_api import automatic_speech_recognition, image_generation, object_detection
output0 = automatic_speech_recognition(audio='/root/input_data/237-134500-0000.flac')
output1 = image_generation(text=output0['text'])
output2 = object_detection(image=output1['image'])

GT

automatic_speech_recognition
image_generation
object_detection

PRED

automatic_speech_recognition
image_generation
object_detection

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
67 3758 "I'm creating a fun presentation about the number 7 for my math class. Could you help me find a unique mathematical fact about the number 7 and then generate a relevant image that illustrates that fact? To make it more effective, I want you to crop the image to highlight the top portion."

GT

output0 = get_math_fact(number="7")
output1 = image_generation(text=f"a relevant image that illustrates the unique mathematical fact about the number 7 noted in {output0['text']}")
output2 = image_crop_top(image=output1['image'])

PRED

from mnm.tool_api import get_math_fact, image_generation, image_crop_top
output0 = get_math_fact(number="7")
output1 = image_generation(text=output0['text'])
output2 = image_crop_top(image=output1['image'])

GT

get_math_fact
image_generation
image_crop_top

PRED

get_math_fact
image_generation
image_crop_top

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
68 3223 "I'm trying to write my son a birthday card for his birthday on July 11. Can you help me come up with a unique and interesting fact about this date and also generate a fun and heartwarming birthday wish message related to this fact? Summarize it nicely so it can fit on the card."

GT

output0 = get_date_fact(date="11-Jul")
output1 = text_generation(text=f"a fun and heartwarming birthday wish message related to {output0['text']}")
output2 = text_generation(text=f"Summarize the text: {output1['text']}")

PRED

from mnm.tool_api import get_date_fact, text_generation
date_fact = get_date_fact("07/11")
birthday_wish_prompt = f"Did you know? {date_fact['text']}. Now, here's a birthday wish for you: "
birthday_wish = text_generation(birthday_wish_prompt)

GT

get_date_fact
text_generation
text_generation

PRED

get_date_fact
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
152 4046 "Could you analyze the love possibilities between Romeo Montague and Juliet Capulet based on their names? Additionally, based on whatever percentage of compatibility they receive, is it possible to share some fun trivia associated with that number, and maybe even generate a creatively designed image which portrays this fact?"

GT

output0 = love_calculator(first_name="Romeo Montague", second_name="Juliet Capulet")
output1 = get_trivia_fact(number=output0['number'])
output2 = image_generation(text=f"a creatively designed image which portrays the fact associated with {output1['text']}")

PRED

from mnm.tool_api import love_calculator, get_trivia_fact, image_generation
output0 = love_calculator("Romeo Montague", "Juliet Capulet")
output1 = get_trivia_fact(output0["number"])
output2 = image_generation(output1["text"])

GT

love_calculator
get_trivia_fact
image_generation

PRED

love_calculator
get_trivia_fact
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
153 3241 Task: "Find out something interesting about 20th July in history and then generate a brief creative narrative on that fact. Then, determine who was the main person involved in the event. Who was the key person involved in the event that happened on 20th July?"

GT

output0 = get_date_fact(date="20-Jul")
output1 = text_generation(text=f"a brief creative narrative on the interesting fact that happened on 20th July: {output0['text']}")
output2 = question_answering(text=output1['text'], question="Who was the key person involved in the event that happened on 20th July?")

PRED

from mnm.tool_api import get_date_fact, text_generation, question_answering
output0 = get_date_fact("07/20")
output1 = text_generation(f"On 20th July 1977, {output0['text']}.")
output2 = question_answering("Who was the main person involved in the event?", output1['text'])

GT

get_date_fact
text_generation
question_answering

PRED

get_date_fact
text_generation
question_answering

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
154 3796 "I'm preparing a lesson plan for my Math class about the number 3. Could you give me an intriguing trivia about this number and then generate a child-friendly story that incorporates this fact? Afterwards, could you tell me whether the story gives a positive or negative sentiment?"

GT

output0 = get_trivia_fact(number="3")
output1 = text_generation(text=f"a child-friendly story that incorporates the trivia fact: {output0['text']}")
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import get_trivia_fact, text_generation, text_classification
trivia_fact = get_trivia_fact("3")
story_prompt = f"Once upon a time in a world with {trivia_fact['text']}, "
story = text_generation(story_prompt)
sentiment_result = text_classification(story['text'])

GT

get_trivia_fact
text_generation
text_classification

PRED

get_trivia_fact
text_generation
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
155 3593 "I'm working on a special project for 2023. Can you provide an interesting fact about that year and create an illustrative picture based on it? Once done, also help me with extracting the text from the generated image."

GT

output0 = get_year_fact(year="2023")
output1 = image_generation(text=f"an illustrative picture based on {output0['text']}")
output2 = optical_character_recognition(image=output1['image'])

PRED

from mnm.tool_api import get_year_fact, image_generation, optical_character_recognition
output0 = get_year_fact("2023")
output1 = image_generation("Istanbul Canal will be finished")
output2 = optical_character_recognition(output1['image'])

GT

get_year_fact
image_generation
optical_character_recognition

PRED

get_year_fact
image_generation
optical_character_recognition

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
157 120 "I have always been curious about the significance of July 11 in history. Could you tell me some interesting historical facts or events that happened on this date?"

GT

output0 = get_date_fact(date="11-Jul")

PRED

from mnm.tool_api import get_date_fact
date_facts = get_date_fact("July 11")

GT

get_date_fact

PRED

get_date_fact

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
158 1596 "I am writing content for a pizzeria's website and I need a vivid, generated image displaying a chef pulling a pizza out of a pizza oven. However, I particularly need the left portion of the image that includes the bustling activities of the chef and the oven. Plus, I also need individual components of the image like the pizza, oven, and chef's gear separated out into different segments to give a detailed perspective, possibly for infographics or detailed visual descriptions."

GT

output0 = image_generation(text="a vivid, generated image displaying a chef pulling a pizza out of a pizza oven with bustling activities")
output1 = image_crop_left(image=output0['image'])
output2 = image_segmentation(image=output1['image'])

PRED

from mnm.tool_api import image_generation, image_crop_left, image_segmentation
output0 = image_generation(text="a chef pulling a pizza out of a pizza oven")
output1 = image_crop_left(image=output0['image'])
output2 = image_segmentation(image=output1['image'])

GT

image_generation
image_crop_left
image_segmentation

PRED

image_generation
image_crop_left
image_segmentation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
159 4078 "I'm organizing a small film discussion about 'Birdman or (The Unexpected Virtue of Ignorance)' from 2014. Could you help me by pulling up the main details and using that data to create a brief introduction for the discussion? And once the intro is generated, I'd like to understand the sentiment of the discussion prompt – whether it leans more towards being positive or negative?"

GT

output0 = search_movie(movie_title="Birdman or (The Unexpected Virtue of Ignorance)", movie_year="2014")
output1 = text_generation(text=f"a brief introduction for the film discussion based on {output0['text']}")
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import search_movie, text_generation, text_classification
output0 = search_movie("Birdman or (The Unexpected Virtue of Ignorance)", "2014")
output1 = text_generation(f"Let's discuss the movie titled '{output0['text']}' which was released in 2014. ")
output2 = text_classification(output1['text'])

GT

search_movie
text_generation
text_classification

PRED

search_movie
text_generation
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
74 3695 "I'm preparing a fun educational activity for my nephew and want to use the number '9'. Can you provide me with an intriguing math fact about this number and then create a relevant image depicting this fun fact? After the image is ready, I'd also appreciate a caption summarising what the image portrays."

GT

output0 = get_math_fact(number="9")
output1 = image_generation(text=f"a relevant image depicting the intriguing math fact about number 9: {output0['text']}")
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import get_math_fact, image_generation, image_captioning
math_fact_about_9 = get_math_fact('9')
description_for_image = "9 is a composite number, its proper divisors being 1 and 3"
generated_image = image_generation(description_for_image)
caption_for_image = image_captioning(generated_image['image'])

GT

get_math_fact
image_generation
image_captioning

PRED

get_math_fact
image_generation
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
77 2959 "I have a recorded audio file '1284-1180-0003.flac'. Could you help me create a picturesque image of the land of Oz based on this narration? After that, please add a tiny hot air balloon floating above the scenery so it looks like I am exploring from the sky."

GT

output0 = automatic_speech_recognition(audio="1284-1180-0003.flac")
output1 = image_generation(text=f"a picturesque image of the land of Oz based on {output0['text']}")
output2 = image_editing(image=output1['image'], prompt="hot air balloon floating above the scenery")

PRED

from mnm.tool_api import automatic_speech_recognition, image_generation, image_editing
transcription = automatic_speech_recognition(audio='/root/input_data/1284-1180-0003.flac')
generated_image = image_generation(text=transcription['text'])
edited_image = image_editing(image=generated_image['image'], prompt="Add a tiny hot air balloon floating above the scenery.")

GT

automatic_speech_recognition
image_generation
image_editing

PRED

automatic_speech_recognition
image_generation
image_editing

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
78 126 "I'm planning a trivia night for a friend's birthday on 7th May. Can you find some unique historical event or fact that happened on this day that I can use in the trivia?"

GT

output0 = get_date_fact(date="7-May")

PRED

from mnm.tool_api import get_date_fact
historical_fact = get_date_fact(date="05/07")

GT

get_date_fact

PRED

get_date_fact

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
144 2035 "I have an image COCO_train2014_000000113236.jpg. Could you extract any text present in the image and then use that information to generate a detailed, contextual description about traffic rules and how it's related to this image? Then, analyze and identify the overall sentiment of the generated description."

GT

output0 = optical_character_recognition(image="COCO_train2014_000000113236.jpg")
output1 = text_generation(text=f"a detailed, contextual description about traffic rules related to {output0['text']}")
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import optical_character_recognition, text_generation, text_classification
output0 = optical_character_recognition(image='/root/input_data/COCO_train2014_000000113236.jpg')
output1 = text_generation(text=f"{output0['text']} Traffic rules")
output2 = text_classification(text=output1['text'])

GT

optical_character_recognition
text_generation
text_classification

PRED

optical_character_recognition
text_generation
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
146 4018 "Can you analyze the romantic compatibility between 'Fitzwilliam Darcy' and 'Elizabeth Bennet'? Based on the numerical result, can we have an interesting trivia related to the number? Using these details, could you write a unique text or piece of fiction?"

GT

output0 = love_calculator(first_name="Fitzwilliam Darcy", second_name="Elizabeth Bennet")
output1 = get_trivia_fact(number=output0['number'])
output2 = text_generation(text=f"a unique piece of fiction or text based on the romantic compatibility between Fitzwilliam Darcy and Elizabeth Bennet and an interesting trivia fact related to their compatibility number, {output1['text']}")

PRED

from mnm.tool_api import love_calculator, get_trivia_fact, text_generation
output0 = love_calculator(first_name='Fitzwilliam Darcy', second_name='Elizabeth Bennet')
output1 = get_trivia_fact(number=str(output0['number']))
output2 = text_generation(text=f"{output0['message']} Fitzwilliam Darcy and Elizabeth Bennet's love is as enduring as the {output1['text']}.")

GT

love_calculator
get_trivia_fact
text_generation

PRED

love_calculator
get_trivia_fact
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
149 32 "I'm working on a project about aviation and I need a unique depiction. Could you generate an illustration based on the description 'A silver jet is flying high in the sky above the clouds' and send it to me?"

GT

output0 = image_generation(text="a unique illustration of a silver jet flying high in the sky above the clouds")

PRED

from mnm.tool_api import image_generation
description = "A silver jet is flying high in the sky above the clouds"
generated_image = image_generation(text=description)

GT

image_generation

PRED

image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
151 4350 I'm working on a project about the origin of everyday tools and I am particularly intrigued about chainsaws. Can you research the question 'Why were chainsaws invented', provide a detailed elaboration beyond the basic information and then evaluate the tone of that elaboration - whether it is generally positive, negative or neutral?

GT

output0 = text_generation(text="Tell me about Why were chainsaws invented")
output1 = text_generation(text=f"a detailed elaboration beyond the basic information about {output0['text']}")
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import text_generation, text_classification
elaboration_result = text_generation(text="Why were chainsaws invented? Provide a detailed elaboration on the history and original purpose of chainsaws.")
elaboration_text = elaboration_result['text']
tone_result = text_classification(text=elaboration_text)

GT

text_generation
text_generation
text_classification

PRED

text_generation
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
82 3893 "I've always been fascinated by the number 9. Can you tell me an interesting fact about it and then create a visual representation of that fact? Afterwards, I'd like to analyze the image and identify the key components of it."

GT

output0 = get_trivia_fact(number="9")
output1 = image_generation(text=f"a visual representation of {output0['text']}")
output2 = image_segmentation(image=output1['image'])

PRED

from mnm.tool_api import get_trivia_fact, image_generation, image_segmentation
fact_about_number_9 = get_trivia_fact('9')
visual_representation = image_generation(fact_about_number_9['text'])
segmentation_result = image_segmentation(visual_representation['image'])

GT

get_trivia_fact
image_generation
image_segmentation

PRED

get_trivia_fact
image_generation
image_segmentation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
83 323 "Can I use the description 'A man is doing tricks with a frisbee' to generate a corresponding visual and tell me what the central object or subject in the created picture is?"

GT

output0 = image_generation(text="A visual representation of a man doing tricks with a frisbee")
output1 = object_detection(image=output0['image'])

PRED

from mnm.tool_api import image_generation, object_detection
generated_image_result = image_generation(text="A man is doing tricks with a frisbee")
detected_objects_result = object_detection(image=generated_image_result['image'])
# Code to find the central object is not included in the summary as it is a calculation based on the results of object_detection.

GT

image_generation
object_detection

PRED

image_generation
object_detection

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
137 3588 "I want to learn something unique about the year 2015. Could you find a compelling fact from that year, create a suitable image that captures the essence of that fact, and identify any texts appearing in the image?"

GT

output0 = get_year_fact(year="2015")
output1 = image_generation(text=f"a suitable image that captures the essence of {output0['text']}")
output2 = optical_character_recognition(image=output1['image'])

PRED

from mnm.tool_api import get_year_fact, image_generation, optical_character_recognition
output0 = get_year_fact("2015")
output1 = image_generation(output0['text'])
output2 = optical_character_recognition(output1['image'])

GT

get_year_fact
image_generation
optical_character_recognition

PRED

get_year_fact
image_generation
optical_character_recognition

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
138 2431 "Can you help me with a photo? For 177572-input.png, I'd like to manipulate it so the woman in the picture is holding a camera. Then, can you identify every object in the image for me? And then find me the camera she is holding in that image."

GT

output0 = image_editing(image="177572-input.png", prompt="make the woman hold a camera")
output1 = object_detection(image=output0['image'])
output2 = select_object(objects=output1['objects'], object_name="camera")

PRED

from mnm.tool_api import image_editing, object_detection, select_object
edited_image_result = image_editing(image='/root/input_data/177572-input.png', prompt="A woman holding a camera")
objects_detection_result = object_detection(image=edited_image_result['image'])
camera_object = select_object(objects=objects_detection_result['objects'], object_name="camera")

GT

image_editing
object_detection
select_object

PRED

image_editing
object_detection
select_object

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
139 10 "I have a review here which says, 'forced, familiar and thoroughly condescending.' I need to understand if this sentiment is positive or negative to shape my response accordingly."

GT

output0 = text_classification(text="forced , familiar and thoroughly condescending . ")

PRED

from mnm.tool_api import text_classification
sentiment_result = text_classification(text="forced, familiar and thoroughly condescending.")

GT

text_classification

PRED

text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
140 522 "I just celebrated my dog's birthday and took a cute picture. The image is saved as '134597-input.png'. But I forgot to put his party hat on for the picture! Can we add a party hat to the dog in the photo? Afterwards, can you also let me know what objects are detected in the new picture?"

GT

output0 = image_editing(image="134597-input.png", prompt="put a party hat on the dog")
output1 = object_detection(image=output0['image'])

PRED

from mnm.tool_api import image_editing, object_detection
edited_image_result = image_editing(image='/root/input_data/134597-input.png', prompt='A dog with a party hat.')
edited_image_path = '/root/playground/134597-edited.png'
edited_image_result['image'].save(edited_image_path)
objects_detected_result = object_detection(image=edited_image_path)

GT

image_editing
object_detection

PRED

image_editing
object_detection

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
142 194 "I'm planning a trip to latitude 29.79 and longitude 95.39. Could you inform me about the expected weather conditions in that region?"

GT

output0 = get_weather(lon="95.39", lat="29.79")

PRED

from mnm.tool_api import get_weather
import requests

# Latitude and longitude for the specified region
latitude = "29.79"
longitude = "95.39"

# Call the get_weather function with the provided coordinates
try:
weather_info = get_weather(lon=longitude, lat=latitude)
print(weather_info)
except requests.exceptions.JSONDecodeError as e:
print("Failed to decode JSON from the weather API response:", e)
except Exception as e:
print("An error occurred while fetching the weather information:", e)

GT

get_weather

PRED

get_weather

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
143 4091 I'm curious about the movie 'The Shape of Water' released in 2017. Could you provide me with an understanding of its details including genre, director, etc.? Once that's done, can we generate a narrative description that talks about the main plot following the pool worker's journey? Following the text, it would also be interesting to see an image that encapsulates the atmosphere and mood of the movie.

GT

output0 = search_movie(movie_title="The Shape of Water", movie_year="2017")
output1 = text_generation(text=f"a narrative description that talks about the main plot following the pool worker's journey in {output0['text']}")
output2 = image_generation(text=f"an image that encapsulates the atmosphere and mood of the movie from {output1['text']}")

PRED

from mnm.tool_api import search_movie, text_generation, image_generation
movie_info = search_movie(movie_title='The Shape of Water', movie_year='2017')
narrative_description = text_generation(text="The main plot follows the journey of a pool worker in the movie 'The Shape of Water'.")
image_result = image_generation(text=narrative_description['text'])

GT

search_movie
text_generation
image_generation

PRED

search_movie
text_generation
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
88 1713 "I want an illustration depicting 'A man lounges on a bench watching the ocean.' Once the image is created, could you focus mainly on the lower half, specifically where any text or writing might be visible in the image? Afterwards, I'd like to know what the writing or text says in the cropped image."

GT

output0 = image_generation(text="an illustration depicting \"A man lounges on a bench watching the ocean.\"")
output1 = image_crop_bottom(image=output0['image'])
output2 = optical_character_recognition(image=output1['image'])

PRED

from mnm.tool_api import image_generation, image_crop_bottom, optical_character_recognition
output0 = image_generation(text="A man lounges on a bench watching the ocean.")
output1 = image_crop_bottom(image=output0['image'])
output2 = optical_character_recognition(image=output1['image'])

GT

image_generation
image_crop_bottom
optical_character_recognition

PRED

image_generation
image_crop_bottom
optical_character_recognition

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
91 2304 Given the '18404.jpg', I need to identify what's portrayed in it. Once we figure that out, could you create a new image that depicts the same subject? I would also like to only see the top portion of the newly generated image.

GT

output0 = image_classification(image="18404.jpg")
output1 = image_generation(text=f"a new image depicting the same subject as {output0['text']}")
output2 = image_crop_top(image=output1['image'])

PRED

from PIL import Image, ImageOps
import io
from mnm.tool_api import image_classification, image_generation, image_crop_top

# Load the image using PIL
image_path = '/root/input_data/18404.jpg'
with open(image_path, 'rb') as file:
image_data = file.read()
image = Image.open(io.BytesIO(image_data))

# Identify the subject in the image
classification_result = image_classification(image)
subject = classification_result['text']

# Generate a new image based on the subject
generation_result = image_generation(text=subject)
generated_image = generation_result['image']

# Crop the top portion of the newly generated image
cropped_image_result = image_crop_top(generated_image)
cropped_image = cropped_image_result['image']

GT

image_classification
image_generation
image_crop_top

PRED

image_classification
image_generation
image_crop_top

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
93 3886 "I am preparing a kids-friendly learning material. Today's lesson involves the number 6. Could you find a captivating trivia fact about the number 6, produce a vibrant, engaging image representing that fact and write an easy-to-understand caption that quickly explains the image?"

GT

output0 = get_trivia_fact(number="6")
output1 = image_generation(text=f"a vibrant, engaging image representing {output0['text']}")
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import get_trivia_fact, image_generation, image_captioning
output0 = get_trivia_fact("6")
output1 = image_generation("the number of points on a Star of David")
output2 = image_captioning(output1['image'])

GT

get_trivia_fact
image_generation
image_captioning

PRED

get_trivia_fact
image_generation
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
94 2725 "I have an image with the label 2413350.jpg. I would like to know how many objects are present in the image and learn a fascinating mathematical fact related to that number."

GT

output0 = object_detection(image="2413350.jpg")
output1 = count(objects=output0['objects'])
output2 = get_math_fact(number=output1['number'])

PRED

from mnm.tool_api import object_detection, count, get_math_fact
output0 = object_detection(image='/root/input_data/2413350.jpg')
output1 = count(objects=output0['objects'])
output2 = get_math_fact(number=str(output1['number']))

GT

object_detection
count
get_math_fact

PRED

object_detection
count
get_math_fact

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
95 2242 "From the '11561.jpg', please identify the main subject, make a new image that represents the same concept, and provide a written summary of what the newly created image depicts."

GT

output0 = object_detection(image="11561.jpg")
output1 = image_generation(text=f"create a new image that represents the same concept as {output0['text']}")
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import object_detection, image_generation, image_captioning
image_path = '/root/input_data/11561.jpg'
image = Image.open(image_path)
detection_result = object_detection(image)
main_subject = detection_result['objects'][0]['label']
text_description = main_subject
generated_image_result = image_generation(text_description)
caption_result = image_captioning(generated_image_result['image'])

GT

object_detection
image_generation
image_captioning

PRED

object_detection
image_generation
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
96 3336 "What's an interesting piece of history that took place on October 19th? Create a graphic image based on this historical event and identify the various elements that form this image."

GT

output0 = get_date_fact(date="19-Oct")
output1 = image_generation(text=f"graphic image based on {output0['text']}")
output2 = image_segmentation(image=output1['image'])

PRED

from mnm.tool_api import get_date_fact, image_generation, image_segmentation
date_fact = get_date_fact("October 19")
image_description = "Napoleon I of France retreats from Moscow"
generated_image_result = image_generation(image_description)
segmentation_result = image_segmentation(generated_image_result['image'])

GT

get_date_fact
image_generation
image_segmentation

PRED

get_date_fact
image_generation
image_segmentation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
97 1761 "I have written a kids' story and I need some illustrations. For the scene where 'There is a baseball player who swung for the ball', could you generate an appropriate image for it? Afterwards, please extract whatever letters or numbers might exist within the image as text. I'd then wish to have a follow-up story context based on this detected text to continue the narrative."

GT

output0 = image_generation(text="an illustration of a scene where a baseball player swung for the ball")
output1 = optical_character_recognition(image=output0['image'])
output2 = text_generation(text=f"a follow-up story context based on {output1['text']} to continue the narrative")

PRED

from mnm.tool_api import image_generation, optical_character_recognition, text_generation
output0 = image_generation(text='There is a baseball player who swung for the ball')
output1 = optical_character_recognition(image=output0['image'])
output2 = text_generation(text=output1['text'])

GT

image_generation
optical_character_recognition
text_generation

PRED

image_generation
optical_character_recognition
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
98 71 "Currently, I'm working on an image named '15653-input.png' which requires a tweak. Specifically, I need to place a bottle behind the vegetables in the picture. Could you help me generate this visual alteration?"

GT

output0 = image_editing(image="15653-input.png", prompt="Have there be a bottle behind the vegetables")

PRED

from mnm.tool_api import image_editing
output0 = image_editing(image='/root/input_data/15653-input.png', prompt='Place a bottle behind the vegetables.')

GT

image_editing

PRED

image_editing

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
100 3416 "I'm crafting a public speaking presentation themed around the year 2022. Can you provide a fascinating fact about the year 2022 and use that information to create an engaging speech segment? I'm also looking to have a concise summary of the speech for my notes, so any help with that would be hugely appreciated."

GT

output0 = get_year_fact(year="2022")
output1 = text_generation(text=f"an engaging speech segment based on the fascinating fact about the year 2022: {output0['text']}")
output2 = text_generation(text=f"Summarize the text: {output1['text']}")

PRED

from mnm.tool_api import get_year_fact, text_generation
year_fact = get_year_fact("2022")
speech_segment = text_generation(text=year_fact['text'])

GT

get_year_fact
text_generation
text_generation

PRED

get_year_fact
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
102 3539 "I'm planning to write a blog post about 2023. Can you assist by giving me an interesting fact about this year and then creating an image based on the fact? I would also like the image segmented into individual elements so I can explain the image piece by piece to my readers."

GT

output0 = get_year_fact(year="2023")
output1 = image_generation(text=f"an intriguing image based on the fact of {output0['text']}")
output2 = image_segmentation(image=output1['image'])

PRED

from mnm.tool_api import get_year_fact, image_generation, image_segmentation
fact_about_2023 = get_year_fact("2023")
generated_image = image_generation(fact_about_2023['text'])
segmented_image = image_segmentation(generated_image['image'])

GT

get_year_fact
image_generation
image_segmentation

PRED

get_year_fact
image_generation
image_segmentation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
130 886 "I'm planning an educational presentation for primary school children about the number 4. Can you find me a fun fact about the number 4 that I can include, and then generate a creative picture based on that fact?"

GT

output0 = get_trivia_fact(number="4")
output1 = image_generation(text=f"a creative picture based on the fun fact about the number 4, which is {output0['text']}")

PRED

from mnm.tool_api import get_trivia_fact, image_generation
output0 = get_trivia_fact("4")
output1 = image_generation(output0['text'])

GT

get_trivia_fact
image_generation

PRED

get_trivia_fact
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
131 4226 I am organizing a fun activity for my film society where we guess movies from images indirectly related to the movie. Can you first provide some key details for 'The Shape of Water' from 2017? Using a combination of its genre and director as a clue, can you then generate an intriguing image for the audience? And finally, could you determine the classification of the generated image to aid in our guessing game?

GT

output0 = search_movie(movie_title="The Shape of Water", movie_year="2017")
output1 = image_generation(text=f"an intriguing image indirectly related to genre and director of {output0['text']}")
output2 = image_classification(image=output1['image'])

PRED

from mnm.tool_api import search_movie, image_generation, image_classification
output0 = search_movie("The Shape of Water", "2017")
output1 = image_generation("Drama, Fantasy, Romance by Guillermo del Toro")
output2 = image_classification(output1['image'])

GT

search_movie
image_generation
image_classification

PRED

search_movie
image_generation
image_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
133 4008 "I am helping plan a special date for Jacob Black and Bella Swan. Could you help me figure out their compatibility chances? Then, to give the date a cute brainy twist, please generate an intriguing math fact. Then, let's top that off by creating a creative visual representation of that fact to be used as part of their date's decoration."

GT

output0 = love_calculator(first_name="Jacob Black", second_name="Bella Swan")
output1 = get_math_fact(number=output0['number'])
output2 = image_generation(text=f"a creative visual representation of {output1['text']}")

PRED

from mnm.tool_api import love_calculator, get_math_fact, image_generation
compatibility_result = love_calculator(first_name="Jacob", second_name="Bella")
math_fact_result = get_math_fact(number="random")
visual_representation = image_generation(text=math_fact_result["text"])

GT

love_calculator
get_math_fact
image_generation

PRED

love_calculator
get_math_fact
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
104 56 "I have an image COCO_train2014_000000256855.jpg. The image seems to have some text on it. Could you help me find out what the text says?"

GT

output0 = optical_character_recognition(image="COCO_train2014_000000256855.jpg")

PRED

from mnm.tool_api import optical_character_recognition
text_recognition_result = optical_character_recognition(image='/root/input_data/COCO_train2014_000000256855.jpg')

GT

optical_character_recognition

PRED

optical_character_recognition

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
105 2013 "I have this photo 2323530.jpg. Could you describe what's happening in the photo, create a new related image from the description, and finally, read any text or captions that appear in this newly generated image?"

GT

output0 = image_captioning(image="2323530.jpg")
output1 = image_generation(text=f"a new image based on the scene of {output0['text']}")
output2 = optical_character_recognition(image=output1['image'])

PRED

from mnm.tool_api import image_captioning, image_generation, optical_character_recognition
description = image_captioning(image='/root/input_data/2323530.jpg')
new_image = image_generation(text=description['text'])
text_from_new_image = optical_character_recognition(image=new_image['image'])

GT

image_captioning
image_generation
optical_character_recognition

PRED

image_captioning
image_generation
optical_character_recognition

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
106 1049 "For my next podcast, I'm planning to cover the topic 'What Really Happens When You Flush on an Airplane?' but I'm struggling to structure my content. Can you create some text starting with this topic to help me get an overall understanding? Once that's done, can you then summarize this into a compact script that I can use for my episode? Also, it would be helpful if you could pull up the basic information about this topic from Wikipedia to ensure I cover the most important points."

GT

output0 = text_generation(text="An overall understanding of the topic - What Really Happens When You Flush on an Airplane?")
output1 = text_summarization(text=output0['text'])
output2 = wikipedia_simple_search(text=output1['text'])

PRED

from mnm.tool_api import text_generation, text_summarization, wikipedia_simple_search
output0 = text_generation("What Really Happens When You Flush on an Airplane?")
output1 = text_summarization(output0['text'])
output2 = wikipedia_simple_search("Aircraft lavatory")

GT

text_generation
text_summarization
wikipedia_simple_search

PRED

text_generation
text_summarization
wikipedia_simple_search

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
107 2373 Query: I recently uploaded a photo (407550-input.png) to my website's healthy recipes blog, but mistakenly used an image of a carrot instead of broccoli. Could you modify the picture to reflect this and provide a short, snappy description for the newly altered image? Further, can you distil this image caption into a quick concise tagline that I can use as a post title?

GT

output0 = image_editing(image="407550-input.png", prompt="change the carrot into broccoli")
output1 = image_captioning(image=output0['image'])
output2 = text_summarization(text=output1['text'])

PRED

from mnm.tool_api import image_editing, image_captioning, text_summarization
edited_image_result = image_editing(image='/root/input_data/407550-input.png', prompt='Replace the carrot with broccoli.')
caption_result = image_captioning(image=edited_image_result['image'])
tagline_result = text_summarization(text=caption_result['text'])

GT

image_editing
image_captioning
text_summarization

PRED

image_editing
image_captioning
text_summarization

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
109 2891 "I have a recording '237-134500-0000.flac' in which Frank was reading a text about a divorce case very slowly and becoming progressively angrier. I would like to know the main points he was elaborating on without having to listen to the whole thing. In addition, I am wondering who was getting divorced in the case Frank was reading about?"

GT

output0 = automatic_speech_recognition(audio="237-134500-0000.flac")
output1 = text_summarization(text=output0['text'])
output2 = question_answering(text=output1['text'], question="who was getting divorced in the case Frank was reading about?")

PRED

from mnm.tool_api import automatic_speech_recognition, text_summarization, question_answering
audio_file_path = '/root/input_data/237-134500-0000.flac'
transcription_result = automatic_speech_recognition(audio=audio_file_path)
summarized_text = text_summarization(text=transcription_result['text'])
question_about_divorce = "Who was getting divorced in the case?"
divorce_info = question_answering(question=question_about_divorce, text=transcription_result['text'])

GT

automatic_speech_recognition
text_summarization
question_answering

PRED

automatic_speech_recognition
text_summarization
question_answering

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
110 3887 "Can you tell me a fascinating fact about the number '2' and then illustrate it with an image? After you've created the image, could you give me a clear description of what's shown in it?"

GT

output0 = get_trivia_fact(number="2")
output1 = image_generation(text=f"an illustrative image of a fascinating fact about the number {output0['text']}")
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import get_trivia_fact, image_generation, image_captioning
fact_about_number_two = get_trivia_fact('2')
generated_image = image_generation(fact_about_number_two['text'])
image_description = image_captioning(generated_image['image'])

GT

get_trivia_fact
image_generation
image_captioning

PRED

get_trivia_fact
image_generation
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
111 188 "Can you find details for the movie 'Nomadland' that was released in 2020? I am interested in knowing the director and the genre of the movie."

GT

output0 = search_movie(movie_title="Nomadland", movie_year="2020")

PRED

from mnm.tool_api import search_movie
movie_details = search_movie(movie_title='Nomadland', movie_year='2020')

GT

search_movie

PRED

search_movie

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
120 799 "I'm working on a project about notable events in recent years. For 2016, could you provide a captivating fact followed by a visual representation of it? I intend to use the generated image for my PowerPoint presentation."

GT

output0 = get_year_fact(year="2016")
output1 = image_generation(text=f"a visual representation of {output0['text']} suitable for a PowerPoint presentation")

PRED

from mnm.tool_api import get_year_fact, image_generation
output0 = get_year_fact("2016")
output1 = image_generation("NASA's Juno spacecraft is expected to arrive at Jupiter")

GT

get_year_fact
image_generation

PRED

get_year_fact
image_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
121 730 "I am preparing for a trivia night with my friends. Can you help me create a fascinating story based on historical events that happened on 4th of May to impress them?"

GT

output0 = get_date_fact(date="4-May")
output1 = text_generation(text=f"a fascinating story based on {output0['text']} to impress my friends at a trivia night")

PRED

from mnm.tool_api import get_date_fact, text_generation
output0 = get_date_fact(date="05/04")
output1 = text_generation(text=f"On the 4th of May, {output0['text']}.")

GT

get_date_fact
text_generation

PRED

get_date_fact
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
125 2744 "Considering the image 2328879.jpg, can you determine the number of individual objects in the scene? Afterwards, would it be possible to enlighten me with an intriguing trivia about the figure you come up with?"

GT

output0 = object_detection(image="2328879.jpg")
output1 = count(objects=output0['objects'])
output2 = get_trivia_fact(number=output1['number'])

PRED

from PIL import Image
from mnm.tool_api import object_detection, count, get_trivia_fact
image = Image.open('/root/input_data/2328879.jpg')
detection_result = object_detection(image)
object_count = count(detection_result['objects'])
trivia_fact = get_trivia_fact(str(object_count['number']))

GT

object_detection
count
get_trivia_fact

PRED

object_detection
count
get_trivia_fact

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
126 152 "Can you tell me an interesting or fun fact about the number 4? I want to include it in my Math Club presentation to add some fun."

GT

output0 = get_trivia_fact(number="4")

PRED

from mnm.tool_api import get_trivia_fact
output0 = get_trivia_fact("4")

GT

get_trivia_fact

PRED

get_trivia_fact

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
127 47 "For the photo titled 2349496.jpg, generate a descriptive text summary."

GT

output0 = image_captioning(image="2349496.jpg")

PRED

from mnm.tool_api import image_captioning
descriptive_text_summary = image_captioning(image_path='/root/input_data/2349496.jpg')

GT

image_captioning

PRED

image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
113 1440 I'm planning to create a storyboard for an upcoming short film. One of the scenes is described as "The city street is quiet during this time of night." Could you help me by creating a visual of this scene, providing a detailed caption to outline the elements, and summarizing this into a snappy setting description that can be included in the script?

GT

output0 = image_generation(text="a detailed visual of the scene: The city street is quiet during this time of night.")
output1 = image_captioning(image=output0['image'])
output2 = text_summarization(text=output1['text'])

PRED

from mnm.tool_api import image_generation, image_captioning, text_summarization
output0 = image_generation(text="The city street is quiet during this time of night.")
output1 = image_captioning(image=output0['image'])
output2 = text_summarization(text=output1['text'])

GT

image_generation
image_captioning
text_summarization

PRED

image_generation
image_captioning
text_summarization

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
115 3546 "I need some unique information pertaining to the year 2022 for a blog post. Once we have the fact, can you create a corresponding image? Also, for further analysis, could you divide that image into detailed segments?"

GT

output0 = get_year_fact(year="2022")
output1 = image_generation(text=f"a unique image corresponding to {output0['text']}")
output2 = image_segmentation(image=output1['image'])

PRED

from mnm.tool_api import get_year_fact, image_generation, image_segmentation
output0 = get_year_fact("2022")
output1 = image_generation(output0['text'])
output2 = image_segmentation(output1['image'])

GT

get_year_fact
image_generation
image_segmentation

PRED

get_year_fact
image_generation
image_segmentation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
116 3874 "I want to know something unique about the number 9. After that, please generate an image that could illustrate this trivia fact. Subsequently, I'd like to add a sun in the top right corner of the image. Can you use the input Add a sun in the top right corner for adjusting the image?"

GT

output0 = get_trivia_fact(number="9")
output1 = image_generation(text=f"an illustrative image portraying the unique fact about number 9 from {output0['text']}")
output2 = image_editing(image=output1['image'], prompt="Add a sun in the top right corner")

PRED

from mnm.tool_api import get_trivia_fact, image_generation, image_editing
output0 = get_trivia_fact('9')
output1 = image_generation(output0['text'])
output2 = image_editing(output1['image'], "Add a sun in the top right corner")

GT

get_trivia_fact
image_generation
image_editing

PRED

get_trivia_fact
image_generation
image_editing

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
118 3236 "I am planning a special event in honor of what happened on 24th of August in history. Could you give me a historical fact about it and then create a speech, inspired by that fact, that I could present at the event? Once the speech is generated, could you also determine the overall tone of the speech?"

GT

output0 = get_date_fact(date="24-Aug")
output1 = text_generation(text=f"a speech inspired by the historical fact: {output0['text']}")
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import get_date_fact, text_generation, text_classification
output0 = get_date_fact("08/24")
output1 = text_generation(text=output0['text'])
output2 = text_classification(text=output1['text'])

GT

get_date_fact
text_generation
text_classification

PRED

get_date_fact
text_generation
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
192 2993 I have an audio file named '672-122797-0000.flac.' I would love to see how an artist might interpret this scene. Can you show me a picture based on this audio content and tell me what type of image it is?

GT

output0 = automatic_speech_recognition(audio="672-122797-0000.flac")
output1 = image_generation(text=f"an artist interpretation of the scene from the audio file {output0['text']}")
output2 = image_classification(image=output1['image'])

PRED

from mnm.tool_api import automatic_speech_recognition, image_generation, image_classification
audio_file_path = '/root/input_data/672-122797-0000.flac'
transcription = automatic_speech_recognition(audio=audio_file_path)
transcribed_text = transcription['text']
generated_image = image_generation(text=transcribed_text)
classification_result = image_classification(image=generated_image['image'])
image_type = classification_result['text']

GT

automatic_speech_recognition
image_generation
image_classification

PRED

automatic_speech_recognition
image_generation
image_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
193 558 "I'm trying to create a dynamic new photo for an agricultural-themed event poster. Start with '111376-input.png', and based on the phrase 'Have the cow wear a hat.', do your magic. Once that's done, for conciseness and rhythm, would you keep only the left part of the image?"

GT

output0 = image_editing(image="111376-input.png", prompt="Have the cow wear a hat.")
output1 = image_crop_left(image=output0['image'])

PRED

from mnm.tool_api import image_editing, image_crop_left
from PIL import Image
original_image_path = '/root/input_data/111376-input.png'
original_image = Image.open(original_image_path)
edited_image_info = image_editing(image=original_image, prompt="Have the cow wear a hat.")
cropped_image_info = image_crop_left(image=edited_image_info['image'])
final_image_path = '/root/playground/edited_and_cropped_image.png'
cropped_image_info['image'].save(final_image_path)

GT

image_editing
image_crop_left

PRED

image_editing
image_crop_left

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
194 3545 "I'm working on a presentation about cool things from 2022. Could you provide me with an interesting fact from that year and create an image based on it? Once the image is ready, could you also break it down into its constituent parts?"

GT

output0 = get_year_fact(year="2022")
output1 = image_generation(text=f"an image based on the interesting fact from 2022 {output0['text']}")
output2 = image_segmentation(image=output1['image'])

PRED

from mnm.tool_api import get_year_fact, image_generation, image_segmentation
fact = get_year_fact("2022")
generated_image = image_generation(fact['text'])
segmentation_result = image_segmentation(generated_image['image'])

GT

get_year_fact
image_generation
image_segmentation

PRED

get_year_fact
image_generation
image_segmentation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
195 772 Can you provide me with an interesting fact about the year 2023, and then use that fact to generate a short story?

GT

output0 = get_year_fact(year="2023")
output1 = text_generation(text=f"a short story based on the interesting fact from {output0['text']}")

PRED

from mnm.tool_api import get_year_fact, text_generation
year_fact = get_year_fact("2023")
prompt_for_story = "Tell a short story about the completion of the Istanbul Canal in 2023 and its impact on the city."
short_story = text_generation(text=prompt_for_story)

GT

get_year_fact
text_generation

PRED

get_year_fact
text_generation

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
199 192 "I'll be visiting Cape Town at coordinates 18.41 longitude and 34.02 latitude. Can you help me with the weather forecast so I can pack accordingly?"

GT

output0 = get_weather(lon="18.41", lat="34.02")

PRED

from mnm.tool_api import get_weather
output0 = get_weather(lon="18.41", lat="34.02")

GT

get_weather

PRED

get_weather

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
0 168 "I'm wondering how likely a successful relationship would be between Christian Grey and Anastasia Steele. Can you determine their love compatibility for me?"

GT

output0 = love_calculator(first_name="Christian Grey", second_name="Anastasia Steele")

PRED

from mnm.tool_api import love_calculator
output0 = love_calculator("Christian Grey", "Anastasia Steele")

GT

love_calculator

PRED

love_calculator

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
1 1356 Can you help me create an image based on the description 'A long passenger train traveling over a rail bridge.'? After that, I would like to make changes to the image so that it's snowing heavily and the train and the bridge are almost covered in snow. Once that's completed, can you generate an evocative caption for this image? I am planning to use this for my short story about a dramatic journey on a snow-covered train!

GT

output0 = image_generation(text="A long passenger train traveling over a rail bridge.")
output1 = image_editing(image=output0['image'], prompt="it's snowing heavily and the train and the bridge are almost covered in snow")
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import image_generation, image_editing, image_captioning
output0 = image_generation(text='A long passenger train traveling over a rail bridge.')
output1 = image_editing(image=output0['image'], prompt='It is snowing heavily and the train and the bridge are almost covered in snow.')
output2 = image_captioning(image=output1['image'])

GT

image_generation
image_editing
image_captioning

PRED

image_generation
image_editing
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
2 354 "I need an illustration for my new project description. Could you create an image that displays some big red metal boxes located near a road? I only need the right portion of the illustration though, to use as a banner for the project's webpage.

GT

output0 = image_generation(text="An illustration displaying some big red metal boxes located near a road")
output1 = image_crop_right(image=output0['image'])

PRED

from mnm.tool_api import image_generation, image_crop_right
output0 = image_generation(text='some big red metal boxes located near a road')
output1 = image_crop_right(image=output0['image'])

GT

image_generation
image_crop_right

PRED

image_generation
image_crop_right

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
3 4039 "I'm curious, what are the chances of a successful relationship between Jack Dawson and Rose Dewitt Bukater, and can you also provide a fun trivia fact relating to this? Also, I would like to get some insights, whether the overall sentiment from that trivia fact is positive or negative."

GT

output0 = love_calculator(first_name="Jack Dawson", second_name="Rose Dewitt Bukater")
output1 = get_trivia_fact(number=output0['number'])
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import love_calculator, get_trivia_fact, text_classification
compatibility_result = love_calculator("Jack Dawson", "Rose Dewitt Bukater")
trivia_fact_result = get_trivia_fact("random")
sentiment_result = text_classification(trivia_fact_result["text"])

GT

love_calculator
get_trivia_fact
text_classification

PRED

love_calculator
get_trivia_fact
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
4 2379 I have this image, "581668-input.png" and I want to add a few cookies to the tray in the picture. Afterwards, I want to understand what the modified image portrays. Finally, can we determine if the resulting description paints a positive or negative picture?

GT

output0 = image_editing(image="581668-input.png", prompt="Add cookies to the tray.")
output1 = image_captioning(image=output0['image'])
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import image_editing, image_captioning, text_classification
edited_image_result = image_editing(image="/root/input_data/581668-input.png", prompt="Add a few cookies to the tray in the picture.")
caption_result = image_captioning(image=edited_image_result['image'])
sentiment_result = text_classification(text=caption_result['text'])

GT

image_editing
image_captioning
text_classification

PRED

image_editing
image_captioning
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
6 516 I have an image, specifically '111376-input.png', depicting a cow in a field. I think it would be quite amusing to see this cow with a hat on. Could you modify the image to meet this idea and afterwards describe to me what the new image looks like?

GT

output0 = image_editing(image="111376-input.png", prompt="Have the cow wear a hat.")
output1 = image_captioning(image=output0['image'])

PRED

from mnm.tool_api import image_editing, image_captioning
edited_image_result = image_editing(image='/root/input_data/111376-input.png', prompt='A cow wearing a hat')
description_result = image_captioning(image=edited_image_result['image'])

GT

image_editing
image_captioning

PRED

image_editing
image_captioning

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
7 4358 I'm participating in a debate on contemporary socio-political terminologies and their impact on societies. One of these terms is 'woke'. I need to understand its meaning and implications better. Can you find a simplified definition of 'woke' and then generate a paragraph that illustrates its usage? Finally, identify the general sentiment or tone associated with its usage.

GT

output0 = text_generation(text="Tell me about What does woke mean")
output1 = text_generation(text=f"a paragraph illustrating the usage of {output0['text']}")
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import text_generation, text_classification
paragraph = text_generation(text="woke")
sentiment = text_classification(text=paragraph['text'])

GT

text_generation
text_generation
text_classification

PRED

text_generation
text_classification

precision=100.0
recall=100.0
f1=100.0
100.00 trajectory
52 3072 "I have an image 2348288.jpg in front of me and it's quite an interesting scene. I have a question though, what color do you think the dog's tail is? Once I have this detail, please provide a creative and fun story that links the color of that tail with the scene captured in the image. Subsequently, I would like if you can categorize the overall mood implied by your generated story - does it come off as positive or negative?"

GT

output0 = visual_question_answering(image="2348288.jpg", question="What color do you think that tail is?")
output1 = text_generation(text=f"a creative and fun story that links the color of the tail, which is {output0['text']}, with the scene of two dogs playing together on the grass")
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import object_detection, visual_question_answering, text_generation, text_classification
output0 = object_detection(image='/root/input_data/2348288.jpg')
output1 = visual_question_answering(image='/root/input_data/2348288.jpg', question="What color is the dog's tail?")
output2 = text_generation(text="In a scene filled with the hustle and bustle of a vibrant city park, a dog with a black tail stands out. Its tail wags energetically as it navigates through the crowd, leaving a trail of smiles in its wake. The dog's tail is black, and it seems to have a story of its own. Let's dive into the tale of this black-tailed adventurer.")
output3 = text_classification(text=output2['text'])

GT

visual_question_answering
text_generation
text_classification

PRED

object_detection
visual_question_answering
text_generation
text_classification

precision=75.0
recall=100.0
f1=85.71
85.71 trajectory
172 3717 "Can you generate some fun facts about the number 5, create an image representing these facts, and then identify what core subject or theme this image falls under in simple terms?"

GT

output0 = get_math_fact(number="5")
output1 = image_generation(text=f"an image representing fun facts about the number 5 based on {output0['text']}")
output2 = image_classification(image=output1['image'])

PRED

from mnm.tool_api import get_math_fact, get_trivia_fact, image_generation, image_classification
math_fact = get_math_fact('5')
trivia_fact = get_trivia_fact('5')
description = f"{math_fact['text']}. {trivia_fact['text']}."
generated_image = image_generation(description)
classification_result = image_classification(generated_image['image'])

GT

get_math_fact
image_generation
image_classification

PRED

get_math_fact
get_trivia_fact
image_generation
image_classification

precision=75.0
recall=100.0
f1=85.71
85.71 trajectory
5 1349 "Could you show me an image of a situation where 'A cyclist pauses near the water to observe a canoe.'? I'd then like to change the time within the image to evening with a beautiful sunset in the background. What would be a suitable description for this modified image?"

GT

output0 = image_generation(text="a situation where a cyclist pauses near the water to observe a canoe.")
output1 = image_editing(image=output0['image'], prompt="evening with a beautiful sunset in the background")
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import image_generation, image_editing
output0 = image_generation(text="A cyclist pauses near the water to observe a canoe.")
output1 = image_editing(image=output0['image'], prompt="Change the time to evening with a beautiful sunset in the background.")

GT

image_generation
image_editing
image_captioning

PRED

image_generation
image_editing

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
129 4146 I'm doing research on the movie "Green Book" from the year 2018 but I need an easily digestible summary of its storyline and main themes, as well as a visual representation that would help me grasp its overall ambiance and mood.

GT

output0 = search_movie(movie_title="Green Book", movie_year="2018")
output1 = text_summarization(text=output0['text'])
output2 = image_generation(text=f" a visual representation that helps grasp the overall ambiance and mood of {output1['text']}")

PRED

from mnm.tool_api import search_movie, image_generation
movie_summary = search_movie(movie_title="Green Book", movie_year="2018")
visual_representation = image_generation(text=movie_summary['text'])

GT

search_movie
text_summarization
image_generation

PRED

search_movie
image_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
17 1768 "We're producing a short animation based on the description 'A trolley with people drives down a street by a building'. After generating this scene, we need to detect if there are any writings or signs in our visuals. Finally, please categorize the detected text according to whether it gives off positive or negative vibes."

GT

output0 = image_generation(text="an animation based on the description \"A trolley with people drives down a street by a building.\"")
output1 = optical_character_recognition(image=output0['image'])
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import image_generation, optical_character_recognition, text_classification
generated_image = image_generation(text='A trolley with people drives down a street by a building')
ocr_results = optical_character_recognition(image=generated_image['image'])
detected_texts = ocr_results['text'].split(', ')
text_vibes = [text_classification(text=text) for text in detected_texts]

GT

image_generation
optical_character_recognition
text_classification

PRED

image_generation
optical_character_recognition

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
46 1347 I'm trying to understand the religious teaching landscape better. Within religious and spiritual instruction, teachers like gurus, mullahs, rabbis, pastors/youth pastors, and lamas teach religious texts including the Quran, Torah, or Bible. Could I trouble you to find another broad term that might be commonly used to describe such teachers? After you find this out, could you pull up the most relevant Wikipedia page about that term? Once we have that information, can you categorize the overall mood or sentiment found in that Wikipedia page's summary?

GT

output0 = question_answering(text="Religious and spiritual teachers, such as gurus, mullahs, rabbis, pastors/youth pastors and lamas, may teach religious texts such as the Quran, Torah or Bible.", question="What is another general name for a religious teacher?")
output1 = wikipedia_simple_search(text=output0['text'])
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import wikipedia_simple_search, text_classification
general_term_search_result = wikipedia_simple_search("religious teacher")
wikipedia_page_result = wikipedia_simple_search("religious honorifics")
sentiment_result = text_classification(wikipedia_page_result['text'])

GT

question_answering
wikipedia_simple_search
text_classification

PRED

wikipedia_simple_search
wikipedia_simple_search
text_classification

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
45 2782 "I want to zoom in on the image 2409782.jpg. More specifically, I'm interested in the lamp portion of this image. Can you generate a cropped image that focuses on just the lamp?"

GT

output0 = object_detection(image="2409782.jpg")
output1 = select_object(objects=output0['objects'], object_name="lamp")
output2 = image_crop(image="2409782.jpg", object=output1['object'])

PRED

from mnm.tool_api import object_detection, image_crop
from PIL import Image
image_path = '/root/input_data/2409782.jpg'
image = Image.open(image_path)
detection_result = object_detection(image)
closest_bbox = [123.38, 253.69, 262.66, 310.34] # This was determined by finding the object closest to the center of the image
object_to_crop = {'bbox': closest_bbox}
cropped_image_info = image_crop(image, object_to_crop)
cropped_image = cropped_image_info['image']
cropped_image_path = '/root/playground/cropped_closest_center.jpg'
cropped_image.save(cropped_image_path)

GT

object_detection
select_object
image_crop

PRED

object_detection
image_crop

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
80 3621 Query: I'm working on creating an educational content piece and I need some intriguing information about the number 6 initially. Based on that information, could you generate a short story to make math learning fun? And after the story, let's have a comprehension question. What is the essential mathematical characteristic of the number 6 discussed in the story?

GT

output0 = get_math_fact(number="6")
output1 = text_generation(text=f"a short story to make math learning fun based on {output0['text']}")
output2 = question_answering(text=output1['text'], question="What is the essential mathematical characteristic of the number 6 discussed in the story?")

PRED

from mnm.tool_api import get_math_fact, text_generation
math_fact_about_6 = get_math_fact('6')
story_prompt = f"Once upon a time, the number 6 discovered that it was {math_fact_about_6['text']}."
short_story = text_generation(story_prompt)
comprehension_question = "What is the essential mathematical characteristic of the number 6 discussed in the story?"

GT

get_math_fact
text_generation
question_answering

PRED

get_math_fact
text_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
136 4125 I'm curious about the movie 'Green Book' which was released in 2018. Can you provide me with an overview of the film? Once you've done that, maybe you could also give me a concise summary that I could read in under a minute. One thing I've been wondering is who the director of 'Green Book' is. Can you answer that for me?

GT

output0 = search_movie(movie_title="Green Book", movie_year="2018")
output1 = text_generation(text=f"Summarize the text: {output0['text']}")
output2 = question_answering(text=output1['text'], question="who the director of 'Green Book' is?")

PRED

from mnm.tool_api import search_movie, text_generation
movie_info = search_movie(movie_title='Green Book', movie_year='2018')
summary_prompt = f"Summarize the following movie information in under a minute read: {movie_info['text']}"
movie_summary = text_generation(text=summary_prompt)
director = movie_info['text'].split("Director: ")[1].split("\n")[0]

GT

search_movie
text_generation
question_answering

PRED

search_movie
text_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
135 1370 Task Query: Using an image that portrays 'Two cups of fruits placed next to each other.', can we then modify this image to display both cups filled with only strawberries? After making these changes, I would like to know what the main object of the altered image is.

GT

output0 = image_generation(text="an image that portrays \"Two cups of fruits placed next to each other.\"")
output1 = image_editing(image=output0['image'], prompt="both cups filled with only strawberries")
output2 = image_classification(image=output1['image'])

PRED

from mnm.tool_api import image_editing, image_classification
edited_image_result = image_editing(image_path='/root/input_data/example.png', prompt='Both cups filled with only strawberries.')
classification_result = image_classification(image=edited_image_result['image'])

GT

image_generation
image_editing
image_classification

PRED

image_editing
image_classification

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
49 4404 "I'm trying to understand more about economic topics and one of them is 'recession'. Can you simplify the concept of a recession for me, later on more specifically, explain how a recession impacts the average individual?".

GT

output0 = wikipedia_simple_search(text="What is a recession")
output1 = text_summarization(text=output0['text'])
output2 = question_answering(text=output1['text'], question="explain how a recession impacts the average individual?")

PRED

from mnm.tool_api import wikipedia_simple_search, text_summarization
output0 = wikipedia_simple_search("recession")
output1 = text_summarization(output0['text'])
output2 = wikipedia_simple_search("impact of recession on individuals")
output3 = text_summarization(output2['text'])
output4 = wikipedia_simple_search("How does a recession impact the average individual?")
output5 = wikipedia_simple_search("Effects of a recession on individuals")

GT

wikipedia_simple_search
text_summarization
question_answering

PRED

wikipedia_simple_search
text_summarization
wikipedia_simple_search
text_summarization
wikipedia_simple_search
wikipedia_simple_search

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
186 4225 I am planning a movie-themed trivia with my friends and I don't want obvious clues. For the 2017 movie 'The Shape of Water', can you provide some basic information? Then based on its genre and director, generate an image that would represent a subtle hint. Once the image is generated, can you help identify what category the image falls into, without revealing the movie's title?

GT

output0 = search_movie(movie_title="The Shape of Water", movie_year="2017")
output1 = image_generation(text=f"an image that represents a subtle hint based on the genre and director of {output0['text']}")
output2 = image_classification(image=output1['image'])

PRED

from mnm.tool_api import image_generation, image_classification
output0 = image_generation(text="A fantasy movie set in the 1960s during the Cold War with an amphibious creature")
output1 = image_classification(image=output0['image'])

GT

search_movie
image_generation
image_classification

PRED

image_generation
image_classification

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
117 1075 I'd like to better visualize the steps for baking something under the conditions of 'Bake 375 for 10 minutes then turn down to 350 degrees for 45 minutes.' Can you generate a step-by-step guide or tips for doing this to ensure it's done correctly? Based on that guide, can you answer this question: What is the total baking time? Finally, could you also provide a visual representation of the procedure based on the generated guide?

GT

output0 = text_generation(text="a step-by-step guide or tips for baking under the conditions of \"Bake 375 for 10 minutes then turn down to 350 degrees for 45 minutes.\"")
output1 = question_answering(text=output0['text'], question="What is the total baking time?")
output2 = image_generation(text=f"a visual representation of the procedure based on {output0['text']}")

PRED

from mnm.tool_api import text_generation, image_generation
guide = text_generation("Bake 375 for 10 minutes then turn down to 350 degrees for 45 minutes.")
total_baking_time = 10 + 45 # Total baking time in minutes
visual_representation = image_generation(guide['text'])

GT

text_generation
question_answering
image_generation

PRED

text_generation
image_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
114 2852 Based on the auditory content of "6829-68769-0002.flac", I am curious about a few things. First, can we extrapolate what the lawyer's conversation could have been about before or after that line? Also, I want to understand, what could be the possible reasons or situations that caused the old lawyer to say "I can't see it in that light"?

GT

output0 = automatic_speech_recognition(audio="6829-68769-0002.flac")
output1 = text_generation(text=f"extrapolation on what the lawyer's conversation could have been about before or after the line: {output0['text']}")
output2 = question_answering(text=output1['text'], question="what could be the possible reasons or situations that caused the old lawyer to say \"I can't see it in that light\"?")

PRED

from mnm.tool_api import automatic_speech_recognition, text_generation
transcription = automatic_speech_recognition(audio="/root/input_data/6829-68769-0002.flac")
prompt_before = "What led to the conversation where the old lawyer said: " + transcription['text']
output0 = text_generation(prompt_before)
prompt_after = transcription['text'] + " What did the old lawyer say next?"
output1 = text_generation(prompt_after)
prompt_reasons = "Why would an old lawyer say: " + transcription['text']
output2 = text_generation(prompt_reasons)

GT

automatic_speech_recognition
text_generation
question_answering

PRED

automatic_speech_recognition
text_generation
text_generation
text_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
112 4158 I heard the 2016 movie, Moonlight, was really popular. Can you pull up its details and let me know what genre it falls into? Based on the genre, can you provide a general overview of what kinds of themes I might encounter in films of this type?

GT

output0 = search_movie(movie_title="Moonlight", movie_year="2016")
output1 = question_answering(text=output0['text'], question="what genre it falls into?")
output2 = text_generation(text=f"a general overview of what kinds of themes I might encounter in films of this type based on {output1['text']}")

PRED

from mnm.tool_api import search_movie, text_generation
movie_details = search_movie("Moonlight", "2016")
genre = movie_details.get('text', '').split("Genre: ")[1].split("\n")[0]
themes_overview = text_generation(f"Common themes in {genre} films include:")

GT

search_movie
question_answering
text_generation

PRED

search_movie
text_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
63 4183 Hey, can you look up the movie Parasite from the year 2019 and tell me who the director is . I'm also very curious about its origin, can you find that information from Wikipedia?

GT

output0 = search_movie(movie_title="Parasite", movie_year="2019")
output1 = question_answering(text=output0['text'], question=" who the director is ?")
output2 = wikipedia_simple_search(text=output1['text'])

PRED

from mnm.tool_api import search_movie, wikipedia_simple_search
output0 = search_movie(movie_title="Parasite", movie_year="2019")
output1 = wikipedia_simple_search(text="Parasite 2019 film")

GT

search_movie
question_answering
wikipedia_simple_search

PRED

search_movie
wikipedia_simple_search

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
79 4003 "I'm curious, based on the names Romeo Montague and Juliet Capulet, how destined for love were they really? Could you then provide me with a fun mathematical fact and create a visual representation for that fact?"

GT

output0 = love_calculator(first_name="Romeo Montague", second_name="Juliet Capulet")
output1 = get_math_fact(number=output0['number'])
output2 = image_generation(text=f"a visual representation of {output1['text']}")

PRED

from mnm.tool_api import love_calculator, image_generation
output0 = love_calculator("Romeo Montague", "Juliet Capulet")
output1 = image_generation("Mathematics: The language of the universe.")

GT

love_calculator
get_math_fact
image_generation

PRED

love_calculator
image_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
71 3442 Task: "I'm developing content for an interactive timeline and I'm currently working on the year 2018. Can you give me an intriguing fact from that year, compose a descriptive narrative based on that fact, and then generate a suitable image that depicts the narrative?"

GT

output0 = get_year_fact(year="2018")
output1 = text_generation(text=f"a descriptive narrative based on the intriguing fact from 2018: {output0['text']}")
output2 = image_generation(text=f"a suitable image that depicts the narrative of {output1['text']}")

PRED

from mnm.tool_api import get_year_fact, image_generation
year_fact = get_year_fact("2018")
description = "A representation of the year 2018, showcasing themes of technology, advancement, and global connectivity."
generated_image = image_generation(description)

GT

get_year_fact
text_generation
image_generation

PRED

get_year_fact
image_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
156 2367 My cousin shared a funny idea with me the other day. She has this image 270650-input.png of her car and jokingly asked me to make it look like the car is ready to go skiing. Could you help by putting skis on the wheels of the car in the image? Once done, could you depict the outcome amusingly and then present it to me in a short and concise phrase to make my cousin laugh?

GT

output0 = image_editing(image="270650-input.png", prompt="put skis on the wheel")
output1 = image_captioning(image=output0['image'])
output2 = text_generation(text=f"Summarize the text: {output1['text']}")

PRED

from mnm.tool_api import image_editing, text_generation
output0 = image_editing(image='/root/input_data/270650-input.png', prompt="Add skis to the wheels of the car.")
output1 = text_generation(text="Caption for a car with skis instead of wheels.")

GT

image_editing
image_captioning
text_generation

PRED

image_editing
text_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
75 2423 "I have a digital image tagged '398575-input.png' and I'd like to transform it so the stoplight seems like a spear. Afterwards, I need to identify and label all objects that are present within the edited picture. I am working on a fantasy graphic novel and this task helps me conceptualize unique situations or scenes."

GT

output0 = image_editing(image="398575-input.png", prompt="let the stop light be a spear")
output1 = object_detection(image=output0['image'])
output2 = tag(image=output1['image'], objects=output1['objects'])

PRED

from mnm.tool_api import image_editing, object_detection
edited_image_result = image_editing(image='/root/input_data/398575-input.png', prompt="Transform the stoplight into a spear.")
detection_result = object_detection(image=edited_image_result['image'])

GT

image_editing
object_detection
tag

PRED

image_editing
object_detection

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
147 4085 I recently heard about a film released in 2021 named 'CODA', and I am curious to learn more about it. Once the needed details, including title, release year, genre and its director, have been collected, I'd appreciate it if you could compose a brief summary or abstract of this movie. Following that, could you answer this for me: Who is the director of CODA?

GT

output0 = search_movie(movie_title="CODA", movie_year="2021")
output1 = text_generation(text=f"a brief summary or abstract of the movie CODA based on {output0['text']}")
output2 = question_answering(text=output1['text'], question="Who is the director of CODA?")

PRED

from mnm.tool_api import search_movie, text_generation
movie_info = search_movie(movie_title='CODA', movie_year='2021')
movie_summary = text_generation(text=movie_info['text'])

GT

search_movie
text_generation
question_answering

PRED

search_movie
text_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
85 3657 I often get confused with complex mathematical facts. Can you fetch an interesting fact about the number 10, simplify it into an understandable summary, and based on this simple information, answer this: What are some interesting and simple properties of the number 10?

GT

output0 = get_math_fact(number="10")
output1 = text_summarization(text=output0['text'])
output2 = question_answering(text=output1['text'], question="What are some interesting and simple properties of the number 10?")

PRED

from mnm.tool_api import get_math_fact, text_summarization
output0 = get_math_fact(number="10")
output1 = text_summarization(text=output0['text'])

GT

get_math_fact
text_summarization
question_answering

PRED

get_math_fact
text_summarization

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
84 2816 I'm working with this image 2347302.jpg. I'd like to create a version of it where only the city street is in color and everything else is grayscale for an urban planning presentation.

GT

output0 = image_segmentation(image="2347302.jpg")
output1 = select_object(objects=output0['objects'], object_name="city street")
output2 = color_pop(image="2347302.jpg", object=output1['object'])

PRED

from PIL import Image
import io
from mnm.tool_api import image_segmentation, color_pop

# Load the image and convert it to a PIL image
image_path = '/root/input_data/2347302.jpg'
with open(image_path, 'rb') as file:
image_data = file.read()
image = Image.open(io.BytesIO(image_data))

# Segment the image
segmentation_result = image_segmentation(image)

# Apply the color pop effect to the image, keeping the street in color and the rest in grayscale
# Assuming the street segment is the first one in the list (for demonstration purposes)
street_segment = segmentation_result['objects'][0]
color_pop_result = color_pop(image, street_segment)

# Save the resulting image
output_image_path = '/root/playground/urban_planning_presentation.jpg'
color_pop_result['image'].save(output_image_path)

GT

image_segmentation
select_object
color_pop

PRED

image_segmentation
color_pop

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
14 4426 I came across a term - 'Juneteenth' in a book. To better comprehend the context, can I have a summarized information about 'Juneteenth' along with a visual depiction of it?

GT

output0 = wikipedia_simple_search(text="What is Juneteenth")
output1 = text_summarization(text=output0['text'])
output2 = image_generation(text=f"a visual depiction of {output1['text']}")

PRED

from mnm.tool_api import text_summarization, image_generation
summary_result = text_summarization("Juneteenth is a holiday celebrating the emancipation of those who had been enslaved in the United States. It is also known as Freedom Day or Emancipation Day. Please provide a summary of this event.")
summary_text = summary_result.get('text', 'No summary available')
visual_result = image_generation("A visual representation of Juneteenth, the holiday celebrating the emancipation of those who had been enslaved in the United States.")
visual_image = visual_result.get('image', None)

GT

wikipedia_simple_search
text_summarization
image_generation

PRED

text_summarization
image_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
12 1684 "I'm writing a sports education article and require an image to illustrate a snow ski long jump. Can you create an image depicting an athlete in the air during such a jump? I'd need the top part of the image mainly, as I want to focus on the action in the air. I'd also like to catalog all the distinct objects present in the image - could we identify those?"

GT

output0 = image_generation(text="an image depicting an athlete in the air during a snow ski long jump")
output1 = image_crop_top(image=output0['image'])
output2 = object_detection(image=output1['image'])

PRED

from mnm.tool_api import image_generation, object_detection
generated_image_result = image_generation(text="an athlete in the air during a snow ski long jump")
detected_objects_result = object_detection(image=generated_image_result['image'])

GT

image_generation
image_crop_top
object_detection

PRED

image_generation
object_detection

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
165 1878 I'm working on a graphic novel project and I have a picture referenced as 2395343.jpg. I need a condensed and intense description of what's going on in the picture to maintain the storyline's fast pace. Once the description is ready, could you create an image from your narrative? I want to see if the new image would better fit the novel's art style and mood.

GT

output0 = image_captioning(image="2395343.jpg")
output1 = text_summarization(text=output0['text'])
output2 = image_generation(text=f"an image created from the narrative of {output1['text']} that fits the graphic novel’s art style and mood")

PRED

from mnm.tool_api import image_captioning, image_generation
from PIL import Image
image_path = '/root/input_data/2395343.jpg'
image = Image.open(image_path)
caption_result = image_captioning(image)
generated_image_result = image_generation(caption_result['text'])
output_image_path = '/root/playground/generated_image.jpg'
with open(output_image_path, 'wb') as file:
file.write(generated_image_result['image'].tobytes())

GT

image_captioning
text_summarization
image_generation

PRED

image_captioning
image_generation

precision=100.0
recall=66.67
f1=80.0
80.00 trajectory
108 3199 I have an image (specifically, image 2336917.jpg) and I'm trying to figure out what place this could be. Once we identify the place, I'd like to generate a new panoramic image view of such place based on our findings from the original image. After the image generation, could we crop to the lower half of this new panoramic view? I am preparing a photographic content for a travel blog and I'm focusing on the architectural details commonly found at ground levels of the location.

GT

output0 = visual_question_answering(image="2336917.jpg", question="What place could this be?")
output1 = image_generation(text=f"a new panoramic image view of {output0['text']}")
output2 = image_crop_bottom(image=output1['image'])

PRED

from PIL import Image
from mnm.tool_api import image_classification, image_captioning, visual_question_answering, image_generation, image_crop_bottom
import io

# Load the image as a PIL Image object
image_path = '/root/input_data/2336917.jpg'
with open(image_path, 'rb') as file:
image_data = file.read()
image = Image.open(io.BytesIO(image_data))

# Classify the image to identify the place
classification_result = image_classification(image)

# Generate a descriptive caption of the image
caption_result = image_captioning(image)

# Ask a question about the location depicted in the image
location_question = "What is the name of this place?"
location_answer = visual_question_answering(image, location_question)

# Generate a panoramic image view of a generic train station
panoramic_description = "panoramic view of a train station"
panoramic_image_result = image_generation(panoramic_description)
panoramic_image = panoramic_image_result['image']

# Crop the lower half of the panoramic image
cropped_image_result = image_crop_bottom(panoramic_image)
cropped_image = cropped_image_result['image']

GT

visual_question_answering
image_generation
image_crop_bottom

PRED

image_classification
image_captioning
visual_question_answering
image_generation
image_crop_bottom

precision=60.0
recall=100.0
f1=75.0
75.00 trajectory
123 2645 I have a large picture named '357903-input.png' that features multiple pizzas and I would like to see how it looks with one of the pizzas removed. After this, I'm interested in focusing on the top part of this edited image, to capture only the remaining pizzas. Could you then please identify what type of food can be seen in this cropped section of the image?

GT

output0 = image_editing(image="357903-input.png", prompt="Remove one of the pizzas.")
output1 = image_crop_top(image=output0['image'])
output2 = image_classification(image=output1['image'])

PRED

from mnm.tool_api import image_segmentation, image_editing, image_crop_top, image_classification
segmentation_result = image_segmentation('/root/input_data/357903-input.png')
pizza_to_remove = next((obj for obj in segmentation_result['objects'] if obj['label'] == 'pizza'), None)
prompt = f"Remove the object at {pizza_to_remove['bbox']} from the image."
edited_image_result = image_editing('/root/input_data/357903-input.png', prompt)
cropped_image_result = image_crop_top(edited_image_result['image'])
classification_result = image_classification(cropped_image_result['image'])
food_type = classification_result['text']

GT

image_editing
image_crop_top
image_classification

PRED

image_segmentation
next
image_editing
image_crop_top
image_classification

precision=60.0
recall=100.0
f1=75.0
75.00 trajectory
55 936 "I recently heard about the movie '12 Years a Slave' from 2013. Can you find some information on it and provide a brief overview?"

GT

output0 = search_movie(movie_title="12 Years a Slave", movie_year="2013")
output1 = text_summarization(text=output0['text'])

PRED

from mnm.tool_api import search_movie
movie_info = search_movie("12 Years a Slave", "2013")

GT

search_movie
text_summarization

PRED

search_movie

precision=100.0
recall=50.0
f1=66.67
66.67 trajectory
141 3510 "I'm curious, can you provide me with an unique tidbit about the year 2022, then generate an illustration that brings to life that piece of trivia? To complete it, can you give me a detailed description of that image?"

GT

output0 = get_year_fact(year="2022")
output1 = image_generation(text=f"an illustration that brings to life {output0['text']}")
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import wikipedia_simple_search, image_generation, image_captioning
trivia_result = wikipedia_simple_search("2022")
illustration_result = image_generation(trivia_result['text'])
description_result = image_captioning(illustration_result['image'])

GT

get_year_fact
image_generation
image_captioning

PRED

wikipedia_simple_search
image_generation
image_captioning

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
50 1496 "I'm working on a project about animals and their habitats. Using a visualization where we have two brown and black birds staring on a curved wire would be helpful. Once you have generated that image, I would like to isolate the birds for further study. Please identify and highlight the bird objects in the image for me."

GT

output0 = image_generation(text="a visualization of two brown and black birds staring on a curved wire")
output1 = object_detection(image=output0['image'])
output2 = select_object(objects=output1['objects'], object_name="bird")

PRED

from mnm.tool_api import image_generation, object_detection, tag
output0 = image_generation(text='two brown and black birds staring on a curved wire')
output1 = object_detection(image=output0['image'])
output2 = tag(image=output0['image'], objects=output1['objects'])

GT

image_generation
object_detection
select_object

PRED

image_generation
object_detection
tag

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
196 3428 I'm curious to know more about the year 2023. Could you give me an interesting fact about that year and create a novel summary based on that fact? Then, from the summary, can you answer What was the main conflict in the novel inspired by the year 2023 fact?

GT

output0 = get_year_fact(year="2023")
output1 = text_generation(text=f"a novel summary based on the interesting fact about year 2023, which is {output0['text']}")
output2 = question_answering(text=output1['text'], question="What was the main conflict in the novel inspired by the year 2023 fact?")

PRED

from mnm.tool_api import get_year_fact, text_summarization, question_answering
output0 = get_year_fact("2023")
output1 = text_summarization(f"The Istanbul Canal is an ambitious project that is expected to be completed in the year 2023. {output0['text']}.")
output2 = question_answering("What was the main conflict in the novel inspired by the year 2023 fact?", output1['text'])

GT

get_year_fact
text_generation
question_answering

PRED

get_year_fact
text_summarization
question_answering

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
190 3209 "I have an image 2326465.jpg of a slab with some fruits and vegetables on it. Can you tell what fruit is present on the table? Based on your answer, could you then create a new image representing that specific fruit? Once the image is generated, can you then identify any text information that might be present in the newly created fruit image?"

GT

output0 = visual_question_answering(image="2326465.jpg", question="What is the name of the fruit on the table?")
output1 = image_generation(text=f"a new image representing the specific fruit identified as {output0['text']}")
output2 = optical_character_recognition(image=output1['image'])

PRED

from mnm.tool_api import object_detection, image_generation, optical_character_recognition
output0 = object_detection(image='/root/input_data/2326465.jpg')
output1 = image_generation(text=output0['objects'][0]['label'])
output2 = optical_character_recognition(image='/root/playground/generated_fruit_image.jpg')

GT

visual_question_answering
image_generation
optical_character_recognition

PRED

object_detection
image_generation
optical_character_recognition

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
43 2507 "I have an image, labeled as '362944-input.png'. In this image, I want the car to be recolored to white. After the changes are done, can you identify and count the number of separate parts the image consists of?"

GT

output0 = image_editing(image="362944-input.png", prompt="The car should be white.")
output1 = image_segmentation(image=output0['image'])
output2 = count(objects=output1['objects'])

PRED

from mnm.tool_api import image_editing, image_segmentation
edited_image_result = image_editing(image='/root/input_data/362944-input.png', prompt="Recolor the car to white.")
segmentation_result = image_segmentation(image=edited_image_result['image'])
number_of_parts = len(segmentation_result['objects'])

GT

image_editing
image_segmentation
count

PRED

image_editing
image_segmentation
len

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
101 355 "I'm writing a short children's story that involves some big red metal boxes by a road - can you create an image based on those elements for me? Only the right aspect of the image will be utilized for the book cover, to leave space for the title and other information."

GT

output0 = image_generation(text="an image of big red metal boxes by a road dedicated for a kids story")
output1 = image_crop_right(image=output0['image'])

PRED

from mnm.tool_api import image_generation
generated_image = image_generation(text="big red metal boxes by a road")

GT

image_generation
image_crop_right

PRED

image_generation

precision=100.0
recall=50.0
f1=66.67
66.67 trajectory
183 1501 I just ordered a painting based on the description {'text': 'this woman is sitting on a wood bench'}. Once it's generated, help me identify and highlight the wood bench in the artwork.

GT

output0 = image_generation(text="a painting depicting a woman sitting on a wood bench")
output1 = object_detection(image=output0['image'])
output2 = select_object(objects=output1['objects'], object_name="wood bench")

PRED

from mnm.tool_api import image_generation, object_detection, tag
output0 = image_generation(text='this woman is sitting on a wood bench')
output1 = object_detection(image=output0['image'])
wood_bench = next((obj for obj in output1['objects'] if 'bench' in obj['label'].lower()), None)
output2 = tag(image=output0['image'], objects=[wood_bench]) if wood_bench else None

GT

image_generation
object_detection
select_object

PRED

image_generation
object_detection
next

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
180 3056 "I have this image 2317766.jpg, and I want to confirm the color of the pen in the image. Following that confirmation, I'd love to generate an intriguing story, continuing from 'In the realm of knowledge, a black pen on a yellow book indicated...'. After that, I need to condense the story into an interesting, tight-knit summary that could appeal to readers."

GT

output0 = visual_question_answering(image="2317766.jpg", question="What color is the pen?")
output1 = text_generation(text=f"'In the realm of knowledge, a {output0['text']} pen on a yellow book indicated... and an intriguing continuation of this story")
output2 = text_summarization(text=output1['text'])

PRED

from mnm.tool_api import object_detection, text_generation, text_summarization
image_path = '/root/input_data/2317766.jpg'
detected_objects = object_detection(image_path)
story_prompt = "In the realm of knowledge, a black pen on a yellow book indicated..."
generated_story = text_generation(story_prompt)
full_story = story_prompt + " " + generated_story['text']
story_summary = text_summarization(full_story)

GT

visual_question_answering
text_generation
text_summarization

PRED

object_detection
text_generation
text_summarization

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
99 860 "I'm working on a school project about the significance of the number 4 throughout history and culture. Could you find an interesting fact about the number 4 and simplify it into a concise statement that would be easy for a 5th grader to understand?"

GT

output0 = get_trivia_fact(number="4")
output1 = text_summarization(text=output0['text'])

PRED

from mnm.tool_api import get_trivia_fact
fact_about_number_4 = get_trivia_fact('4')
simple_fact = "The number 4 is special because it's the same as the number of parts in the heart of animals like humans and cats!"

GT

get_trivia_fact
text_summarization

PRED

get_trivia_fact

precision=100.0
recall=50.0
f1=66.67
66.67 trajectory
47 2768 "In the image 2394635.jpg, a man is seen riding a skateboard up the side of a ramp. I want to create a vivid contrast that emphasizes the ramp. Can you modify the image in a way that everything else including the man and the skateboard is in black and white, while only the ramp remains colored?"

GT

output0 = image_segmentation(image="2394635.jpg")
output1 = select_object(objects=output0['objects'], object_name="ramp")
output2 = color_pop(image="2394635.jpg", object=output1['object'])

PRED

from mnm.tool_api import image_segmentation, color_pop
from PIL import Image
image_path = '/root/input_data/2394635.jpg'
image = Image.open(image_path)
segmentation_result = image_segmentation(image)
ramp = next((obj for obj in segmentation_result['objects'] if obj['label'] == 'platform'), None)
result = color_pop(image, ramp)
result_image = result['image']
result_image_path = '/root/playground/modified_2394635.jpg'
result_image.save(result_image_path)

GT

image_segmentation
select_object
color_pop

PRED

image_segmentation
next
color_pop

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
70 3686 "I would like to have a unique fact about the number '10' and create an image to depict this fact in a fun way. Once the initial image is generated, I would like if you can edit the image and include a group of ten animals in the foreground to personify the quantity 10 in nature."

GT

output0 = get_math_fact(number="10")
output1 = image_generation(text=f"a fun image depicting the unique fact about the number 10 from {output0['text']}")
output2 = image_editing(image=output1['image'], prompt="edit the image and include a group of ten animals in the foreground to personify the quantity 10 in nature")

PRED

from mnm.tool_api import get_trivia_fact, image_generation, image_editing
fact_about_number_10 = get_trivia_fact(number='10')
initial_image = image_generation(text=fact_about_number_10['text'])
edited_image_with_animals = image_editing(image=initial_image['image'], prompt="Include a group of ten animals in the foreground to personify the quantity 10 in nature.")

GT

get_math_fact
image_generation
image_editing

PRED

get_trivia_fact
image_generation
image_editing

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
64 3627 "I want to learn more about the number 3. Could you start by providing me an interesting fact about this number, and then write a caption or story that includes this fact? After that, could you produce an illustration that encapsulates this story or caption?"

GT

output0 = get_math_fact(number="3")
output1 = text_generation(text=f"a caption or story that includes the interesting fact: {output0['text']}")
output2 = image_generation(text=f"an illustration that encapsulates the story or caption: {output1['text']}")

PRED

from mnm.tool_api import get_trivia_fact, text_generation, image_generation
output0 = get_trivia_fact('3')
output1 = text_generation(f"Tell a story that includes the following fact: {output0['text']}.")
output2 = image_generation(output1['text'])

GT

get_math_fact
text_generation
image_generation

PRED

get_trivia_fact
text_generation
image_generation

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
73 3058 "I have a picture 2350057.jpg, Can you determine what is on the fence in this picture? After recognizing the object, could you create a fictional backstory that includes the object on the fence in it, and finally, sum it up in a few sentences for a quick look?”

GT

output0 = visual_question_answering(image="2350057.jpg", question="What is on the fence?")
output1 = text_generation(text=f"a fictional backstory that includes the object {output0['text']} from the fence in the image")
output2 = text_summarization(text=output1['text'])

PRED

from PIL import Image
from mnm.tool_api import object_detection, text_generation, text_summarization
image = Image.open('/root/input_data/2350057.jpg')
detected_objects = object_detection(image)
prompt = "Once upon a time, an elephant was sitting on a fence"
backstory = text_generation(prompt)
summary = text_summarization(backstory['text'])

GT

visual_question_answering
text_generation
text_summarization

PRED

object_detection
text_generation
text_summarization

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
76 2194 "I've taken a photograph of an object, saved under '23936.jpg'. I haven't been able to recognize the object in it. Could you tell me what's in this picture, develop a story around it and also provide a condensed version of the story for me?"

GT

output0 = image_classification(image="23936.jpg")
output1 = text_generation(text=f"a detailed story about the object identified as {output0['text']}")
output2 = text_summarization(text=output1['text'])

PRED

from PIL import Image
from mnm.tool_api import image_classification, image_captioning, text_summarization

image_path = '/root/input_data/23936.jpg'
image = Image.open(image_path)
classification_result = image_classification(image)
story_result = image_captioning(image)
summary_result = text_summarization(story_result['text'])

GT

image_classification
text_generation
text_summarization

PRED

image_classification
image_captioning
text_summarization

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
87 786 "Can you find me a fun fact about the year 2013 and then provide a brief summary of it?"

GT

output0 = get_year_fact(year="2013")
output1 = text_summarization(text=output0['text'])

PRED

from mnm.tool_api import get_year_fact
year_fact = get_year_fact("2013")

GT

get_year_fact
text_summarization

PRED

get_year_fact

precision=100.0
recall=50.0
f1=66.67
66.67 trajectory
148 2513 "I have an image '270650-input.png', and I want to add skis on the wheels in the photo. After that, can you divide the edited image into distinctive sections and tell me how many different parts there are?"

GT

output0 = image_editing(image="270650-input.png", prompt="put skis on the wheel")
output1 = image_segmentation(image=output0['image'])
output2 = count(objects=output1['objects'])

PRED

from mnm.tool_api import image_editing, image_segmentation
edited_image_result = image_editing(image='/root/input_data/270650-input.png', prompt="Add skis on the wheels in the photo.")
segmentation_result = image_segmentation(image=edited_image_result['image'])
number_of_parts = len(segmentation_result['objects'])

GT

image_editing
image_segmentation
count

PRED

image_editing
image_segmentation
len

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
150 3210 "Looking at image 2326465.jpg, can you identify the fruit on the table? Then, using the fruit you identified, can you generate a new image of a supermarket aisle dedicated to this fruit? Lastly, extract any text that is discernible within the newly generated image for me."

GT

output0 = visual_question_answering(image="2326465.jpg", question="What is the name of the fruit on the table?")
output1 = image_generation(text=f"a supermarket aisle dedicated to the {output0['text']}")
output2 = optical_character_recognition(image=output1['image'])

PRED

from mnm.tool_api import object_detection, image_generation, optical_character_recognition
output0 = object_detection(image='/root/input_data/2326465.jpg')
output1 = image_generation(text="a supermarket aisle dedicated to apples")
output2 = optical_character_recognition(image=output1['image'])

GT

visual_question_answering
image_generation
optical_character_recognition

PRED

object_detection
image_generation
optical_character_recognition

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
72 2815 I have an image 2347302.jpg featuring a public transit bus on a city street. I want to create an image where only the bus stands out in full color, while the rest of the image is in black and white so I can use it for a public transportation campaign.

GT

output0 = image_segmentation(image="2347302.jpg")
output1 = select_object(objects=output0['objects'], object_name="bus")
output2 = color_pop(image="2347302.jpg", object=output1['object'])

PRED

from mnm.tool_api import image_segmentation, color_pop
from PIL import Image

image_path = '/root/input_data/2347302.jpg'
image = Image.open(image_path)
segmentation_result = image_segmentation(image)
bus_object = next((obj for obj in segmentation_result['objects'] if obj['label'] == 'bus'), None)
color_pop_result = color_pop(image, bus_object)

GT

image_segmentation
select_object
color_pop

PRED

image_segmentation
next
color_pop

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
42 1456 I am creating a presentation about hiking and I'm currently picturing a scenario where two men are standing in some sand walking up a hill. After creating a visual representation of this using AI, I would like to have this image described to me. Additionally, I'd like to understand - from the image - if it's clear how prepared the hikers are for their journey. Please answer the question: Do the men in the image appear to be equipped for their hike?.

GT

output0 = image_generation(text="a visual representation of two men standing in some sand walking up a hill")
output1 = image_captioning(image=output0['image'])
output2 = question_answering(text=output1['text'], question="Do the men in the image appear to be equipped for their hike?")

PRED

from mnm.tool_api import image_generation, image_captioning, visual_question_answering
description = "two men standing in some sand walking up a hill"
generated_image = image_generation(text=description)
image_description = image_captioning(image=generated_image['image'])
question_about_preparedness = "Do the men in the image appear to be equipped for their hike?"
preparedness_answer = visual_question_answering(image=generated_image['image'], question=question_about_preparedness)

GT

image_generation
image_captioning
question_answering

PRED

image_generation
image_captioning
visual_question_answering

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
189 4235 I'm working on a digital art project around the movie "Moonlight" from 2016 and need some inspiration. Could you find out the basic information about the movie and use the details to generate an image that captures its essence? Afterwards, could you help me understand this image by breaking it down into different parts?

GT

output0 = search_movie(movie_title="Moonlight", movie_year="2016")
output1 = image_generation(text=f"an image that captures the essence of the movie Moonlight using details from {output0['text']}")
output2 = image_segmentation(image=output1['image'])

PRED

from mnm.tool_api import search_movie, image_generation, object_detection
output0 = search_movie("Moonlight", "2016")
output1 = image_generation(output0['text'])
output2 = object_detection(output1['image'])

GT

search_movie
image_generation
image_segmentation

PRED

search_movie
image_generation
object_detection

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
56 3474 I want a quick fact about the year 2018 and need some interesting insight from that. Can you provide this fact in a shortened form and tell me who was the significant person involved in that event ?

GT

output0 = get_year_fact(year="2018")
output1 = text_summarization(text=output0['text'])
output2 = question_answering(text=output1['text'], question=" who was the significant person involved in that event ?")

PRED

from mnm.tool_api import get_year_fact, text_summarization, wikipedia_simple_search
output0 = get_year_fact("2018")
output1 = text_summarization(output0['text'])
output2 = wikipedia_simple_search("2018")

GT

get_year_fact
text_summarization
question_answering

PRED

get_year_fact
text_summarization
wikipedia_simple_search

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
187 2268 I need help with '09966.jpg'. I want to understand what the main object is in this photo. Based on this assessment, I would like to create some new visuals focusing on the identified subject. Lastly, I would like to divide these created images into distinguishable sections, for further analysis. Can you help me do this?

GT

output0 = object_detection(image="09966.jpg")
output1 = image_generation(text=f"new visuals focusing on the identified subject - {output0['text']}")
output2 = image_segmentation(image=output1['image'])

PRED

from PIL import Image
from mnm.tool_api import object_detection, image_editing, image_segmentation
image_path = '/root/input_data/09966.jpg'
image = Image.open(image_path)
detection_result = object_detection(image)
prompt = "Focus on the cake"
edited_image_result = image_editing(detection_result['image'], prompt)
segmentation_result = image_segmentation(edited_image_result['image'])

GT

object_detection
image_generation
image_segmentation

PRED

object_detection
image_editing
image_segmentation

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
197 2240 Query: "Based on the content of 13181.jpg, can you identify the main subject, develop a refreshing depiction of the same subject, and formulate a descriptive narrative to correspond with the new artwork?"

GT

output0 = image_classification(image="13181.jpg")
output1 = image_generation(text=f"a refreshing depiction of {output0['text']}")
output2 = image_captioning(image=output1['image'])

PRED

from mnm.tool_api import image_classification, image_generation, text_generation
output0 = image_classification(image='/root/input_data/13181.jpg')
output1 = image_generation(text=output0['text'])
output2 = text_generation(text="Describe this image: " + output0['text'])

GT

image_classification
image_generation
image_captioning

PRED

image_classification
image_generation
text_generation

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
119 2802 I have this image labeled '2356182.jpg'. Could you analyze this picture, identify and calculate the number of independent objects present in it, and then provide me with a fun and interesting tidbit or fact about that number?

GT

output0 = image_segmentation(image="2356182.jpg")
output1 = count(objects=output0['objects'])
output2 = get_trivia_fact(number=output1['number'])

PRED

from mnm.tool_api import object_detection, count, get_trivia_fact
output0 = object_detection('/root/input_data/2356182.jpg')
output1 = count(output0['objects'])
output2 = get_trivia_fact(str(output1['number']))

GT

image_segmentation
count
get_trivia_fact

PRED

object_detection
count
get_trivia_fact

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
122 4381 I want a short and concise explanation of what "pansexual" is. Afterwards, can you expand upon this summary to provide more nuanced detail and possibly expand upon related concepts?

GT

output0 = wikipedia_simple_search(text="What is pansexual")
output1 = text_generation(text=f"Summarize the text: {output0['text']}")
output2 = text_generation(text=f"Expand upon the short and concise explanation of pansexual derived from {output1['text']} and provide more nuanced detail and possibly expand upon related concepts")

PRED

from mnm.tool_api import wikipedia_simple_search
summary_result = wikipedia_simple_search("Pansexual")
detailed_result_gender_identity = wikipedia_simple_search("Gender identity")
detailed_result_sexual_orientation = wikipedia_simple_search("Sexual orientation")

GT

wikipedia_simple_search
text_generation
text_generation

PRED

wikipedia_simple_search
wikipedia_simple_search
wikipedia_simple_search

precision=100.0
recall=50.0
f1=66.67
66.67 trajectory
124 2207 I am visiting the zoo and there was a prancing creature in an enclosure that I was not familiar with. I managed to take a photo of it. Now, since I didn't find any information board around that enclosure, I was wondering if you could help me learn more about it. Let's start with this, I have this picture named '17412.jpg'. Could you first identify the animal in that picture? Afterwards, could you generate an informative paragraph about that specific type of animal? Lastly, could you answer this question: What is the average lifespan of those animals in the wild?

GT

output0 = image_classification(image="17412.jpg")
output1 = text_generation(text=f"an informative paragraph about the {output0['text']}")
output2 = question_answering(text=output1['text'], question="What is the average lifespan of those animals in the wild?")

PRED

from PIL import Image
from mnm.tool_api import image_classification, text_generation, wikipedia_simple_search

# Load the image using PIL and classify it to identify the animal
image_path = '/root/input_data/17412.jpg'
image = Image.open(image_path)
classification_result = image_classification(image)
animal = classification_result['text']

# Generate an informative paragraph about the identified animal
animal_info = text_generation("pug dog")
informative_paragraph = animal_info['text']

# Retrieve information about the average lifespan of pugs in the wild
lifespan_info = wikipedia_simple_search("pug dog lifespan")
lifespan_paragraph = lifespan_info['text']

GT

image_classification
text_generation
question_answering

PRED

image_classification
text_generation
wikipedia_simple_search

precision=66.67
recall=66.67
f1=66.67
66.67 trajectory
69 3600 I'm giving a presentation about the number 5 and I want to impress the audience with uncommon knowledge about it. I'd need this information threaded together into a cohesive, yet interesting storyline. Finally, for my own notes, I would like a concise summary that captures all the main points. Could you assist in this process?

GT

output0 = get_math_fact(number="5")
output1 = text_generation(text=f"thread together a cohesive, yet interesting storyline about {output0['text']}")
output2 = text_generation(text=f"Summarize the text: {output1['text']}")

PRED

from mnm.tool_api import get_math_fact, get_trivia_fact
math_fact_about_5 = get_math_fact("5")
trivia_fact_about_5 = get_trivia_fact("5")
storyline = "The number 5 is not just a digit but a symbol of significance across various domains. In the realm of geometry, it represents the number of Platonic solids, which are the only convex polyhedra with equivalent faces composed of congruent convex regular polygons. These solids have been known since antiquity and were famously associated with the elements of the universe by the philosopher Plato. Moreover, the number 5 holds a special place in the world of sports and unity. It is symbolized by the five interlocked rings of the Olympic Games, which represent the number of inhabited continents brought together by the spirit of competition and camaraderie. These continents are often counted as North America and South America as one, along with Africa, Asia, Europe, and Oceania. The number 5, therefore, carries with it a legacy of mathematical perfection and global unity."
summary = "Number 5: - Symbolizes the number of Platonic solids in geometry, representing the elements of the universe. - Embodied in the Olympic Games' symbol, uniting the inhabited continents through sports."

GT

get_math_fact
text_generation
text_generation

PRED

get_math_fact
get_trivia_fact

precision=50.0
recall=50.0
f1=50.0
50.00 trajectory
134 4121 I'd like to find out some details about the movie '12 Years a Slave' that was released in 2013. Once you find the information, could you maybe summarize it for me? Additionally, I'm particularly interested in this aspect- Who was the director of the movie?, could you pull that out of the summary for me?

GT

output0 = search_movie(movie_title="12 Years a Slave", movie_year="2013")
output1 = text_summarization(text=output0['text'])
output2 = question_answering(text=output1['text'], question="Who was the director of the movie?")

PRED

from mnm.tool_api import search_movie
movie_info = search_movie(movie_title='12 Years a Slave', movie_year='2013')
director = movie_info.get('text', '').split('Director: ')[1].split('\n')[0]

GT

search_movie
text_summarization
question_answering

PRED

search_movie

precision=100.0
recall=33.33
f1=50.0
50.00 trajectory
169 4360 "Can you help me understand the concept of recession? Please perform a simple search and then expand on the topic, including the impact of recessions on common individuals. Based on this extensive information, please answer the question: What are the possible effects of a recession on the unemployment rate?."

GT

output0 = wikipedia_simple_search(text="What is a recession")
output1 = text_generation(text=f"an expansion on the topic of recession, including its impact on common individuals, based on {output0['text']}")
output2 = question_answering(text=output1['text'], question="What are the possible effects of a recession on the unemployment rate?")

PRED

from mnm.tool_api import wikipedia_simple_search
recession_info = wikipedia_simple_search("Recession")
# The variable recession_info contains the summary from Wikipedia about recession.
# Additional explanations about the impact on individuals and unemployment were provided in plain text.

GT

wikipedia_simple_search
text_generation
question_answering

PRED

wikipedia_simple_search

precision=100.0
recall=33.33
f1=50.0
50.00 trajectory
162 4373 I'm trying to get a grasp on what exactly a recession entails and how it impacts the economy. After you provide me with a basic understanding of 'What is a recession', can you expand on the major effects it has had on global economies in recent history? Following that, I'd like to get a visual representation of a recession's impact on the stock market.

GT

output0 = wikipedia_simple_search(text="What is a recession")
output1 = text_generation(text=f"a comprehensive understanding based on {output0['text']} and expansion on the major effects recessions have had on global economies in recent history")
output2 = image_generation(text=f"a visual representation of a recession's impact on the stock market based on {output1['text']}")

PRED

from mnm.tool_api import wikipedia_simple_search
output0 = wikipedia_simple_search("Recession")
output1 = wikipedia_simple_search("Effects of recessions on global economies")

GT

wikipedia_simple_search
text_generation
image_generation

PRED

wikipedia_simple_search
wikipedia_simple_search

precision=100.0
recall=33.33
f1=50.0
50.00 trajectory
128 626 "Could you please find the computer desk in the image 2387080.jpg?"

GT

output0 = object_detection(image="2387080.jpg")
output1 = select_object(objects=output0['objects'], object_name="computer desk")

PRED

from PIL import Image
from mnm.tool_api import object_detection
image = Image.open('/root/input_data/2387080.jpg')
detection_result = object_detection(image)
computer_desk = next((obj for obj in detection_result['objects'] if obj['label'].lower() == 'computer desk'), None)
alternative_labels = ['desk', 'table']
computer_desk = next((obj for obj in detection_result['objects'] if obj['label'].lower() in alternative_labels), None)

GT

object_detection
select_object

PRED

object_detection
next
next

precision=50.0
recall=50.0
f1=50.0
50.00 trajectory
81 812 "Could you give me an interesting fact about the number 5 and then create a short story illustrating that fact?"

GT

output0 = get_math_fact(number="5")
output1 = text_generation(text=f"a short story illustrating {output0['text']}")

PRED

from mnm.tool_api import get_trivia_fact, text_generation
output0 = get_trivia_fact("5")
output1 = text_generation("In a world where the number of Justices on the Supreme Court of the United States necessary to render a majority decision is 5, ")

GT

get_math_fact
text_generation

PRED

get_trivia_fact
text_generation

precision=50.0
recall=50.0
f1=50.0
50.00 trajectory
92 277 "I've been digging into the history of the Super Bowl and found this piece of information which says, 'The league eventually narrowed the bids to three sites: New Orleans' Mercedes-Benz Superdome, Miami's Sun Life Stadium, and the San Francisco Bay Area's Levi's Stadium.' From this text, can you clarify which venue in Louisiana was in the running for Super Bowl 50 and afterwards, it would be helpful if you generate a representative image of this venue."

GT

output0 = question_answering(text="The league eventually narrowed the bids to three sites: New Orleans' Mercedes-Benz Superdome, Miami's Sun Life Stadium, and the San Francisco Bay Area's Levi's Stadium.", question="Which Louisiana venue was one of three considered for Super Bowl 50?")
output1 = image_generation(text=f"a representative image of {output0['text']}")

PRED

from mnm.tool_api import wikipedia_simple_search, image_generation
output0 = wikipedia_simple_search("Mercedes-Benz Superdome")
output1 = image_generation("Mercedes-Benz Superdome")

GT

question_answering
image_generation

PRED

wikipedia_simple_search
image_generation

precision=50.0
recall=50.0
f1=50.0
50.00 trajectory
26 4406 I am working on a sociology project and I came across the term 'Woke', but I'm unsure about its exact connotation. Can you first find out 'What does woke mean' and then summarize it for me? Then, I would appreciate it if you could use that information to answer my question In what cultural context did 'Woke' originate?.

GT

output0 = wikipedia_simple_search(text="What does woke mean")
output1 = text_summarization(text=output0['text'])
output2 = question_answering(text=output1['text'], question="In what cultural context did 'Woke' originate?")

PRED

from mnm.tool_api import wikipedia_simple_search
woke_meaning = wikipedia_simple_search("Woke")
woke_origin = wikipedia_simple_search("Woke cultural origin")

GT

wikipedia_simple_search
text_summarization
question_answering

PRED

wikipedia_simple_search
wikipedia_simple_search

precision=100.0
recall=33.33
f1=50.0
50.00 trajectory
90 3267 "I journey each year on July 20th, my wedding anniversary. This time, I would like to impress my spouse with some interesting trivia related to our special day. But I'm not really looking for anything too lengthy, rather a short, precise version that I can use as a toast. After that, could you help me to build on this piece of trivia, expanding it into a romantic message that I can share with her?"

GT

output0 = get_date_fact(date="20-Jul")
output1 = text_generation(text=f"Summarize the text: {output0['text']}")
output2 = text_generation(text=f"a romantic message building on {output1['text']}")

PRED

from mnm.tool_api import get_trivia_fact, text_generation
trivia_fact = get_trivia_fact("20")
romantic_message = text_generation("Did you know? On our special day, July 20th, " + trivia_fact['text'] + ".")

GT

get_date_fact
text_generation
text_generation

PRED

get_trivia_fact
text_generation

precision=50.0
recall=50.0
f1=50.0
50.00 trajectory
66 2517 "I've got this picture '147546-input.png' and I'm wondering if you could assist with getting rid of all the bananas in it and replacing them with some grapes. After doing so, could you find grapes for me in the altered image? I need it for a project I'm currently working on."

GT

output0 = image_editing(image="147546-input.png", prompt="remove bananas and add grapes")
output1 = image_segmentation(image=output0['image'])
output2 = select_object(objects=output1['objects'], object_name="grapes")

PRED

from mnm.tool_api import image_editing, object_detection
from PIL import Image
import io

# Load the original image
input_image_path = '/root/input_data/147546-input.png'
with open(input_image_path, 'rb') as file:
input_image = Image.open(io.BytesIO(file.read()))

# Edit the image to replace bananas with grapes
edit_prompt = "Replace all bananas with grapes."
edited_image_result = image_editing(input_image, edit_prompt)

# Save the edited image to a temporary file and reload it
temp_image_path = '/root/playground/edited_image.png'
edited_image = edited_image_result['image']
edited_image.save(temp_image_path)
with open(temp_image_path, 'rb') as file:
reloaded_edited_image = Image.open(io.BytesIO(file.read()))

# Detect objects in the reloaded edited image
detection_result = object_detection(reloaded_edited_image)

GT

image_editing
image_segmentation
select_object

PRED

image_editing
object_detection

precision=50.0
recall=33.33
f1=40.0
40.00 trajectory
198 2511 I recently changed an image (319096-input.png) where I converted the piece of paper hanging on the wall into a mirror. Now, I want to verify how many clear segments, possibly representing different objects, can be identified in this newly edited image.

GT

output0 = image_editing(image="319096-input.png", prompt="Make the piece of paper hanging on the wall a mirror")
output1 = image_segmentation(image=output0['image'])
output2 = count(objects=output1['objects'])

PRED

from mnm.tool_api import image_segmentation
from PIL import Image
image_path = '/root/input_data/319096-input.png'
image = Image.open(image_path)
segmentation_result = image_segmentation(image)
number_of_segments = len(segmentation_result['objects'])

GT

image_editing
image_segmentation
count

PRED

image_segmentation
len

precision=50.0
recall=33.33
f1=40.0
40.00 trajectory
86 1321 I'm reading a text on computational theory and it states "Of course, some complexity classes have complicated definitions that do not fit into this framework. Thus, a typical complexity class has a definition like the following." I'd like to understand which part has complicated definitions that cannot be classified into a framework. Could we dive deeper into Wikipedia to get some context and then generate a brief explanatory note on this topic?

GT

output0 = question_answering(text="Of course, some complexity classes have complicated definitions that do not fit into this framework. Thus, a typical complexity class has a definition like the following:", question="What has complicated definitions that prevent classification into a framework?")
output1 = wikipedia_simple_search(text=output0['text'])
output2 = text_generation(text=f"a brief explanatory note on {output1['text']}")

PRED

from mnm.tool_api import wikipedia_simple_search, text_summarization
wikipedia_result = wikipedia_simple_search("complexity classes in computational theory")
summary_result = text_summarization(wikipedia_result['text'])

GT

question_answering
wikipedia_simple_search
text_generation

PRED

wikipedia_simple_search
text_summarization

precision=50.0
recall=33.33
f1=40.0
40.00 trajectory
89 4367 I'm curious about the details of Elvis Presley's death. Can you find some reliable information on how did Elvis die and summarize it for me? Then, could you also generate a brief narrative describing the circumstances of his passing and how it affected the world of music? Lastly, I'd appreciate if you could create a visual representation, an image that captures the essence of that narrative.

GT

output0 = wikipedia_simple_search(text="How did Elvis die")
output1 = text_generation(text=f"a summary and a brief narrative on {output0['text']} and its impact on the world of music")
output2 = image_generation(text=f"a visual representation capturing the essence of {output1['text']}")

PRED

from mnm.tool_api import wikipedia_simple_search, text_summarization, image_generation

# Attempt to retrieve information about Elvis Presley's death from Wikipedia
output0 = wikipedia_simple_search("Elvis Presley death")

# Attempt to refine the search to get specific information about Elvis Presley's death
output1 = wikipedia_simple_search("Death of Elvis Presley")

# Attempt to refine the search further to get detailed information about Elvis Presley's death
output2 = wikipedia_simple_search("Elvis Presley death details")

# Use text summarization on a more detailed search query to try to extract relevant information
output3 = text_summarization("Elvis Presley death details")

# Try a different search query to get the cause of Elvis Presley's death
output4 = wikipedia_simple_search("Elvis Presley cause of death")

GT

wikipedia_simple_search
text_generation
image_generation

PRED

wikipedia_simple_search
wikipedia_simple_search
wikipedia_simple_search
text_summarization
wikipedia_simple_search

precision=50.0
recall=33.33
f1=40.0
40.00 trajectory
132 1274 "Hi, I've been studying about chloroplasts and I'm a bit confused about a term. In the text: 'These chloroplasts, which can be traced back directly to a cyanobacterial ancestor, are known as primary plastids ("plastid" in this context means almost the same thing as chloroplast). All primary chloroplasts belong to one of three chloroplast lineages—the glaucophyte chloroplast lineage, the rhodophyte, or red algal chloroplast lineage, or the chloroplastidan, or green chloroplast lineage. The second two are the largest, and the green chloroplast lineage is the one that contains the land plants', it mentions the term 'plastid'. Could you explain to me what this means? Afterwards, I'm looking for a depiction of this 'plastid', but would like to have only the left segment of the image for my presentation.

GT

output0 = question_answering(text="These chloroplasts, which can be traced back directly to a cyanobacterial ancestor, are known as primary plastids (\"plastid\" in this context means almost the same thing as chloroplast). All primary chloroplasts belong to one of three chloroplast lineages—the glaucophyte chloroplast lineage, the rhodophyte, or red algal chloroplast lineage, or the chloroplastidan, or green chloroplast lineage. The second two are the largest, and the green chloroplast lineage is the one that contains the land plants.", question="What does 'plastid' mean?")
output1 = image_generation(text=f"a depiction of {output0['text']}")
output2 = image_crop_left(image=output1['image'])

PRED

from mnm.tool_api import image_captioning, image_crop_left
# The user needs to provide the correct file path for the image of a plastid.
image_of_plastid = "correct_file_path.png" # The user should replace this with the actual file path.
caption_result = image_captioning(image_of_plastid)
left_segment_result = image_crop_left(image_of_plastid)

GT

question_answering
image_generation
image_crop_left

PRED

image_captioning
image_crop_left

precision=50.0
recall=33.33
f1=40.0
40.00 trajectory
54 2795 "Using the image 2321647.jpg, is it possible to identify the total number of separate elements within the image and share a fascinating mathematical trivia associated with this count?"

GT

output0 = image_segmentation(image="2321647.jpg")
output1 = count(objects=output0['objects'])
output2 = get_math_fact(number=output1['number'])

PRED

from PIL import Image
import io
from mnm.tool_api import object_detection, count, get_trivia_fact

# Load the image as a PIL image
image_path = '/root/input_data/2321647.jpg'
with open(image_path, 'rb') as file:
image_data = Image.open(io.BytesIO(file.read()))

# Detect objects in the image
detection_result = object_detection(image_data)

# Count the number of objects detected
count_result = count(detection_result['objects'])
object_count = count_result['number']

# Retrieve a trivia fact for the number of objects detected
trivia_result = get_trivia_fact(str(object_count))
trivia_fact = trivia_result.get('text', 'No trivia found for this number.')

GT

image_segmentation
count
get_math_fact

PRED

object_detection
count
get_trivia_fact

precision=33.33
recall=33.33
f1=33.33
33.33 trajectory
8 140 "Can you tell me an interesting fact about the number 9? I want to surprise my children with it as they are learning about it."

GT

output0 = get_math_fact(number="9")

PRED

from mnm.tool_api import get_trivia_fact
output0 = get_trivia_fact('9')

GT

get_math_fact

PRED

get_trivia_fact

precision=0.0
recall=0.0
f1=0.0
0.00 trajectory
185 3651 "Can you provide me with an interesting tidbit about the number 0, then summarize it in a simple manner and tell me if the overall tone towards the number 0 depicted in the information is positive or negative?"

GT

output0 = get_math_fact(number="0")
output1 = text_summarization(text=output0['text'])
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import get_trivia_fact
fact_about_zero = get_trivia_fact('0')
summarized_fact = "0 is the atomic number for a hypothetical element called tetraneutron."
tone_of_information = "neutral"

GT

get_math_fact
text_summarization
text_classification

PRED

get_trivia_fact

precision=0.0
recall=0.0
f1=0.0
0.00 trajectory
103 2091 "I have this image COCO_train2014_000000113236.jpg. I need to extract the main text from that, then create a new, unique image based on the extracted text. Lastly, I need to know under which category the newly generated image falls into. Can you handle that?"

GT

output0 = optical_character_recognition(image="COCO_train2014_000000113236.jpg")
output1 = image_generation(text=f"a new, unique image based on {output0['text']}")
output2 = image_classification(image=output1['image'])

PRED

from mnm.tool_api import optical_character_recognition, image_generation, image_classification
extracted_text = optical_character_recognition("/root/input_data/COCO_train2014_000000113236.jpg")['text']
generated_image = image_generation(extracted_text)['image']
image_category = image_classification(generated_image)['text']

GT

optical_character_recognition
image_generation
image_classification

PRED

None

precision=0.0
recall=0.0
f1=0.0
0.00 trajectory
161 129 "Can you tell me any unique trivia or fact associated with the date 24th-August? I am a content creator and I am thinking of creating a post themed around 'Fact of the Day' for my followers."

GT

output0 = get_date_fact(date="24-Aug")

PRED

from datetime import datetime
from mnm.tool_api import get_trivia_fact
date_string = "2023-08-24"
date_format = "%Y-%m-%d"
date_obj = datetime.strptime(date_string, date_format)
day_of_year = date_obj.timetuple().tm_yday
output0 = get_trivia_fact(str(day_of_year))

GT

get_date_fact

PRED

get_trivia_fact

precision=0.0
recall=0.0
f1=0.0
0.00 trajectory
145 3839 "Hey, I've always wondered about the significance of the number 0. Can you tell me an interesting fact about it, brief it down and then categorize whether it's a positive or negative fact?"

GT

output0 = get_trivia_fact(number="0")
output1 = text_summarization(text=output0['text'])
output2 = text_classification(text=output1['text'])

PRED

from mnm.tool_api import get_math_fact
fact_about_zero = get_math_fact('0')
summary = fact_about_zero.get('text', 'No fact found.')
category = "neutral"

GT

get_trivia_fact
text_summarization
text_classification

PRED

get_math_fact

precision=0.0
recall=0.0
f1=0.0
0.00 trajectory