Skip to content

Commit 099a25e

Browse files
merveenoyan and pcuenca authored
Add new models, datasets, videos to task pages (#836)
--------- Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
1 parent 092d010 commit 099a25e

File tree

9 files changed, with 44 additions and 30 deletions.

9 files changed

+44
-30
lines changed

packages/tasks/src/tasks/feature-extraction/data.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,13 @@ const taskData: TaskDataCustom = {
4242
],
4343
spaces: [
4444
{
45-
description: "A leaderboard to rank best feature extraction models..",
45+
description: "A leaderboard to rank text feature extraction models based on a benchmark.",
4646
id: "mteb/leaderboard",
4747
},
48+
{
49+
description: "A leaderboard to rank best feature extraction models based on human feedback.",
50+
id: "mteb/arena",
51+
},
4852
],
4953
summary: "Feature extraction is the task of extracting features learnt in a model.",
5054
widgetModels: ["facebook/bart-base"],

packages/tasks/src/tasks/image-feature-extraction/data.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,20 @@ const taskData: TaskDataCustom = {
3636
},
3737
{
3838
description: "A strong image feature extraction model.",
39-
id: "google/vit-base-patch16-224-in21k",
39+
id: "nvidia/MambaVision-T-1K",
4040
},
4141
{
42-
description: "A robust image feature extraction models.",
42+
description: "A robust image feature extraction model.",
4343
id: "facebook/dino-vitb16",
4444
},
4545
{
46-
description: "Strong image-text-to-text model made for information retrieval from documents.",
46+
description: "Strong image feature extraction model made for information retrieval from documents.",
4747
id: "vidore/colpali",
4848
},
49+
{
50+
description: "Strong image feature extraction model that can be used on images and documents.",
51+
id: "OpenGVLab/InternViT-6B-448px-V1-2",
52+
},
4953
],
5054
spaces: [],
5155
summary: "Image feature extraction is the task of extracting features learnt in a computer vision model.",

packages/tasks/src/tasks/image-segmentation/data.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,16 +48,16 @@ const taskData: TaskDataCustom = {
4848
id: "facebook/detr-resnet-50-panoptic",
4949
},
5050
{
51-
description: "Semantic segmentation model trained on ADE20k benchmark dataset.",
52-
id: "microsoft/beit-large-finetuned-ade-640-640",
51+
description: "Background removal model.",
52+
id: "briaai/RMBG-1.4",
5353
},
5454
{
5555
description: "Semantic segmentation model trained on ADE20k benchmark dataset with 512x512 resolution.",
5656
id: "nvidia/segformer-b0-finetuned-ade-512-512",
5757
},
5858
{
59-
description: "Semantic segmentation model trained Cityscapes dataset.",
60-
id: "facebook/mask2former-swin-large-cityscapes-semantic",
59+
description: "A multipurpose image segmentation model for high resolution images.",
60+
id: "ZhengPeng7/BiRefNet",
6161
},
6262
{
6363
description: "Panoptic segmentation model trained COCO (common objects) dataset.",

packages/tasks/src/tasks/image-text-to-text/about.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,5 @@ print(processor.decode(output[0], skip_special_tokens=True))
7272
- [Vision Language Models Explained](https://huggingface.co/blog/vlms)
7373
- [Open-source Multimodality and How to Achieve it using Hugging Face](https://www.youtube.com/watch?v=IoGaGfU1CIg&t=601s)
7474
- [Introducing Idefics2: A Powerful 8B Vision-Language Model for the community](https://huggingface.co/blog/idefics2)
75+
- [Image-text-to-text task guide](https://huggingface.co/tasks/image-text-to-text)
76+
- [Preference Optimization for Vision Language Models with TRL](https://huggingface.co/blog/dpo_vlm)

packages/tasks/src/tasks/image-text-to-text/data.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ const taskData: TaskDataCustom = {
8888
summary:
8989
"Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input.",
9090
widgetModels: ["microsoft/kosmos-2-patch14-224"],
91-
youtubeId: "",
91+
youtubeId: "IoGaGfU1CIg",
9292
};
9393

9494
export default taskData;

packages/tasks/src/tasks/image-to-image/data.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ const taskData: TaskDataCustom = {
4545
],
4646
models: [
4747
{
48-
description: "A model that enhances images captured in low light conditions.",
49-
id: "keras-io/low-light-image-enhancement",
48+
description: "An image-to-image model to improve image resolution.",
49+
id: "fal/AuraSR-v2",
5050
},
5151
{
5252
description: "A model that increases the resolution of an image.",

packages/tasks/src/tasks/mask-generation/data.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,22 @@ const taskData: TaskDataCustom = {
2424
},
2525
{
2626
description: "Very strong mask generation model.",
27-
id: "facebook/sam-vit-huge",
27+
id: "facebook/sam2-hiera-large",
2828
},
2929
],
3030
spaces: [
3131
{
3232
description:
33-
"An application that combines a mask generation model with an image embedding model for open-vocabulary image segmentation.",
34-
id: "SkalskiP/SAM_and_MetaCLIP",
33+
"An application that combines a mask generation model with a zero-shot object detection model for text-guided image segmentation.",
34+
id: "merve/OWLSAM2",
3535
},
3636
{
3737
description: "An application that compares the performance of a large and a small mask generation model.",
3838
id: "merve/slimsam",
3939
},
4040
{
4141
description: "An application based on an improved mask generation model.",
42-
id: "linfanluntan/Grounded-SAM",
42+
id: "SkalskiP/segment-anything-model-2",
4343
},
4444
{
4545
description: "An application to remove objects from videos using mask generation models.",

packages/tasks/src/tasks/text-generation/data.ts

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ const taskData: TaskDataCustom = {
1919
description: "An instruction dataset with preference ratings on responses.",
2020
id: "openbmb/UltraFeedback",
2121
},
22+
{
23+
description: "A large synthetic dataset for alignment of text generation models.",
24+
id: "argilla/magpie-ultra-v0.1",
25+
},
2226
],
2327
demo: {
2428
inputs: [
@@ -51,32 +55,32 @@ const taskData: TaskDataCustom = {
5155
],
5256
models: [
5357
{
54-
description: "A large language model trained for text generation.",
55-
id: "bigscience/bloom-560m",
58+
description: "A text-generation model trained to follow instructions.",
59+
id: "google/gemma-2-2b-it",
5660
},
5761
{
58-
description: "A large code generation model that can generate code in 80+ languages.",
62+
description: "A code generation model that can generate code in 80+ languages.",
5963
id: "bigcode/starcoder",
6064
},
6165
{
62-
description: "A very powerful text generation model.",
63-
id: "mistralai/Mixtral-8x7B-Instruct-v0.1",
66+
description: "Very powerful text generation model trained to follow instructions.",
67+
id: "meta-llama/Meta-Llama-3.1-8B-Instruct",
6468
},
6569
{
6670
description: "Small yet powerful text generation model.",
67-
id: "microsoft/phi-2",
71+
id: "microsoft/Phi-3-mini-4k-instruct",
6872
},
6973
{
70-
description: "A very powerful model that can chat, do mathematical reasoning and write code.",
71-
id: "openchat/openchat-3.5-0106",
74+
description: "A very powerful model that can solve mathematical problems.",
75+
id: "AI-MO/NuminaMath-7B-TIR",
7276
},
7377
{
74-
description: "Very strong yet small assistant model.",
75-
id: "HuggingFaceH4/zephyr-7b-beta",
78+
description: "Strong coding assistant model.",
79+
id: "HuggingFaceH4/starchat2-15b-v0.1",
7680
},
7781
{
7882
description: "Very strong open-source large language model.",
79-
id: "meta-llama/Llama-2-70b-hf",
83+
id: "mistralai/Mistral-Nemo-Instruct-2407",
8084
},
8185
],
8286
spaces: [
@@ -104,7 +108,7 @@ const taskData: TaskDataCustom = {
104108
summary:
105109
"Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
106110
widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
107-
youtubeId: "Vpjb1lu0MDk",
111+
youtubeId: "e9gNEAlsOvU",
108112
};
109113

110114
export default taskData;

packages/tasks/src/tasks/text-to-image/data.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,15 @@ const taskData: TaskDataCustom = {
4646
models: [
4747
{
4848
description: "One of the most powerful image generation models that can generate realistic outputs.",
49-
id: "stabilityai/stable-diffusion-xl-base-1.0",
49+
id: "black-forest-labs/FLUX.1-dev",
5050
},
5151
{
5252
description: "A powerful yet fast image generation model.",
5353
id: "latent-consistency/lcm-lora-sdxl",
5454
},
5555
{
56-
description: "A very fast text-to-image model.",
57-
id: "ByteDance/SDXL-Lightning",
56+
description: "Text-to-image model for photorealistic generation.",
57+
id: "Kwai-Kolors/Kolors",
5858
},
5959
{
6060
description: "A powerful text-to-image model.",

0 commit comments

Comments
 (0)