{
"dataset_name": <dataset name (could be identical to paper title)>,
"title": <paper title>,
"paper_link": <paper link>,
"data_link": <link to download the data>, # If your data is not public, don't include this field
"motivation": {'task-oriented (target language)','cross-lingual transfer','task-oriented (multilingual)''multi-task (target language)'}, "task_type": {'classification (sentiment analyis)', 'classification (sentence pair)', 'classification (other)', 'QA (w/ retrieval)', 'QA (machine reading)', 'structured prediction', 'sequence tagging', 'generation (summarization)', 'generation (other)', 'other'},
"has_train_data": {'YES'/'NO'},
"size": {'<100'/'100~1000'/'1000~10K'/'>10K'},
"input_data_source": {'annotated (authors, linguists)', 'commercial sources', 'crowdsourced', 'curated linguistic resources (wordnet, etc)', 'curated source (exams, scientific papers.etc)', 'media', 'template-based', 'web', 'Wikipedia'}, # If your data has multiple input sources, please use '&' to connect all of them
"original_language": {'English', 'in its own language', 'both'}
"translation": {'YES'/'NO'},
"label_source": {'Annotated (authors, linguists)', 'Automatically induced', 'Crowdsourced','Derived from linguistic resources (wordnet, etc)'}, # If your data has multiple label sources, please use '&' to connect all of them
"publication_year": <public year>,
"languages": {all languages in ISO 639-1 Language Code},
"published_venue": <published venue>,
"reused_dataset": {'YES (English)' / 'YES (other language)' / 'NO'},
"creators": {'combination of university and industry', 'university', 'industry', 'individual researchers'},
"citations": <your current citation>,
"in_huggingface": {'YES'/'NO'}
}