dc.contributor.author | Savary, Agata |
dc.contributor.author | Ramisch, Carlos |
dc.contributor.author | Cordeiro, Silvio Ricardo |
dc.contributor.author | Sangati, Federico |
dc.contributor.author | Vincze, Veronika |
dc.contributor.author | QasemiZadeh, Behrang |
dc.contributor.author | Candito, Marie |
dc.contributor.author | Cap, Fabienne |
dc.contributor.author | Giouli, Voula |
dc.contributor.author | Stoyanova, Ivelina |
dc.contributor.author | Doucet, Antoine |
dc.contributor.author | Adalı, Kübra |
dc.contributor.author | Barbu Mititelu, Verginica |
dc.contributor.author | Bejček, Eduard |
dc.contributor.author | El Maarouf, Ismail |
dc.contributor.author | Eryiğit, Gülşen |
dc.contributor.author | Galea, Luke |
dc.contributor.author | Ha-Cohen Kerner, Yaakov |
dc.contributor.author | Liebeskind, Chaya |
dc.contributor.author | Monti, Johanna |
dc.contributor.author | Parra Escartín, Carla |
dc.contributor.author | Kovalevskaitė, Jolanta |
dc.contributor.author | Krek, Simon |
dc.contributor.author | van der Plas, Lonneke |
dc.contributor.author | Aceta, Cristina |
dc.contributor.author | Aduriz, Itziar |
dc.contributor.author | Antoine, Jean-Yves |
dc.contributor.author | Attard, Greta |
dc.contributor.author | Azzopardi, Kirsty |
dc.contributor.author | Boizou, Loic |
dc.contributor.author | Bonnici, Janice |
dc.contributor.author | Boz, Mert |
dc.contributor.author | Bumbulienė, Ieva |
dc.contributor.author | Busuttil, Jael |
dc.contributor.author | Caruso, Valeria |
dc.contributor.author | Cherchi, Manuela |
dc.contributor.author | Constant, Matthieu |
dc.contributor.author | Czerepowicka, Monika |
dc.contributor.author | De Santis, Anna |
dc.contributor.author | Dimitrova, Tsvetana |
dc.contributor.author | Dinç, Tutkum |
dc.contributor.author | Elyovich, Hevi |
dc.contributor.author | Fabri, Ray |
dc.contributor.author | Farrugia, Alison |
dc.contributor.author | Findlay, Jamie |
dc.contributor.author | Fotopoulou, Aggeliki |
dc.contributor.author | Foufi, Vassiliki |
dc.contributor.author | Galea, Sara Anne |
dc.contributor.author | Gantar, Polona |
dc.contributor.author | Gatt, Albert |
dc.contributor.author | Gatt, Anabelle |
dc.contributor.author | Herrero, Carlos |
dc.contributor.author | Iñurrieta, Uxoa |
dc.contributor.author | Jagfeld, Glorianna |
dc.contributor.author | Hnátková, Milena |
dc.contributor.author | Ionescu, Mihaela |
dc.contributor.author | Klyueva, Natalia |
dc.contributor.author | Koeva, Svetla |
dc.contributor.author | Kovács, Viktória |
dc.contributor.author | Kuzman, Taja |
dc.contributor.author | Leseva, Svetlozara |
dc.contributor.author | Louisou, Sevi |
dc.contributor.author | Lynn, Teresa |
dc.contributor.author | Malka, Ruth |
dc.contributor.author | Martínez Alonso, Héctor |
dc.contributor.author | McCrae, John |
dc.contributor.author | de Medeiros Caseli, Helena |
dc.contributor.author | Miral, Ayşenur |
dc.contributor.author | Muscat, Amanda |
dc.contributor.author | Nivre, Joakim |
dc.contributor.author | Oakes, Michael |
dc.contributor.author | Onofrei, Mihaela |
dc.contributor.author | Parmentier, Yannick |
dc.contributor.author | Pasquer, Caroline |
dc.contributor.author | Pia di Buono, Maria |
dc.contributor.author | Priego Sanchez, Belem |
dc.contributor.author | Raffone, Annalisa |
dc.contributor.author | Ramisch, Renata |
dc.contributor.author | Rimkutė, Erika |
dc.contributor.author | Rizea, Monica-Mihaela |
dc.contributor.author | Simkó, Katalin |
dc.contributor.author | Spagnol, Michael |
dc.contributor.author | Stefanova, Valentina |
dc.contributor.author | Stymne, Sara |
dc.contributor.author | Sulubacak, Umut |
dc.contributor.author | Tabone, Nicole |
dc.contributor.author | Tanti, Marc |
dc.contributor.author | Todorova, Maria |
dc.contributor.author | Urešová, Zdenka |
dc.contributor.author | Villavicencio, Aline |
dc.contributor.author | Zilio, Leonardo |
dc.date.accessioned | 2017-06-20T08:39:36Z |
dc.date.available | 2017-06-20T08:39:36Z |
dc.date.issued | 2017-01-20 |
dc.identifier.uri | http://hdl.handle.net/11372/LRT-2282 |
dc.description | The PARSEME shared task aims at identifying verbal multiword expressions (VMWEs) in running text. VMWEs include idioms (let the cat out of the bag), light verb constructions (make a decision), verb-particle constructions (give up), and inherently reflexive verbs (se suicider 'to commit suicide' in French). VMWEs were annotated according to the universal guidelines in 18 languages. The corpora are provided in the parsemetsv format, inspired by the CoNLL-U format. For most languages, paired files in the CoNLL-U format - not necessarily using UD tagsets - containing parts of speech, lemmas, morphological features and/or syntactic dependencies are also provided. Depending on the language, the information comes from treebanks (e.g., Universal Dependencies) or from automatic parsers trained on treebanks (e.g., UDPipe). This item contains training and test data, tools and the universal guidelines file. |
dc.language.iso | bul |
dc.language.iso | ces |
dc.language.iso | deu |
dc.language.iso | ell |
dc.language.iso | spa |
dc.language.iso | fas |
dc.language.iso | fra |
dc.language.iso | heb |
dc.language.iso | hun |
dc.language.iso | ita |
dc.language.iso | lit |
dc.language.iso | mlt |
dc.language.iso | pol |
dc.language.iso | por |
dc.language.iso | ron |
dc.language.iso | slv |
dc.language.iso | swe |
dc.language.iso | tur |
dc.publisher | PARSEME |
dc.relation.isreferencedby | http://multiword.sourceforge.net/mwe2017/proceedings/MWE201704.pdf |
dc.relation.isreplacedby | http://hdl.handle.net/11372/LRT-2842 |
dc.rights | PARSEME Shared Task Data (v. 1.0) Agreement |
dc.rights.uri | https://lindat.mff.cuni.cz/repository/xmlui/page/licence-mwe-1.0 |
dc.source.uri | http://multiword.sf.net/sharedtask2017 |
dc.subject | Multiword expressions |
dc.subject | verbal multiword expressions |
dc.subject | idioms |
dc.subject | light-verb constructions |
dc.subject | verb-particle constructions |
dc.subject | inherently reflexive verbs |
dc.title | Annotated corpora and tools of the PARSEME Shared Task on Automatic Identification of Verbal Multiword Expressions (edition 1.0) |
dc.type | corpus |
metashare.ResourceInfo#ContentInfo.mediaType | text |
dc.rights.label | PUB |
has.files | yes |
branding | LRT + Open Submissions |
contact.person | Agata Savary agata.savary@univ-tours.fr Université François Rabelais de Tours |
contact.person | Carlos Ramisch carlos.ramisch@lif.univ-mrs.fr Aix Marseille Université |
contact.person | Natalia Klyueva kljueva@ufal.mff.cuni.cz Charles University in Prague, UFAL |
sponsor | COST IC1207 PARSEME: PARSing and Multi-word Expressions euFunds |
size.info | 274376 sentences |
size.info | 5439204 tokens |
size.info | 62218 multiWordUnits |
files.size | 67372060 |
files.count | 21 |
featuredService.kontext | Czech|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_cs_a |
featuredService.kontext | German|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_de_a |
featuredService.kontext | Greek|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_el_a |
featuredService.kontext | Spanish|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_es_a |
featuredService.kontext | Persian (Farsi)|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_fa_a |
featuredService.kontext | French|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_fr_a |
featuredService.kontext | Hungarian|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_hu_a |
featuredService.kontext | Italian|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_it_a |
featuredService.kontext | Maltese|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_mt_a |
featuredService.kontext | Polish|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_pl_a |
featuredService.kontext | Portuguese|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_pt_a |
featuredService.kontext | Romanian|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_ro_a |
featuredService.kontext | Slovenian|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_sl_a |
featuredService.kontext | Swedish|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_sv_a |
featuredService.kontext | Turkish|http://lindat.mff.cuni.cz/services/kontext/first_form?corpname=parseme_tr_a |
Files in this item
Download all files in item (64.25 MB)
- Name
- README.md
- Size
- 2.67 KB
- Format
- Unknown
- Description
- Unknown
- MD5
- 3b65e76fcb453f3dbe570240b4a0ca3a
- Name
- Annotation_guidelines_PARSEME_Shared_Task_1.0.pdf
- Size
- 608.46 KB
- Format
- Description
- common annotation guidelines
- MD5
- 7efe5547bd0d85cd3f341f0125a35a6c
- Name
- Description_paper_PARSEME_Shared_Task_1.0.pdf
- Size
- 278.76 KB
- Format
- Description
- an article describing the creation of the data and its use in the PARSEME shared task
- MD5
- 6947539d298d53bbcd9024437bd29939
- Name
- BG.tgz
- Size
- 959.28 KB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- b29b32b039d4b7cfafc02569a9e90dcd
- BG
- train.parsemetsv2 MB
- stats.md287 B
- test.parsemetsv716 kB
- README.md1 kB
- test.blind.parsemetsv713 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- Name
- CS.tgz
- Size
- 10.56 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- b97b0f5bed1ed94f096be4150ee68049
- CS
- stats.md294 B
- train.parsemetsv9 MB
- test.parsemetsv1 MB
- README.md1 kB
- test.blind.parsemetsv1 MB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu6 MB
- train.conllu49 MB
- Name
- DE.tgz
- Size
- 2.09 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 94ea5c3f074b783a090946e9e7e208ce
- DE
- train.parsemetsv1 MB
- stats.md318 B
- test.parsemetsv310 kB
- README.md2 kB
- test.blind.parsemetsv309 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu1 MB
- train.conllu6 MB
- Name
- EL.tgz
- Size
- 3.59 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 0956143f60ed16a0c01c4ec87e6c07f3
- EL
- train.parsemetsv2 MB
- stats.md294 B
- test.parsemetsv1 MB
- README.md2 kB
- test.blind.parsemetsv1 MB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu5 MB
- train.conllu10 MB
- Name
- ES.tgz
- Size
- 2.16 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- e1ae9704c3608f78cf57e09bb9b165dd
- ES
- stats.md299 B
- train.parsemetsv1 MB
- test.parsemetsv749 kB
- README.md2 kB
- test.blind.parsemetsv747 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu3 MB
- train.conllu5 MB
- Name
- FA.tgz
- Size
- 593.52 KB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- cc0686b1f93e0b0782855e514c54823a
- FA
- train.parsemetsv713 kB
- stats.md208 B
- test.parsemetsv134 kB
- README.md3 kB
- test.blind.parsemetsv132 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu344 kB
- train.conllu1 MB
- Name
- FR.tgz
- Size
- 7.89 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 69f9a65d2a6c127573f8b646cc10eeb3
- FR
- stats.md304 B
- train.parsemetsv8 MB
- test.parsemetsv705 kB
- README.md2 kB
- test.blind.parsemetsv703 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu1 MB
- train.conllu24 MB
- Name
- HE.tgz
- Size
- 804.67 KB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 6e20cc548086eeb18469841b0c5b1393
- HE
- train.parsemetsv1 MB
- stats.md295 B
- test.parsemetsv762 kB
- README.md1 kB
- test.blind.parsemetsv760 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- Name
- HU.tgz
- Size
- 1.3 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 06ce4fa53dcaeda0b1bdce90a24637ea
- HU
- train.parsemetsv1 MB
- stats.md239 B
- test.parsemetsv290 kB
- README.md1 kB
- test.blind.parsemetsv288 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu1 MB
- train.conllu5 MB
- Name
- IT.tgz
- Size
- 4.82 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 64ab3ab19e87767e9fa9764130e41046
- IT
- stats.md328 B
- train.parsemetsv4 MB
- test.parsemetsv503 kB
- README.md1 kB
- test.blind.parsemetsv501 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu1 MB
- train.conllu14 MB
- Name
- LT.tgz
- Size
- 1.18 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 4b2e19fdb954c52a1adf1b1dc05de4a0
- LT
- stats.md236 B
- train.parsemetsv2 MB
- test.parsemetsv623 kB
- README.md1 kB
- test.blind.parsemetsv622 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- Name
- MT.tgz
- Size
- 2.89 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 58f1b8bc4dc99429f504df50c59d21e5
- MT
- stats.md266 B
- train.parsemetsv1 MB
- test.parsemetsv1 MB
- README.md1 kB
- test.blind.parsemetsv1 MB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu3 MB
- train.conllu4 MB
- Name
- PL.tgz
- Size
- 3.45 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 4f54d970d85b325b4c3b3f621e6c192d
- PL
- train.conllu14 MB
- README.md3 kB
- test.blind.parsemetsv384 kB
- train.parsemetsv2 MB
- stats.md276 B
- LICENSE.txt34 kB
- test.parsemetsv386 kB
- .gitkeep0 B
- test.conllu2 MB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- Name
- PT.tgz
- Size
- 5.1 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- eec1f919ce50aad1099d69381ab1f76e
- PT
- stats.md287 B
- train.parsemetsv4 MB
- test.parsemetsv690 kB
- README.md3 kB
- test.blind.parsemetsv688 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu3 MB
- train.conllu20 MB
- Name
- RO.tgz
- Size
- 8 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 83f76629b83fc380facb4e11e98f119e
- RO
- stats.md290 B
- train.parsemetsv9 MB
- test.parsemetsv1 MB
- README.md1 kB
- test.blind.parsemetsv1 MB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu3 MB
- train.conllu25 MB
- Name
- SV.tgz
- Size
- 499.71 KB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 52db26a0ba0dbc2e283c4551795b5271
- SV
- stats.md303 B
- train.parsemetsv41 kB
- test.parsemetsv322 kB
- README.md1 kB
- test.blind.parsemetsv321 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu1 MB
- train.conllu216 kB
- Name
- SL.tgz
- Size
- 2.89 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- abf463cd1bf7855d35efb581706ebf66
- SL
- stats.md328 B
- train.parsemetsv2 MB
- test.parsemetsv650 kB
- README.md2 kB
- test.blind.parsemetsv648 kB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B
- test.conllu1 MB
- train.conllu6 MB
- Name
- TR.tgz
- Size
- 4.65 MB
- Format
- application/x-gzip
- Description
- Unknown
- MD5
- 4d34bb3f81dec21184b9877da2dcf12b
- TR
- train.conllu14 MB
- README.md4 kB
- test.blind.parsemetsv354 kB
- train.parsemetsv4 MB
- stats.md270 B
- test.parsemetsv356 kB
- .gitkeep0 B
- test.conllu1 MB
- bin
- checkParsemeTsvFormat.py1 kB
- bmc_munkres
- LICENSE561 B
- README.md1 kB
- munkres.py23 kB
- evaluate.py9 kB
- tsvlib.py7 kB
- test
- pred.parsemetsv178 B
- zeropred.parsemetsv139 B
- gold.parsemetsv151 B