Changes
On March 4, 2025 at 4:38:48 PM UTC,
-
No fields were updated. See the metadata diff for more details.
| f | 1 | { | f | 1 | { |
| 2 | "Observaciones": { | 2 | "Observaciones": { | ||
| 3 | "en": "Recommended citation for this dataset: Campillos-Llanos, | 3 | "en": "Recommended citation for this dataset: Campillos-Llanos, | ||
| 4 | Leonardo; Terroba Reinares, Ana Rosa; Zakhir Puig, Sof\u00eda; | 4 | Leonardo; Terroba Reinares, Ana Rosa; Zakhir Puig, Sof\u00eda; | ||
| 5 | Valverde Mateos, Ana; Capllonch Carri\u00f3n, Adri\u00e1n; 2022; | 5 | Valverde Mateos, Ana; Capllonch Carri\u00f3n, Adri\u00e1n; 2022; | ||
| 6 | CLARA-MeD corpus [Dataset]; DIGITAL.CSIC; | 6 | CLARA-MeD corpus [Dataset]; DIGITAL.CSIC; | ||
| 7 | https://doi.org/10.20350/digitalCSIC/14644", | 7 | https://doi.org/10.20350/digitalCSIC/14644", | ||
| 8 | "es": "Cita recomendada: Campillos-Llanos, Leonardo; Terroba | 8 | "es": "Cita recomendada: Campillos-Llanos, Leonardo; Terroba | ||
| 9 | Reinares, Ana Rosa; Zakhir Puig, Sof\u00eda; Valverde Mateos, Ana; | 9 | Reinares, Ana Rosa; Zakhir Puig, Sof\u00eda; Valverde Mateos, Ana; | ||
| 10 | Capllonch Carri\u00f3n, Adri\u00e1n; 2022; CLARA-MeD corpus [Dataset]; | 10 | Capllonch Carri\u00f3n, Adri\u00e1n; 2022; CLARA-MeD corpus [Dataset]; | ||
| 11 | DIGITAL.CSIC; https://doi.org/10.20350/digitalCSIC/14644" | 11 | DIGITAL.CSIC; https://doi.org/10.20350/digitalCSIC/14644" | ||
| 12 | }, | 12 | }, | ||
| 13 | "author": null, | 13 | "author": null, | ||
| 14 | "author_email": null, | 14 | "author_email": null, | ||
| 15 | "autor": { | 15 | "autor": { | ||
| 16 | "en": [ | 16 | "en": [ | ||
| 17 | "Leonardo Campillos-Llanos", | 17 | "Leonardo Campillos-Llanos", | ||
| 18 | "Ana Rosa Terroba Reinares", | 18 | "Ana Rosa Terroba Reinares", | ||
| 19 | "Sof\u00eda Zakhir Puig", | 19 | "Sof\u00eda Zakhir Puig", | ||
| 20 | "Ana Valverde Mateos", | 20 | "Ana Valverde Mateos", | ||
| 21 | "Adri\u00e1n Capllonch Carri\u00f3n" | 21 | "Adri\u00e1n Capllonch Carri\u00f3n" | ||
| 22 | ], | 22 | ], | ||
| 23 | "es": [ | 23 | "es": [ | ||
| 24 | "Leonardo Campillos-Llanos", | 24 | "Leonardo Campillos-Llanos", | ||
| 25 | "Ana Rosa Terroba Reinares", | 25 | "Ana Rosa Terroba Reinares", | ||
| 26 | "Sof\u00eda Zakhir Puig", | 26 | "Sof\u00eda Zakhir Puig", | ||
| 27 | "Ana Valverde Mateos", | 27 | "Ana Valverde Mateos", | ||
| 28 | "Adri\u00e1n Capllonch Carri\u00f3n" | 28 | "Adri\u00e1n Capllonch Carri\u00f3n" | ||
| 29 | ] | 29 | ] | ||
| 30 | }, | 30 | }, | ||
| 31 | "conforms_to": [], | 31 | "conforms_to": [], | ||
| 32 | "coverage_new": {}, | 32 | "coverage_new": {}, | ||
| 33 | "creator_user_id": "196556b3-e0c4-4c51-a9e6-f51cc752bc37", | 33 | "creator_user_id": "196556b3-e0c4-4c51-a9e6-f51cc752bc37", | ||
| 34 | "description": { | 34 | "description": { | ||
| 35 | "en": "A collection of 24.298 pairs of professional and simplified | 35 | "en": "A collection of 24.298 pairs of professional and simplified | ||
| 36 | texts (>96 million tokens): 1) Drug leaflets and summaries of product | 36 | texts (>96 million tokens): 1) Drug leaflets and summaries of product | ||
| 37 | characteristics (10 211 pairs of texts, >82M words); 2) Cancer-related | 37 | characteristics (10 211 pairs of texts, >82M words); 2) Cancer-related | ||
| 38 | information summaries (201 pairs of texts, >3M tokens); and 3) | 38 | information summaries (201 pairs of texts, >3M tokens); and 3) | ||
| 39 | Clinical trials announcements (5748 pairs of texts, 451 690 tokens). | 39 | Clinical trials announcements (5748 pairs of texts, 451 690 tokens). | ||
| 40 | The dataset also contains a parallel corpus with a subset of 3800 | 40 | The dataset also contains a parallel corpus with a subset of 3800 | ||
| 41 | sentence pairs of professional and laymen variants (149 862 tokens). | 41 | sentence pairs of professional and laymen variants (149 862 tokens). | ||
| 42 | This is a benchmark for medical text simplification. The latest | 42 | This is a benchmark for medical text simplification. The latest | ||
| 43 | download of files was in February 2022.", | 43 | download of files was in February 2022.", | ||
| 44 | "es": "A collection of 24.298 pairs of professional and simplified | 44 | "es": "A collection of 24.298 pairs of professional and simplified | ||
| 45 | texts (>96 million tokens): 1) Drug leaflets and summaries of product | 45 | texts (>96 million tokens): 1) Drug leaflets and summaries of product | ||
| 46 | characteristics (10 211 pairs of texts, >82M words); 2) Cancer-related | 46 | characteristics (10 211 pairs of texts, >82M words); 2) Cancer-related | ||
| 47 | information summaries (201 pairs of texts, >3M tokens); and 3) | 47 | information summaries (201 pairs of texts, >3M tokens); and 3) | ||
| 48 | Clinical trials announcements (5748 pairs of texts, 451 690 tokens). | 48 | Clinical trials announcements (5748 pairs of texts, 451 690 tokens). | ||
| 49 | The dataset also contains a parallel corpus with a subset of 3800 | 49 | The dataset also contains a parallel corpus with a subset of 3800 | ||
| 50 | sentence pairs of professional and laymen variants (149 862 tokens). | 50 | sentence pairs of professional and laymen variants (149 862 tokens). | ||
| 51 | This is a benchmark for medical text simplification. The latest | 51 | This is a benchmark for medical text simplification. The latest | ||
| 52 | download of files was in February 2022." | 52 | download of files was in February 2022." | ||
| 53 | }, | 53 | }, | ||
| 54 | "groups": [], | 54 | "groups": [], | ||
| 55 | "id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | 55 | "id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | ||
| 56 | "identifier": "http://hdl.handle.net/10261/269887", | 56 | "identifier": "http://hdl.handle.net/10261/269887", | ||
| 57 | "international_spatial_translated": { | 57 | "international_spatial_translated": { | ||
| 58 | "en": "Europe (data from EudraCT) and United States of America | 58 | "en": "Europe (data from EudraCT) and United States of America | ||
| 59 | (data from NCI)", | 59 | (data from NCI)", | ||
| 60 | "es": "Europa y Estados Unidos" | 60 | "es": "Europa y Estados Unidos" | ||
| 61 | }, | 61 | }, | ||
| 62 | "isopen": false, | 62 | "isopen": false, | ||
| 63 | "issued_date": "2022-05-19T00:00:00", | 63 | "issued_date": "2022-05-19T00:00:00", | ||
| 64 | "language": [ | 64 | "language": [ | ||
| 65 | "es", | 65 | "es", | ||
| 66 | "en" | 66 | "en" | ||
| 67 | ], | 67 | ], | ||
| 68 | "license_id": | 68 | "license_id": | ||
| 69 | "\thttps://creativecommons.org/licenses/by-nc-sa/4.0/", | 69 | "\thttps://creativecommons.org/licenses/by-nc-sa/4.0/", | ||
| 70 | "license_title": | 70 | "license_title": | ||
| 71 | "\thttps://creativecommons.org/licenses/by-nc-sa/4.0/", | 71 | "\thttps://creativecommons.org/licenses/by-nc-sa/4.0/", | ||
| 72 | "maintainer": null, | 72 | "maintainer": null, | ||
| 73 | "maintainer_email": null, | 73 | "maintainer_email": null, | ||
| 74 | "metadata_created": "2025-03-04T16:37:34.111951", | 74 | "metadata_created": "2025-03-04T16:37:34.111951", | ||
| n | 75 | "metadata_modified": "2025-03-04T16:38:48.014334", | n | 75 | "metadata_modified": "2025-03-04T16:38:48.136317", |
| 76 | "modified_date": "2022-05-19T00:00:00", | 76 | "modified_date": "2022-05-19T00:00:00", | ||
| 77 | "multilingual_tags": { | 77 | "multilingual_tags": { | ||
| 78 | "en": [ | 78 | "en": [ | ||
| 79 | "Comparable corpus", | 79 | "Comparable corpus", | ||
| 80 | "Parallel sentences", | 80 | "Parallel sentences", | ||
| 81 | "Medical text simplification", | 81 | "Medical text simplification", | ||
| 82 | "Biomedical natural language processing" | 82 | "Biomedical natural language processing" | ||
| 83 | ], | 83 | ], | ||
| 84 | "es": [ | 84 | "es": [ | ||
| 85 | "Comparable corpus", | 85 | "Comparable corpus", | ||
| 86 | "Parallel sentences", | 86 | "Parallel sentences", | ||
| 87 | "Medical text simplification" | 87 | "Medical text simplification" | ||
| 88 | ] | 88 | ] | ||
| 89 | }, | 89 | }, | ||
| 90 | "name": "clara-med-corpus", | 90 | "name": "clara-med-corpus", | ||
| 91 | "notes": null, | 91 | "notes": null, | ||
| 92 | "num_resources": 2, | 92 | "num_resources": 2, | ||
| 93 | "num_tags": 0, | 93 | "num_tags": 0, | ||
| 94 | "organization": { | 94 | "organization": { | ||
| 95 | "approval_status": "approved", | 95 | "approval_status": "approved", | ||
| 96 | "created": "2023-09-25T12:13:42.172869", | 96 | "created": "2023-09-25T12:13:42.172869", | ||
| 97 | "description": "El Instituto de Lengua, Literatura y | 97 | "description": "El Instituto de Lengua, Literatura y | ||
| 98 | Antropolog\u00eda, ILLA (CSIC) tiene como objetivo primordial la | 98 | Antropolog\u00eda, ILLA (CSIC) tiene como objetivo primordial la | ||
| 99 | investigaci\u00f3n del patrimonio cultural hisp\u00e1nico en su triple | 99 | investigaci\u00f3n del patrimonio cultural hisp\u00e1nico en su triple | ||
| 100 | dimensi\u00f3n antropol\u00f3gica, ling\u00fc\u00edstica y | 100 | dimensi\u00f3n antropol\u00f3gica, ling\u00fc\u00edstica y | ||
| 101 | literaria.", | 101 | literaria.", | ||
| 102 | "id": "fc47e531-a165-4eac-8fc7-34342a3a38ff", | 102 | "id": "fc47e531-a165-4eac-8fc7-34342a3a38ff", | ||
| 103 | "image_url": "2023-09-25-101342.167134illa0.png", | 103 | "image_url": "2023-09-25-101342.167134illa0.png", | ||
| 104 | "is_organization": true, | 104 | "is_organization": true, | ||
| 105 | "name": "instituto-de-lengua-literatura-y-antropologia-illa-csic", | 105 | "name": "instituto-de-lengua-literatura-y-antropologia-illa-csic", | ||
| 106 | "state": "active", | 106 | "state": "active", | ||
| 107 | "title": "Instituto de Lengua, Literatura y Antropolog\u00eda | 107 | "title": "Instituto de Lengua, Literatura y Antropolog\u00eda | ||
| 108 | (ILLA), CSIC", | 108 | (ILLA), CSIC", | ||
| 109 | "type": "organization" | 109 | "type": "organization" | ||
| 110 | }, | 110 | }, | ||
| 111 | "owner_org": "fc47e531-a165-4eac-8fc7-34342a3a38ff", | 111 | "owner_org": "fc47e531-a165-4eac-8fc7-34342a3a38ff", | ||
| 112 | "private": false, | 112 | "private": false, | ||
| 113 | "proyecto": {}, | 113 | "proyecto": {}, | ||
| 114 | "publisher": "b627d71d-2315-4e75-afc9-897da84459f0", | 114 | "publisher": "b627d71d-2315-4e75-afc9-897da84459f0", | ||
| 115 | "reference": [], | 115 | "reference": [], | ||
| 116 | "relationships_as_object": [], | 116 | "relationships_as_object": [], | ||
| 117 | "relationships_as_subject": [], | 117 | "relationships_as_subject": [], | ||
| 118 | "resources": [ | 118 | "resources": [ | ||
| 119 | { | 119 | { | ||
| 120 | "byte_size": "8,1 kB", | 120 | "byte_size": "8,1 kB", | ||
| 121 | "cache_last_updated": null, | 121 | "cache_last_updated": null, | ||
| 122 | "cache_url": null, | 122 | "cache_url": null, | ||
| 123 | "created": "2025-03-04T16:38:15.618921", | 123 | "created": "2025-03-04T16:38:15.618921", | ||
| 124 | "datastore_active": false, | 124 | "datastore_active": false, | ||
| 125 | "description": null, | 125 | "description": null, | ||
| 126 | "format": "txt", | 126 | "format": "txt", | ||
| 127 | "hash": "", | 127 | "hash": "", | ||
| 128 | "id": "084fe5a6-500d-4d1c-a87e-1e764b61c3a2", | 128 | "id": "084fe5a6-500d-4d1c-a87e-1e764b61c3a2", | ||
| 129 | "last_modified": null, | 129 | "last_modified": null, | ||
| 130 | "metadata_modified": "2025-03-04T16:38:48.016552", | 130 | "metadata_modified": "2025-03-04T16:38:48.016552", | ||
| 131 | "mimetype": "text/plain", | 131 | "mimetype": "text/plain", | ||
| 132 | "mimetype_inner": null, | 132 | "mimetype_inner": null, | ||
| 133 | "name": "README.txt", | 133 | "name": "README.txt", | ||
| 134 | "name_translated": { | 134 | "name_translated": { | ||
| 135 | "en": "README.txt", | 135 | "en": "README.txt", | ||
| 136 | "es": "README.txt" | 136 | "es": "README.txt" | ||
| 137 | }, | 137 | }, | ||
| 138 | "package_id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | 138 | "package_id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | ||
| 139 | "position": 0, | 139 | "position": 0, | ||
| 140 | "resource_identifier": | 140 | "resource_identifier": | ||
| 141 | "https://digital.csic.es/bitstream/10261/269887/4/README.txt", | 141 | "https://digital.csic.es/bitstream/10261/269887/4/README.txt", | ||
| 142 | "resource_relation": [], | 142 | "resource_relation": [], | ||
| 143 | "resource_relation-1": "", | 143 | "resource_relation-1": "", | ||
| 144 | "resource_type": null, | 144 | "resource_type": null, | ||
| 145 | "size": null, | 145 | "size": null, | ||
| 146 | "state": "active", | 146 | "state": "active", | ||
| 147 | "url": | 147 | "url": | ||
| 148 | "https://digital.csic.es/bitstream/10261/269887/4/README.txt", | 148 | "https://digital.csic.es/bitstream/10261/269887/4/README.txt", | ||
| 149 | "url_type": null | 149 | "url_type": null | ||
| 150 | }, | 150 | }, | ||
| 151 | { | 151 | { | ||
| 152 | "byte_size": "196,13 MB", | 152 | "byte_size": "196,13 MB", | ||
| 153 | "cache_last_updated": null, | 153 | "cache_last_updated": null, | ||
| 154 | "cache_url": null, | 154 | "cache_url": null, | ||
| 155 | "created": "2025-03-04T16:38:48.018874", | 155 | "created": "2025-03-04T16:38:48.018874", | ||
| 156 | "datastore_active": false, | 156 | "datastore_active": false, | ||
| 157 | "description": null, | 157 | "description": null, | ||
| 158 | "format": "zip", | 158 | "format": "zip", | ||
| 159 | "hash": "", | 159 | "hash": "", | ||
| 160 | "id": "42ab5813-12ae-48a3-b2e9-6d3e0bc3c692", | 160 | "id": "42ab5813-12ae-48a3-b2e9-6d3e0bc3c692", | ||
| 161 | "last_modified": null, | 161 | "last_modified": null, | ||
| n | 162 | "metadata_modified": "2025-03-04T16:38:48.016725", | n | 162 | "metadata_modified": "2025-03-04T16:38:48.138642", |
| 163 | "mimetype": "application/zip", | 163 | "mimetype": "application/zip", | ||
| 164 | "mimetype_inner": null, | 164 | "mimetype_inner": null, | ||
| 165 | "name": "CLARA-MeD-corpus.zip\t", | 165 | "name": "CLARA-MeD-corpus.zip\t", | ||
| 166 | "name_translated": { | 166 | "name_translated": { | ||
| 167 | "en": "CLARA-MeD-corpus.zip\t", | 167 | "en": "CLARA-MeD-corpus.zip\t", | ||
| 168 | "es": "CLARA-MeD-corpus.zip\t" | 168 | "es": "CLARA-MeD-corpus.zip\t" | ||
| 169 | }, | 169 | }, | ||
| 170 | "package_id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | 170 | "package_id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | ||
| 171 | "position": 1, | 171 | "position": 1, | ||
| 172 | "resource_identifier": | 172 | "resource_identifier": | ||
| 173 | "https://digital.csic.es/bitstream/10261/269887/1/CLARA-MeD-corpus.zip", | 173 | "https://digital.csic.es/bitstream/10261/269887/1/CLARA-MeD-corpus.zip", | ||
| 174 | "resource_relation": [], | 174 | "resource_relation": [], | ||
| 175 | "resource_relation-1": "", | 175 | "resource_relation-1": "", | ||
| 176 | "resource_type": null, | 176 | "resource_type": null, | ||
| 177 | "size": null, | 177 | "size": null, | ||
| 178 | "state": "active", | 178 | "state": "active", | ||
| 179 | "url": | 179 | "url": | ||
| 180 | "https://digital.csic.es/bitstream/10261/269887/1/CLARA-MeD-corpus.zip", | 180 | "https://digital.csic.es/bitstream/10261/269887/1/CLARA-MeD-corpus.zip", | ||
| 181 | "url_type": null | 181 | "url_type": null | ||
| 182 | } | 182 | } | ||
| 183 | ], | 183 | ], | ||
| 184 | "spatial": [ | 184 | "spatial": [ | ||
| 185 | 185 | ||||
| 186 | "http://datos.gob.es/recurso/sector-publico/territorio/Pais/Espa\u00f1a" | 186 | "http://datos.gob.es/recurso/sector-publico/territorio/Pais/Espa\u00f1a" | ||
| 187 | ], | 187 | ], | ||
| t | 188 | "state": "draft", | t | 188 | "state": "active", |
| 189 | "tags": [], | 189 | "tags": [], | ||
| 190 | "theme": [ | 190 | "theme": [ | ||
| 191 | 191 | ||||
| 192 | "http://datos.gob.es/kos/sector-publico/sector/ciencia-tecnologia", | 192 | "http://datos.gob.es/kos/sector-publico/sector/ciencia-tecnologia", | ||
| 193 | "http://datos.gob.es/kos/sector-publico/sector/salud" | 193 | "http://datos.gob.es/kos/sector-publico/sector/salud" | ||
| 194 | ], | 194 | ], | ||
| 195 | "title": "CLARA-MeD corpus", | 195 | "title": "CLARA-MeD corpus", | ||
| 196 | "title_translated": { | 196 | "title_translated": { | ||
| 197 | "en": "CLARA-MeD corpus", | 197 | "en": "CLARA-MeD corpus", | ||
| 198 | "es": "CLARA-MeD corpus" | 198 | "es": "CLARA-MeD corpus" | ||
| 199 | }, | 199 | }, | ||
| 200 | "type": "dataset", | 200 | "type": "dataset", | ||
| 201 | "url": null, | 201 | "url": null, | ||
| 202 | "version": null | 202 | "version": null | ||
| 203 | } | 203 | } |
