Cambios
El 20 de marzo de 2025, a las 8:28:07 UTC:

-
Modificada la licencia de CLARA-MeD corpus a https://creativecommons.org/licenses/by-nc-sa/4.0/ (anteriormente el mismo URL precedido por un carácter de tabulación: "\thttps://creativecommons.org/licenses/by-nc-sa/4.0/")
-
Modificado el valor del campo
coverage_new
a {'1': {'from': '2022-05-15T00:00:00', 'to': '2022-05-15T00:00:00'}}
en CLARA-MeD corpus -
Modificado el valor del campo
reference
a ['https://github.com/lcampillos/CLARA-MeD/corpus']
en CLARA-MeD corpus
f | 1 | { | f | 1 | { |
2 | "Observaciones": { | 2 | "Observaciones": { | ||
3 | "en": "Recommended citation for this dataset: Campillos-Llanos, | 3 | "en": "Recommended citation for this dataset: Campillos-Llanos, | ||
4 | Leonardo; Terroba Reinares, Ana Rosa; Zakhir Puig, Sof\u00eda; | 4 | Leonardo; Terroba Reinares, Ana Rosa; Zakhir Puig, Sof\u00eda; | ||
5 | Valverde Mateos, Ana; Capllonch Carri\u00f3n, Adri\u00e1n; 2022; | 5 | Valverde Mateos, Ana; Capllonch Carri\u00f3n, Adri\u00e1n; 2022; | ||
6 | CLARA-MeD corpus [Dataset]; DIGITAL.CSIC; | 6 | CLARA-MeD corpus [Dataset]; DIGITAL.CSIC; | ||
7 | https://doi.org/10.20350/digitalCSIC/14644", | 7 | https://doi.org/10.20350/digitalCSIC/14644", | ||
8 | "es": "Cita recomendada: Campillos-Llanos, Leonardo; Terroba | 8 | "es": "Cita recomendada: Campillos-Llanos, Leonardo; Terroba | ||
9 | Reinares, Ana Rosa; Zakhir Puig, Sof\u00eda; Valverde Mateos, Ana; | 9 | Reinares, Ana Rosa; Zakhir Puig, Sof\u00eda; Valverde Mateos, Ana; | ||
10 | Capllonch Carri\u00f3n, Adri\u00e1n; 2022; CLARA-MeD corpus [Dataset]; | 10 | Capllonch Carri\u00f3n, Adri\u00e1n; 2022; CLARA-MeD corpus [Dataset]; | ||
11 | DIGITAL.CSIC; https://doi.org/10.20350/digitalCSIC/14644" | 11 | DIGITAL.CSIC; https://doi.org/10.20350/digitalCSIC/14644" | ||
12 | }, | 12 | }, | ||
13 | "author": null, | 13 | "author": null, | ||
14 | "author_email": null, | 14 | "author_email": null, | ||
15 | "autor": { | 15 | "autor": { | ||
16 | "en": [ | 16 | "en": [ | ||
17 | "Leonardo Campillos-Llanos", | 17 | "Leonardo Campillos-Llanos", | ||
18 | "Ana Rosa Terroba Reinares", | 18 | "Ana Rosa Terroba Reinares", | ||
19 | "Sof\u00eda Zakhir Puig", | 19 | "Sof\u00eda Zakhir Puig", | ||
20 | "Ana Valverde Mateos", | 20 | "Ana Valverde Mateos", | ||
21 | "Adri\u00e1n Capllonch Carri\u00f3n" | 21 | "Adri\u00e1n Capllonch Carri\u00f3n" | ||
22 | ], | 22 | ], | ||
23 | "es": [ | 23 | "es": [ | ||
24 | "Leonardo Campillos-Llanos", | 24 | "Leonardo Campillos-Llanos", | ||
25 | "Ana Rosa Terroba Reinares", | 25 | "Ana Rosa Terroba Reinares", | ||
26 | "Sof\u00eda Zakhir Puig", | 26 | "Sof\u00eda Zakhir Puig", | ||
27 | "Ana Valverde Mateos", | 27 | "Ana Valverde Mateos", | ||
28 | "Adri\u00e1n Capllonch Carri\u00f3n" | 28 | "Adri\u00e1n Capllonch Carri\u00f3n" | ||
29 | ] | 29 | ] | ||
30 | }, | 30 | }, | ||
31 | "conforms_to": [], | 31 | "conforms_to": [], | ||
n | 32 | "coverage_new": {}, | n | 32 | "coverage_new": { |
33 | "1": { | ||||
34 | "from": "2022-05-15T00:00:00", | ||||
35 | "to": "2022-05-15T00:00:00" | ||||
36 | } | ||||
37 | }, | ||||
33 | "creator_user_id": "196556b3-e0c4-4c51-a9e6-f51cc752bc37", | 38 | "creator_user_id": "196556b3-e0c4-4c51-a9e6-f51cc752bc37", | ||
34 | "description": { | 39 | "description": { | ||
35 | "en": "A collection of 24.298 pairs of professional and simplified | 40 | "en": "A collection of 24.298 pairs of professional and simplified | ||
36 | texts (>96 million tokens): 1) Drug leaflets and summaries of product | 41 | texts (>96 million tokens): 1) Drug leaflets and summaries of product | ||
37 | characteristics (10 211 pairs of texts, >82M words); 2) Cancer-related | 42 | characteristics (10 211 pairs of texts, >82M words); 2) Cancer-related | ||
38 | information summaries (201 pairs of texts, >3M tokens); and 2) | 43 | information summaries (201 pairs of texts, >3M tokens); and 2) | ||
39 | Clinical trials announcements (5748 pairs of texts, 451 690 tokens). | 44 | Clinical trials announcements (5748 pairs of texts, 451 690 tokens). | ||
40 | The dataset also contains a parallel corpus with a subset of 3800 | 45 | The dataset also contains a parallel corpus with a subset of 3800 | ||
41 | sentence pairs of professional and laymen variants (149 862 tokens). | 46 | sentence pairs of professional and laymen variants (149 862 tokens). | ||
42 | This is a benchmark for medical text simplification. The latest | 47 | This is a benchmark for medical text simplification. The latest | ||
43 | download of files was in February 2022.", | 48 | download of files was in February 2022.", | ||
44 | "es": "A collection of 24.298 pairs of professional and simplified | 49 | "es": "A collection of 24.298 pairs of professional and simplified | ||
45 | texts (>96 million tokens): 1) Drug leaflets and summaries of product | 50 | texts (>96 million tokens): 1) Drug leaflets and summaries of product | ||
46 | characteristics (10 211 pairs of texts, >82M words); 2) Cancer-related | 51 | characteristics (10 211 pairs of texts, >82M words); 2) Cancer-related | ||
47 | information summaries (201 pairs of texts, >3M tokens); and 2) | 52 | information summaries (201 pairs of texts, >3M tokens); and 2) | ||
48 | Clinical trials announcements (5748 pairs of texts, 451 690 tokens). | 53 | Clinical trials announcements (5748 pairs of texts, 451 690 tokens). | ||
49 | The dataset also contains a parallel corpus with a subset of 3800 | 54 | The dataset also contains a parallel corpus with a subset of 3800 | ||
50 | sentence pairs of professional and laymen variants (149 862 tokens). | 55 | sentence pairs of professional and laymen variants (149 862 tokens). | ||
51 | This is a benchmark for medical text simplification. The latest | 56 | This is a benchmark for medical text simplification. The latest | ||
52 | download of files was in February 2022." | 57 | download of files was in February 2022." | ||
53 | }, | 58 | }, | ||
54 | "groups": [ | 59 | "groups": [ | ||
55 | { | 60 | { | ||
56 | "description": "", | 61 | "description": "", | ||
57 | "display_name": "Ling\u00fc\u00edstica", | 62 | "display_name": "Ling\u00fc\u00edstica", | ||
58 | "id": "372aa551-b8e2-47fd-bc5e-0f807f9b09d6", | 63 | "id": "372aa551-b8e2-47fd-bc5e-0f807f9b09d6", | ||
59 | "image_display_url": | 64 | "image_display_url": | ||
60 | s.cchs.csic.es/uploads/group/2024-09-24-155551.755581Linguistica.png", | 65 | s.cchs.csic.es/uploads/group/2024-09-24-155551.755581Linguistica.png", | ||
61 | "name": "linguistica", | 66 | "name": "linguistica", | ||
62 | "title": "Ling\u00fc\u00edstica" | 67 | "title": "Ling\u00fc\u00edstica" | ||
63 | }, | 68 | }, | ||
64 | { | 69 | { | ||
65 | "description": "", | 70 | "description": "", | ||
66 | "display_name": "Tecnolog\u00edas del lenguaje", | 71 | "display_name": "Tecnolog\u00edas del lenguaje", | ||
67 | "id": "f50a92b2-8774-4390-82dd-5c080b098961", | 72 | "id": "f50a92b2-8774-4390-82dd-5c080b098961", | ||
68 | "image_display_url": | 73 | "image_display_url": | ||
69 | s/uploads/group/2024-09-27-062643.156350Tecnologias-del-lenguaje.png", | 74 | s/uploads/group/2024-09-27-062643.156350Tecnologias-del-lenguaje.png", | ||
70 | "name": "tecnologias-del-lenguaje", | 75 | "name": "tecnologias-del-lenguaje", | ||
71 | "title": "Tecnolog\u00edas del lenguaje" | 76 | "title": "Tecnolog\u00edas del lenguaje" | ||
72 | }, | 77 | }, | ||
73 | { | 78 | { | ||
74 | "description": "", | 79 | "description": "", | ||
75 | "display_name": "Terminolog\u00eda", | 80 | "display_name": "Terminolog\u00eda", | ||
76 | "id": "f21d51d4-df60-44f9-ad1f-60f1d5ca24d2", | 81 | "id": "f21d51d4-df60-44f9-ad1f-60f1d5ca24d2", | ||
77 | "image_display_url": | 82 | "image_display_url": | ||
78 | .cchs.csic.es/uploads/group/2024-09-27-062713.009717Terminologia.png", | 83 | .cchs.csic.es/uploads/group/2024-09-27-062713.009717Terminologia.png", | ||
79 | "name": "terminologia", | 84 | "name": "terminologia", | ||
80 | "title": "Terminolog\u00eda" | 85 | "title": "Terminolog\u00eda" | ||
81 | } | 86 | } | ||
82 | ], | 87 | ], | ||
83 | "id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | 88 | "id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | ||
84 | "identifier": "http://hdl.handle.net/10261/269887", | 89 | "identifier": "http://hdl.handle.net/10261/269887", | ||
85 | "international_spatial_translated": { | 90 | "international_spatial_translated": { | ||
86 | "en": "Europe and United States of America", | 91 | "en": "Europe and United States of America", | ||
87 | "es": "Europa y Estados Unidos" | 92 | "es": "Europa y Estados Unidos" | ||
88 | }, | 93 | }, | ||
89 | "isopen": false, | 94 | "isopen": false, | ||
90 | "issued_date": "2022-05-19T00:00:00", | 95 | "issued_date": "2022-05-19T00:00:00", | ||
91 | "language": [ | 96 | "language": [ | ||
92 | "es", | 97 | "es", | ||
93 | "en" | 98 | "en" | ||
94 | ], | 99 | ], | ||
n | 95 | "license_id": | n | ||
96 | "\thttps://creativecommons.org/licenses/by-nc-sa/4.0/", | 100 | "license_id": "https://creativecommons.org/licenses/by-nc-sa/4.0/", | ||
97 | "license_title": | 101 | "license_title": | ||
n | 98 | "\thttps://creativecommons.org/licenses/by-nc-sa/4.0/", | n | 102 | "https://creativecommons.org/licenses/by-nc-sa/4.0/", |
99 | "maintainer": null, | 103 | "maintainer": null, | ||
100 | "maintainer_email": null, | 104 | "maintainer_email": null, | ||
101 | "metadata_created": "2025-03-04T16:37:34.111951", | 105 | "metadata_created": "2025-03-04T16:37:34.111951", | ||
n | 102 | "metadata_modified": "2025-03-05T07:58:38.972833", | n | 106 | "metadata_modified": "2025-03-20T08:28:07.099902", |
103 | "modified_date": "2022-05-19T00:00:00", | 107 | "modified_date": "2022-05-19T00:00:00", | ||
104 | "multilingual_tags": { | 108 | "multilingual_tags": { | ||
105 | "en": [ | 109 | "en": [ | ||
106 | "Comparable corpus", | 110 | "Comparable corpus", | ||
107 | "Parallel sentences", | 111 | "Parallel sentences", | ||
108 | "Medical text simplification", | 112 | "Medical text simplification", | ||
109 | "Biomedical natural language processing" | 113 | "Biomedical natural language processing" | ||
110 | ], | 114 | ], | ||
111 | "es": [ | 115 | "es": [ | ||
112 | "Comparaci\u00f3n de corpus", | 116 | "Comparaci\u00f3n de corpus", | ||
113 | "Frases paralelas", | 117 | "Frases paralelas", | ||
114 | "Simplificaci\u00f3n de textos m\u00e9dicos", | 118 | "Simplificaci\u00f3n de textos m\u00e9dicos", | ||
115 | "Procesamiento del Lenguaje Natural en biomedicina" | 119 | "Procesamiento del Lenguaje Natural en biomedicina" | ||
116 | ] | 120 | ] | ||
117 | }, | 121 | }, | ||
118 | "name": "clara-med-corpus", | 122 | "name": "clara-med-corpus", | ||
119 | "notes": null, | 123 | "notes": null, | ||
120 | "num_resources": 2, | 124 | "num_resources": 2, | ||
121 | "num_tags": 0, | 125 | "num_tags": 0, | ||
122 | "organization": { | 126 | "organization": { | ||
123 | "approval_status": "approved", | 127 | "approval_status": "approved", | ||
124 | "created": "2023-09-25T12:13:42.172869", | 128 | "created": "2023-09-25T12:13:42.172869", | ||
125 | "description": "El Instituto de Lengua, Literatura y | 129 | "description": "El Instituto de Lengua, Literatura y | ||
126 | Antropolog\u00eda, ILLA (CSIC) tiene como objetivo primordial la | 130 | Antropolog\u00eda, ILLA (CSIC) tiene como objetivo primordial la | ||
127 | investigaci\u00f3n del patrimonio cultural hisp\u00e1nico en su triple | 131 | investigaci\u00f3n del patrimonio cultural hisp\u00e1nico en su triple | ||
128 | dimensi\u00f3n antropol\u00f3gica, ling\u00fc\u00edstica y | 132 | dimensi\u00f3n antropol\u00f3gica, ling\u00fc\u00edstica y | ||
129 | literaria.", | 133 | literaria.", | ||
130 | "id": "fc47e531-a165-4eac-8fc7-34342a3a38ff", | 134 | "id": "fc47e531-a165-4eac-8fc7-34342a3a38ff", | ||
131 | "image_url": "2023-09-25-101342.167134illa0.png", | 135 | "image_url": "2023-09-25-101342.167134illa0.png", | ||
132 | "is_organization": true, | 136 | "is_organization": true, | ||
133 | "name": "instituto-de-lengua-literatura-y-antropologia-illa-csic", | 137 | "name": "instituto-de-lengua-literatura-y-antropologia-illa-csic", | ||
134 | "state": "active", | 138 | "state": "active", | ||
135 | "title": "Instituto de Lengua, Literatura y Antropolog\u00eda | 139 | "title": "Instituto de Lengua, Literatura y Antropolog\u00eda | ||
136 | (ILLA), CSIC", | 140 | (ILLA), CSIC", | ||
137 | "type": "organization" | 141 | "type": "organization" | ||
138 | }, | 142 | }, | ||
139 | "owner_org": "fc47e531-a165-4eac-8fc7-34342a3a38ff", | 143 | "owner_org": "fc47e531-a165-4eac-8fc7-34342a3a38ff", | ||
140 | "private": false, | 144 | "private": false, | ||
141 | "proyecto": { | 145 | "proyecto": { | ||
142 | "en": [ | 146 | "en": [ | ||
143 | "PID2020-116001RA-C33" | 147 | "PID2020-116001RA-C33" | ||
144 | ], | 148 | ], | ||
145 | "es": [ | 149 | "es": [ | ||
146 | "PID2020-116001RA-C33" | 150 | "PID2020-116001RA-C33" | ||
147 | ] | 151 | ] | ||
148 | }, | 152 | }, | ||
149 | "publisher": "b627d71d-2315-4e75-afc9-897da84459f0", | 153 | "publisher": "b627d71d-2315-4e75-afc9-897da84459f0", | ||
t | 150 | "reference": [], | t | 154 | "reference": [ |
155 | "https://github.com/lcampillos/CLARA-MeD/corpus" | ||||
156 | ], | ||||
151 | "relationships_as_object": [], | 157 | "relationships_as_object": [], | ||
152 | "relationships_as_subject": [], | 158 | "relationships_as_subject": [], | ||
153 | "resources": [ | 159 | "resources": [ | ||
154 | { | 160 | { | ||
155 | "byte_size": "8,1 kB", | 161 | "byte_size": "8,1 kB", | ||
156 | "cache_last_updated": null, | 162 | "cache_last_updated": null, | ||
157 | "cache_url": null, | 163 | "cache_url": null, | ||
158 | "created": "2025-03-04T16:38:15.618921", | 164 | "created": "2025-03-04T16:38:15.618921", | ||
159 | "datastore_active": false, | 165 | "datastore_active": false, | ||
160 | "description": null, | 166 | "description": null, | ||
161 | "format": "txt", | 167 | "format": "txt", | ||
162 | "hash": "", | 168 | "hash": "", | ||
163 | "id": "084fe5a6-500d-4d1c-a87e-1e764b61c3a2", | 169 | "id": "084fe5a6-500d-4d1c-a87e-1e764b61c3a2", | ||
164 | "last_modified": null, | 170 | "last_modified": null, | ||
165 | "metadata_modified": "2025-03-04T16:38:48.016552", | 171 | "metadata_modified": "2025-03-04T16:38:48.016552", | ||
166 | "mimetype": "text/plain", | 172 | "mimetype": "text/plain", | ||
167 | "mimetype_inner": null, | 173 | "mimetype_inner": null, | ||
168 | "name": "README.txt", | 174 | "name": "README.txt", | ||
169 | "name_translated": { | 175 | "name_translated": { | ||
170 | "en": "README.txt", | 176 | "en": "README.txt", | ||
171 | "es": "README.txt" | 177 | "es": "README.txt" | ||
172 | }, | 178 | }, | ||
173 | "package_id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | 179 | "package_id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | ||
174 | "position": 0, | 180 | "position": 0, | ||
175 | "resource_identifier": | 181 | "resource_identifier": | ||
176 | "https://digital.csic.es/bitstream/10261/269887/4/README.txt", | 182 | "https://digital.csic.es/bitstream/10261/269887/4/README.txt", | ||
177 | "resource_relation": [], | 183 | "resource_relation": [], | ||
178 | "resource_relation-1": "", | 184 | "resource_relation-1": "", | ||
179 | "resource_type": null, | 185 | "resource_type": null, | ||
180 | "size": null, | 186 | "size": null, | ||
181 | "state": "active", | 187 | "state": "active", | ||
182 | "url": | 188 | "url": | ||
183 | "https://digital.csic.es/bitstream/10261/269887/4/README.txt", | 189 | "https://digital.csic.es/bitstream/10261/269887/4/README.txt", | ||
184 | "url_type": null | 190 | "url_type": null | ||
185 | }, | 191 | }, | ||
186 | { | 192 | { | ||
187 | "byte_size": "196,13 MB", | 193 | "byte_size": "196,13 MB", | ||
188 | "cache_last_updated": null, | 194 | "cache_last_updated": null, | ||
189 | "cache_url": null, | 195 | "cache_url": null, | ||
190 | "created": "2025-03-04T16:38:48.018874", | 196 | "created": "2025-03-04T16:38:48.018874", | ||
191 | "datastore_active": false, | 197 | "datastore_active": false, | ||
192 | "description": null, | 198 | "description": null, | ||
193 | "format": "zip", | 199 | "format": "zip", | ||
194 | "hash": "", | 200 | "hash": "", | ||
195 | "id": "42ab5813-12ae-48a3-b2e9-6d3e0bc3c692", | 201 | "id": "42ab5813-12ae-48a3-b2e9-6d3e0bc3c692", | ||
196 | "last_modified": null, | 202 | "last_modified": null, | ||
197 | "metadata_modified": "2025-03-04T16:38:48.138642", | 203 | "metadata_modified": "2025-03-04T16:38:48.138642", | ||
198 | "mimetype": "application/zip", | 204 | "mimetype": "application/zip", | ||
199 | "mimetype_inner": null, | 205 | "mimetype_inner": null, | ||
200 | "name": "CLARA-MeD-corpus.zip\t", | 206 | "name": "CLARA-MeD-corpus.zip\t", | ||
201 | "name_translated": { | 207 | "name_translated": { | ||
202 | "en": "CLARA-MeD-corpus.zip\t", | 208 | "en": "CLARA-MeD-corpus.zip\t", | ||
203 | "es": "CLARA-MeD-corpus.zip\t" | 209 | "es": "CLARA-MeD-corpus.zip\t" | ||
204 | }, | 210 | }, | ||
205 | "package_id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | 211 | "package_id": "0a3879bf-01f2-40ec-902c-5f059ce1a141", | ||
206 | "position": 1, | 212 | "position": 1, | ||
207 | "resource_identifier": | 213 | "resource_identifier": | ||
208 | ttps://digital.csic.es/bitstream/10261/269887/1/CLARA-MeD-corpus.zip", | 214 | ttps://digital.csic.es/bitstream/10261/269887/1/CLARA-MeD-corpus.zip", | ||
209 | "resource_relation": [], | 215 | "resource_relation": [], | ||
210 | "resource_relation-1": "", | 216 | "resource_relation-1": "", | ||
211 | "resource_type": null, | 217 | "resource_type": null, | ||
212 | "size": null, | 218 | "size": null, | ||
213 | "state": "active", | 219 | "state": "active", | ||
214 | "url": | 220 | "url": | ||
215 | ttps://digital.csic.es/bitstream/10261/269887/1/CLARA-MeD-corpus.zip", | 221 | ttps://digital.csic.es/bitstream/10261/269887/1/CLARA-MeD-corpus.zip", | ||
216 | "url_type": null | 222 | "url_type": null | ||
217 | } | 223 | } | ||
218 | ], | 224 | ], | ||
219 | "spatial": [ | 225 | "spatial": [ | ||
220 | 226 | ||||
221 | ttp://datos.gob.es/recurso/sector-publico/territorio/Pais/Espa\u00f1a" | 227 | ttp://datos.gob.es/recurso/sector-publico/territorio/Pais/Espa\u00f1a" | ||
222 | ], | 228 | ], | ||
223 | "state": "active", | 229 | "state": "active", | ||
224 | "tags": [], | 230 | "tags": [], | ||
225 | "theme": [ | 231 | "theme": [ | ||
226 | 232 | ||||
227 | "http://datos.gob.es/kos/sector-publico/sector/ciencia-tecnologia", | 233 | "http://datos.gob.es/kos/sector-publico/sector/ciencia-tecnologia", | ||
228 | "http://datos.gob.es/kos/sector-publico/sector/salud" | 234 | "http://datos.gob.es/kos/sector-publico/sector/salud" | ||
229 | ], | 235 | ], | ||
230 | "title": "CLARA-MeD corpus", | 236 | "title": "CLARA-MeD corpus", | ||
231 | "title_translated": { | 237 | "title_translated": { | ||
232 | "en": "CLARA-MeD corpus", | 238 | "en": "CLARA-MeD corpus", | ||
233 | "es": "CLARA-MeD corpus" | 239 | "es": "CLARA-MeD corpus" | ||
234 | }, | 240 | }, | ||
235 | "type": "dataset", | 241 | "type": "dataset", | ||
236 | "url": null, | 242 | "url": null, | ||
237 | "version": null | 243 | "version": null | ||
238 | } | 244 | } |