-
-
Notifications
You must be signed in to change notification settings - Fork 7
/
ro-crate-metadata.json
458 lines (458 loc) · 21.5 KB
/
ro-crate-metadata.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
{
"@context": "https://w3id.org/ro/crate/1.1/context",
"@graph": [
{
"@id": "./",
"@type": "Dataset",
"author": [
{
"@id": "https://orcid.org/0000-0001-7956-4498"
}
],
"datePublished": "2024-05-25",
"description": "The Trove Newspaper & Gazette Harvester makes it easy to download large quantities of digitised articles from Trove's newspapers and gazettes. Just give it a search from the Trove web interface, and the harvester will save the metadata of all the articles in a CSV (spreadsheet) file for further analysis. You can also save the full text of every article, as well as copies of the articles as JPG images, and even PDFs. While the web interface will only show you the first 2,000 results matching your search, the Newspaper & Gazette Harvester will get everything.",
"hasPart": [
{
"@id": "display_harvest_results_using_datasette.ipynb"
},
{
"@id": "Explore-harvested-text-files.ipynb"
},
{
"@id": "Using-TroveHarvester-to-get-newspaper-articles-in-bulk.ipynb"
},
{
"@id": "newspaper_harvester_app.ipynb"
},
{
"@id": "Exploring-your-TroveHarvester-data.ipynb"
},
{
"@id": "basic-harvester-example.ipynb"
},
{
"@id": "harvest-specific-days.ipynb"
},
{
"@id": "reshaping-harvests.ipynb"
}
],
"license": {
"@id": "https://spdx.org/licenses/MIT"
},
"mainEntityOfPage": {
"@id": "https://glam-workbench.net/trove-newspaper-harvester"
},
"name": "trove-newspaper-harvester",
"url": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/",
"version": "v2.1.1"
},
{
"@id": "ro-crate-metadata.json",
"@type": "CreativeWork",
"about": {
"@id": "./"
},
"conformsTo": {
"@id": "https://w3id.org/ro/crate/1.1"
},
"license": {
"@id": "https://creativecommons.org/publicdomain/zero/1.0/"
}
},
{
"@id": "display_harvest_results_using_datasette.ipynb",
"@type": [
"File",
"SoftwareSourceCode"
],
"author": [
{
"@id": "https://orcid.org/0000-0001-7956-4498"
}
],
"category": "exploring",
"codeRepository": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/",
"conformsTo": {
"@id": "https://purl.archive.org/textcommons/profile#Notebook"
},
"description": "Datasette is 'a tool for exploring and publishing data'. Give it a CSV file and it turns it into a fully-searchable database, running in your browser. It supports facets, full-text search, and, with a bit of tweaking, can even present images. Although Datasette is a command-line tool, we can run it from within a Jupyter notebook, and open a new window to display the results. This notebook shows you how to load the newspaper data you've harvested into Datasette, and start it up. If you've also harvested full-text and images from the newspaper articles, you can add these to your database as well!",
"encodingFormat": "application/x-ipynb+json",
"mainEntityOfPage": {
"@id": "https://glam-workbench.net/trove-harvester/display-harvest-results-in-datasette/"
},
"name": "Display the results of a harvest as a searchable database using Datasette",
"position": 1,
"programmingLanguage": {
"@id": "https://www.python.org/downloads/release/python-31012/"
},
"url": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/blob/master/display_harvest_results_using_datasette.ipynb",
"workExample": []
},
{
"@id": "Explore-harvested-text-files.ipynb",
"@type": [
"File",
"SoftwareSourceCode"
],
"author": [
{
"@id": "https://orcid.org/0000-0001-7956-4498"
}
],
"category": "exploring",
"codeRepository": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/",
"conformsTo": {
"@id": "https://purl.archive.org/textcommons/profile#Notebook"
},
"description": "This notebook suggests some ways in which you can aggregate and analyse the individual OCRd text files for each article \u2014 look at word frequencies; calculate TF-IDF values. (Under construction)",
"encodingFormat": "application/x-ipynb+json",
"mainEntityOfPage": {
"@id": "https://glam-workbench.net/trove-harvester/exploring-troveharvester-text/"
},
"name": "Explore harvested text files",
"position": 3,
"programmingLanguage": {
"@id": "https://www.python.org/downloads/release/python-31012/"
},
"url": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/blob/master/Explore-harvested-text-files.ipynb",
"workExample": []
},
{
"@id": "Using-TroveHarvester-to-get-newspaper-articles-in-bulk.ipynb",
"@type": [
"File",
"SoftwareSourceCode"
],
"author": [
{
"@id": "https://orcid.org/0000-0001-7956-4498"
}
],
"category": "harvesting",
"codeRepository": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/",
"conformsTo": {
"@id": "https://purl.archive.org/textcommons/profile#Notebook"
},
"description": "The Trove Newspaper & Gazette Harvester is a command line tool and Python package that helps you download large quantities of digitised articles from Trove's newspapers and gazettes. This notebook demonstrates the basic use of the command line tool.",
"encodingFormat": "application/x-ipynb+json",
"mainEntityOfPage": {
"@id": "https://glam-workbench.net/trove-harvester/using-troveharvester/"
},
"name": "Using the Trove Newspaper Harvester on the command line",
"position": 3,
"programmingLanguage": {
"@id": "https://www.python.org/downloads/release/python-31012/"
},
"url": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/blob/master/Using-TroveHarvester-to-get-newspaper-articles-in-bulk.ipynb",
"workExample": []
},
{
"@id": "newspaper_harvester_app.ipynb",
"@type": [
"File",
"SoftwareSourceCode"
],
"author": [
{
"@id": "https://orcid.org/0000-0001-7956-4498"
}
],
"category": "harvesting",
"codeRepository": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/",
"conformsTo": {
"@id": "https://purl.archive.org/textcommons/profile#Notebook"
},
"description": "A simple web interface to the Trove Newspaper and Gazette Harvester \u2013 the easiest and quickest way to download all the results from a Trove newspaper or gazette search.",
"encodingFormat": "application/x-ipynb+json",
"mainEntityOfPage": {
"@id": "https://glam-workbench.net/trove-harvester/harvester-web-app/"
},
"name": "Trove Newspaper & Gazette Harvester",
"position": 1,
"programmingLanguage": {
"@id": "https://www.python.org/downloads/release/python-31012/"
},
"url": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/blob/master/newspaper_harvester_app.ipynb",
"workExample": []
},
{
"@id": "Exploring-your-TroveHarvester-data.ipynb",
"@type": [
"File",
"SoftwareSourceCode"
],
"author": [
{
"@id": "https://orcid.org/0000-0001-7956-4498"
}
],
"category": "exploring",
"codeRepository": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/",
"conformsTo": {
"@id": "https://purl.archive.org/textcommons/profile#Notebook"
},
"description": "This notebook shows some ways in which you can analyse and visualise the article metadata you've harvested \u2014 show the distribution of articles over time and space; find which newspapers published the most articles. (Under construction)",
"encodingFormat": "application/x-ipynb+json",
"mainEntityOfPage": {
"@id": "https://glam-workbench.net/trove-harvester/exploring-troveharvester-data/"
},
"name": "Exploring your harvested data",
"position": 2,
"programmingLanguage": {
"@id": "https://www.python.org/downloads/release/python-31012/"
},
"url": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/blob/master/Exploring-your-TroveHarvester-data.ipynb",
"workExample": []
},
{
"@id": "basic-harvester-example.ipynb",
"@type": [
"File",
"SoftwareSourceCode"
],
"author": [
{
"@id": "https://orcid.org/0000-0001-7956-4498"
}
],
"category": "harvesting",
"codeRepository": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/",
"conformsTo": {
"@id": "https://purl.archive.org/textcommons/profile#Notebook"
},
"description": "This notebook uses the [trove-newspaper-harvester](https://wragge.github.io/trove-newspaper-harvester/) Python package to harvest the complete results of a search in Trove's digitised newspapers or gazettes. The default settings will save both the article metadata and all of the OCRd text.",
"encodingFormat": "application/x-ipynb+json",
"mainEntityOfPage": {
"@id": "https://glam-workbench.net/trove-harvester/basic-harvester-example/"
},
"name": "Using the Trove Harvester as a Python package",
"position": 2,
"programmingLanguage": {
"@id": "https://www.python.org/downloads/release/python-31012/"
},
"url": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/blob/master/basic-harvester-example.ipynb",
"workExample": []
},
{
"@id": "harvest-specific-days.ipynb",
"@type": [
"File",
"SoftwareSourceCode"
],
"author": [
{
"@id": "https://orcid.org/0000-0001-7956-4498"
}
],
"category": "harvesting",
"codeRepository": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/",
"conformsTo": {
"@id": "https://purl.archive.org/textcommons/profile#Notebook"
},
"description": "The Trove Newspaper Harvester web app and command-line tool make it easy for you to harvest the results of a single search. But if you want to harvest very large or complex searches, you might find it easier to import the trove_newspaper_harvester library directly and take control of the harvesting process. For example, this notebook demonstrates how to harvest all of the newspaper articles mentioning 'Anzac Day' that were published on Anzac Day, 25 April.",
"encodingFormat": "application/x-ipynb+json",
"mainEntityOfPage": {
"@id": "https://glam-workbench.net/trove-harvester/harvest-specific-days/"
},
"name": "Harvesting articles that mention \"Anzac Day\" on Anzac Day",
"position": 4,
"programmingLanguage": {
"@id": "https://www.python.org/downloads/release/python-31012/"
},
"url": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/blob/master/harvest-specific-days.ipynb",
"workExample": []
},
{
"@id": "reshaping-harvests.ipynb",
"@type": [
"File",
"SoftwareSourceCode"
],
"author": [
{
"@id": "https://orcid.org/0000-0001-7956-4498"
}
],
"category": "harvesting",
"codeRepository": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/",
"conformsTo": {
"@id": "https://purl.archive.org/textcommons/profile#Notebook"
},
"description": "The Trove Newspaper Harvester downloads the OCRd text of newspaper articles as individual text files \u2013 one file for each article. That's great for exploring the content of individual articles in depth, but sometimes you might want to zoom out and aggregate the files into larger chunks. For example, if you're interested in how language changes over time, you might want to create a separate corpus for each year in the results set. Or perhaps you want to examine differences in the way particular newspapers talk about an event by grouping the articles by newspaper. This notebook provides a slice and dice wonder tool for Trove newspaper harvests, enabling you to repackage OCRd text by decade, year, and newspaper title. It saves the results as zip files, concatenated text files, or CSV files with embedded text. These repackaged slices should suit a variety of text analysis tools and questions.",
"encodingFormat": "application/x-ipynb+json",
"mainEntityOfPage": {
"@id": "https://glam-workbench.net/trove-harvester/reshaping-harvests/"
},
"name": "Reshaping your newspaper harvest",
"position": 5,
"programmingLanguage": {
"@id": "https://www.python.org/downloads/release/python-31012/"
},
"url": "https://github.com/GLAM-Workbench/trove-newspaper-harvester/blob/master/reshaping-harvests.ipynb",
"workExample": []
},
{
"@id": "https://glam-workbench.net/trove-newspaper-harvester",
"@type": "CreativeWork",
"isPartOf": {
"@id": "https://glam-workbench.net/"
},
"name": "Trove Newspaper & Gazette Harvester",
"url": "https://glam-workbench.net/trove-newspaper-harvester"
},
{
"@id": "https://glam-workbench.net/",
"@type": "CreativeWork",
"author": [
{
"@id": "https://orcid.org/0000-0001-7956-4498"
}
],
"description": "A collection of tools, tutorials, examples, and hacks to help researchers work with data from galleries, libraries, archives, and museums (the GLAM sector).",
"name": "GLAM Workbench",
"url": "https://glam-workbench.net/"
},
{
"@id": "https://orcid.org/0000-0001-7956-4498",
"@type": "Person",
"name": "Sherratt, Tim",
"orcid": "https://orcid.org/0000-0001-7956-4498"
},
{
"@id": "https://spdx.org/licenses/MIT",
"@type": "CreativeWork",
"name": "MIT License",
"url": "https://spdx.org/licenses/MIT.html"
},
{
"@id": "https://creativecommons.org/publicdomain/zero/1.0/",
"@type": "CreativeWork",
"name": "CC0 Public Domain Dedication",
"url": "https://creativecommons.org/publicdomain/zero/1.0/"
},
{
"@id": "http://rightsstatements.org/vocab/NKC/1.0/",
"@type": "CreativeWork",
"description": "The organization that has made the Item available reasonably believes that the Item is not restricted by copyright or related rights, but a conclusive determination could not be made.",
"name": "No Known Copyright",
"url": "http://rightsstatements.org/vocab/NKC/1.0/"
},
{
"@id": "http://rightsstatements.org/vocab/CNE/1.0/",
"@type": "CreativeWork",
"description": "The copyright and related rights status of this Item has not been evaluated.",
"name": "Copyright Not Evaluated",
"url": "http://rightsstatements.org/vocab/CNE/1.0/"
},
{
"@id": "https://www.python.org/downloads/release/python-31012/",
"@type": [
"ComputerLanguage",
"SoftwareApplication"
],
"name": "Python 3.10.12",
"url": "https://www.python.org/downloads/release/python-31012/",
"version": "3.10.12"
},
{
"@id": "https://glam-workbench.net/trove-harvester/display-harvest-results-in-datasette/",
"@type": "CreativeWork",
"isPartOf": {
"@id": "https://glam-workbench.net/"
},
"name": "Display the results of a harvest as a searchable database using Datasette",
"url": "https://glam-workbench.net/trove-harvester/display-harvest-results-in-datasette/"
},
{
"@id": "https://glam-workbench.net/trove-harvester/exploring-troveharvester-text/",
"@type": "CreativeWork",
"isPartOf": {
"@id": "https://glam-workbench.net/"
},
"name": "Exploring text files harvested with the Trove Harvester",
"url": "https://glam-workbench.net/trove-harvester/exploring-troveharvester-text/"
},
{
"@id": "https://glam-workbench.net/trove-harvester/using-troveharvester/",
"@type": "CreativeWork",
"isPartOf": {
"@id": "https://glam-workbench.net/"
},
"name": "Using TroveHarvester to get newspaper and gazette articles in bulk",
"url": "https://glam-workbench.net/trove-harvester/using-troveharvester/"
},
{
"@id": "https://glam-workbench.net/trove-harvester/harvester-web-app/",
"@type": "CreativeWork",
"isPartOf": {
"@id": "https://glam-workbench.net/"
},
"name": "Trove Harvester web app",
"url": "https://glam-workbench.net/trove-harvester/harvester-web-app/"
},
{
"@id": "https://glam-workbench.net/trove-harvester/exploring-troveharvester-data/",
"@type": "CreativeWork",
"isPartOf": {
"@id": "https://glam-workbench.net/"
},
"name": "Exploring your TroveHarvester data",
"url": "https://glam-workbench.net/trove-harvester/exploring-troveharvester-data/"
},
{
"@id": "https://glam-workbench.net/trove-harvester/basic-harvester-example/",
"@type": "CreativeWork",
"isPartOf": {
"@id": "https://glam-workbench.net/"
},
"name": "Using the Trove Harvester as a Python package",
"url": "https://glam-workbench.net/trove-harvester/basic-harvester-example/"
},
{
"@id": "https://glam-workbench.net/trove-harvester/harvest-specific-days/",
"@type": "CreativeWork",
"isPartOf": {
"@id": "https://glam-workbench.net/"
},
"name": "Harvesting articles that mention \"Anzac Day\" on Anzac Day",
"url": "https://glam-workbench.net/trove-harvester/harvest-specific-days/"
},
{
"@id": "https://glam-workbench.net/trove-harvester/reshaping-harvests/",
"@type": "CreativeWork",
"isPartOf": {
"@id": "https://glam-workbench.net/"
},
"name": "Reshaping your newspaper harvest",
"url": "https://glam-workbench.net/trove-harvester/reshaping-harvests/"
},
{
"@id": "#create_version_v2_0_1",
"@type": "UpdateAction",
"actionStatus": {
"@id": "http://schema.org/CompletedActionStatus"
},
"endDate": "2023-10-25",
"name": "Create version v2.0.1"
},
{
"@id": "#create_version_v2_1_0",
"@type": "UpdateAction",
"actionStatus": {
"@id": "http://schema.org/CompletedActionStatus"
},
"endDate": "2024-05-24",
"name": "Create version v2.1.0"
},
{
"@id": "#create_version_v2_1_1",
"@type": "UpdateAction",
"actionStatus": {
"@id": "http://schema.org/CompletedActionStatus"
},
"endDate": "2024-05-25",
"name": "Create version v2.1.1"
}
]
}