-
Notifications
You must be signed in to change notification settings - Fork 745
/
Office.java
187 lines (158 loc) · 7.45 KB
/
Office.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.metadata;
/**
* Office Document properties collection. These properties apply to
* Office / Productivity Documents of all forms, including (but not limited
* to) MS Office and OpenDocument formats.
* This is a logical collection of properties, which may be drawn from a
* few different external definitions.
*
* @since Apache Tika 1.2
*/
public interface Office {
// These are taken from the OpenDocumentFormat specification
String NAMESPACE_URI_DOC_META = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
String PREFIX_DOC_META = "meta";
/**
* For user defined metadata entries in the document,
* what prefix should be attached to the key names.
* eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
*/
String USER_DEFINED_METADATA_NAME_PREFIX = "custom:";
/**
* Keywords pertaining to a document. Also populates {@link DublinCore#SUBJECT}.
*/
Property KEYWORDS = Property.composite(Property.internalTextBag(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "keyword"),
new Property[]{DublinCore.SUBJECT,});
/**
* Name of the initial creator/author of a document
*/
Property INITIAL_AUTHOR = Property.internalText(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "initial-author");
/**
* Name of the last (most recent) author of a document
*/
Property LAST_AUTHOR = Property.internalText(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "last-author");
/**
* Name of the principal author(s) of a document
*/
Property AUTHOR = Property.internalTextBag(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "author");
/**
* When was the document created?
*/
Property CREATION_DATE = Property.internalDate(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "creation-date");
/**
* When was the document last saved?
*/
Property SAVE_DATE = Property.internalDate(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "save-date");
/**
* When was the document last printed?
*/
Property PRINT_DATE = Property.internalDate(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "print-date");
/**
* The number of Slides are there in the (presentation) document
*/
Property SLIDE_COUNT = Property.internalInteger(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "slide-count");
/**
* The number of Pages are there in the (paged) document
*/
Property PAGE_COUNT = Property.internalInteger(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "page-count");
/**
* The number of individual Paragraphs in the document
*/
Property PARAGRAPH_COUNT = Property.internalInteger(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "paragraph-count");
/**
* The number of lines in the document
*/
Property LINE_COUNT = Property.internalInteger(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "line-count");
/**
* The number of Words in the document
*/
Property WORD_COUNT = Property.internalInteger(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "word-count");
/**
* The number of Characters in the document
*/
Property CHARACTER_COUNT = Property.internalInteger(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "character-count");
/**
* The number of Characters in the document, including spaces
*/
Property CHARACTER_COUNT_WITH_SPACES = Property.internalInteger(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER +
"character-count-with-spaces");
/**
* The number of Tables in the document
*/
Property TABLE_COUNT = Property.internalInteger(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "table-count");
/**
* The number of Images in the document
*/
Property IMAGE_COUNT = Property.internalInteger(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "image-count");
/**
* The number of Objects in the document. These are typically non-Image resources
* embedded in the document, such as other documents or non-Image media.
*/
Property OBJECT_COUNT = Property.internalInteger(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "object-count");
/**
* MAPI message class. What type of .msg/MAPI file is it?
*/
Property MAPI_MESSAGE_CLASS = Property.internalClosedChoise(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "mapi-message-class",
"APPOINTMENT", "CONTACT", "NOTE", "STICKY_NOTE", "POST", "TASK", "UNKNOWN",
"UNSPECIFIED");
Property MAPI_SENT_BY_SERVER_TYPE = Property.internalText(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER +
"mapi-sent-by-server-type");
Property MAPI_FROM_REPRESENTING_NAME = Property.internalText(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER +
"mapi-from-representing-name");
Property MAPI_FROM_REPRESENTING_EMAIL = Property.internalText(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER +
"mapi-from-representing-email");
Property MAPI_MESSAGE_CLIENT_SUBMIT_TIME = Property.internalDate(
PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER +
"mapi-msg-client-submit-time");
/**
* Embedded files may have a "progID" associated with them, such as
* Word.Document.12 or AcroExch.Document.DC
*/
Property PROG_ID = Property.internalText("msoffice:progID");
Property OCX_NAME = Property.internalText("msoffice:ocxName");
Property MAPI_RECIPIENTS_STRING = Property.internalText(PREFIX_DOC_META +
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "mapi-recipients-string");
Property MAPI_IMPORTANCE = Property.internalInteger(PREFIX_DOC_META +
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "mapi-importance");
Property MAPI_PRIORTY = Property.internalInteger(PREFIX_DOC_META +
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "mapi-importance");
Property MAPI_IS_FLAGGED = Property.internalBoolean(PREFIX_DOC_META +
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "mapi-is-flagged");
}