1
1
package org .jbake .app ;
2
2
3
3
import com .orientechnologies .orient .core .record .impl .ODocument ;
4
- import org .apache .commons .configuration .CompositeConfiguration ;
4
+ import org .apache .commons .configuration2 .CompositeConfiguration ;
5
5
import org .apache .commons .io .FilenameUtils ;
6
- import org .jbake .app .Crawler .Attributes .Status ;
7
6
import org .jbake .app .configuration .JBakeConfiguration ;
8
7
import org .jbake .app .configuration .JBakeConfigurationFactory ;
9
- import org .jbake .model .DocumentAttributes ;
8
+ import org .jbake .model .DocumentModel ;
10
9
import org .jbake .model .DocumentStatus ;
11
10
import org .jbake .model .DocumentTypes ;
11
+ import org .jbake .model .ModelAttributes ;
12
12
import org .jbake .util .HtmlUtil ;
13
13
import org .slf4j .Logger ;
14
14
import org .slf4j .LoggerFactory ;
28
28
*/
29
29
public class Crawler {
30
30
31
- private static final Logger LOGGER = LoggerFactory .getLogger (Crawler .class );
31
+ private static final Logger logger = LoggerFactory .getLogger (Crawler .class );
32
32
private final ContentStore db ;
33
- private JBakeConfiguration config ;
34
- private Parser parser ;
33
+ private final JBakeConfiguration config ;
34
+ private final Parser parser ;
35
35
36
36
/**
37
37
* @param db Database instance for content
@@ -63,24 +63,23 @@ public Crawler(ContentStore db, JBakeConfiguration config) {
63
63
public void crawl () {
64
64
crawl (config .getContentFolder ());
65
65
66
- LOGGER .info ("Content detected:" );
66
+ logger .info ("Content detected:" );
67
67
for (String docType : DocumentTypes .getDocumentTypes ()) {
68
68
long count = db .getDocumentCount (docType );
69
69
if (count > 0 ) {
70
- LOGGER .info ("Parsed {} files of type: {}" , count , docType );
70
+ logger .info ("Parsed {} files of type: {}" , count , docType );
71
71
}
72
72
}
73
-
74
73
}
75
74
76
75
public void crawlDataFiles () {
77
76
crawlDataFiles (config .getDataFolder ());
78
77
79
- LOGGER .info ("Data files detected:" );
80
- String docType = "data" ;
78
+ logger .info ("Data files detected:" );
79
+ String docType = config . getDataFileDocType () ;
81
80
long count = db .getDocumentCount (docType );
82
81
if (count > 0 ) {
83
- LOGGER .info ("Parsed {} files" , count );
82
+ logger .info ("Parsed {} files" , count );
84
83
}
85
84
}
86
85
@@ -95,41 +94,37 @@ private void crawl(File path) {
95
94
Arrays .sort (contents );
96
95
for (File sourceFile : contents ) {
97
96
if (sourceFile .isFile ()) {
98
- StringBuilder sb = new StringBuilder ();
99
- sb .append ("Processing [" ).append (sourceFile .getPath ()).append ("]... " );
100
- String sha1 = buildHash (sourceFile );
101
- String uri = buildURI (sourceFile );
102
- boolean process = true ;
103
- DocumentStatus status = DocumentStatus .NEW ;
104
- for (String docType : DocumentTypes .getDocumentTypes ()) {
105
- status = findDocumentStatus (docType , uri , sha1 );
106
- if (status == DocumentStatus .UPDATED ) {
107
- sb .append (" : modified " );
108
- db .deleteContent (docType , uri );
109
-
110
- } else if (status == DocumentStatus .IDENTICAL ) {
111
- sb .append (" : same " );
112
- process = false ;
113
- }
114
- if (!process ) {
115
- break ;
116
- }
117
- }
118
- if (DocumentStatus .NEW == status ) {
119
- sb .append (" : new " );
120
- }
121
- if (process ) { // new or updated
122
- crawlSourceFile (sourceFile , sha1 , uri );
123
- }
124
- LOGGER .info ("{}" , sb );
125
- }
126
- if (sourceFile .isDirectory ()) {
97
+ crawlFile (sourceFile );
98
+ } else if (sourceFile .isDirectory ()) {
127
99
crawl (sourceFile );
128
100
}
129
101
}
130
102
}
131
103
}
132
104
105
+ private void crawlFile (File sourceFile ) {
106
+
107
+ StringBuilder sb = new StringBuilder ();
108
+ sb .append ("Processing [" ).append (sourceFile .getPath ()).append ("]... " );
109
+ String sha1 = buildHash (sourceFile );
110
+ String uri = buildURI (sourceFile );
111
+ DocumentStatus status = findDocumentStatus (uri , sha1 );
112
+ if (status == DocumentStatus .UPDATED ) {
113
+ sb .append (" : modified " );
114
+ db .deleteContent (uri );
115
+ } else if (status == DocumentStatus .IDENTICAL ) {
116
+ sb .append (" : same " );
117
+ } else if (DocumentStatus .NEW == status ) {
118
+ sb .append (" : new " );
119
+ }
120
+
121
+ logger .info ("{}" , sb );
122
+
123
+ if (status != DocumentStatus .IDENTICAL ) {
124
+ processSourceFile (sourceFile , sha1 , uri );
125
+ }
126
+ }
127
+
133
128
/**
134
129
* Crawl all files and folders looking for data files.
135
130
*
@@ -148,10 +143,10 @@ private void crawlDataFiles(File path) {
148
143
boolean process = true ;
149
144
DocumentStatus status = DocumentStatus .NEW ;
150
145
String docType = config .getDataFileDocType ();
151
- status = findDocumentStatus (docType , uri , sha1 );
146
+ status = findDocumentStatus (uri , sha1 );
152
147
if (status == DocumentStatus .UPDATED ) {
153
148
sb .append (" : modified " );
154
- db .deleteContent (docType , uri );
149
+ db .deleteContent (uri );
155
150
} else if (status == DocumentStatus .IDENTICAL ) {
156
151
sb .append (" : same " );
157
152
process = false ;
@@ -165,7 +160,7 @@ private void crawlDataFiles(File path) {
165
160
if (process ) { // new or updated
166
161
crawlDataFile (sourceFile , sha1 , uri , docType );
167
162
}
168
- LOGGER .info ("{}" , sb );
163
+ logger .info ("{}" , sb );
169
164
}
170
165
if (sourceFile .isDirectory ()) {
171
166
crawlDataFiles (sourceFile );
@@ -179,7 +174,7 @@ private String buildHash(final File sourceFile) {
179
174
try {
180
175
sha1 = FileUtil .sha1 (sourceFile );
181
176
} catch (Exception e ) {
182
- LOGGER .error ("unable to build sha1 hash for source file '{}'" , sourceFile );
177
+ logger .error ("unable to build sha1 hash for source file '{}'" , sourceFile );
183
178
sha1 = "" ;
184
179
}
185
180
return sha1 ;
@@ -197,7 +192,7 @@ private String buildURI(final File sourceFile) {
197
192
198
193
// strip off leading / to enable generating non-root based sites
199
194
if (uri .startsWith (FileUtil .URI_SEPARATOR_CHAR )) {
200
- uri = uri .substring (1 , uri . length () );
195
+ uri = uri .substring (1 );
201
196
}
202
197
203
198
return uri ;
@@ -250,83 +245,74 @@ private boolean useNoExtensionUri(String uri) {
250
245
251
246
private void crawlDataFile (final File sourceFile , final String sha1 , final String uri , final String documentType ) {
252
247
try {
253
- Map <String , Object > fileContents = parser .processFile (sourceFile );
254
- if (fileContents != null ) {
255
- fileContents .put (String .valueOf (DocumentAttributes .SHA1 ), sha1 );
256
- fileContents .put (String .valueOf (DocumentAttributes .RENDERED ), true );
257
- fileContents .put (Attributes .FILE , sourceFile .getPath ());
258
- fileContents .put (String .valueOf (DocumentAttributes .SOURCE_URI ), uri );
259
-
260
- ODocument doc = new ODocument (documentType );
261
- doc .fromMap (fileContents );
262
- boolean cached = fileContents .get (String .valueOf (DocumentAttributes .CACHED )) != null ? Boolean .valueOf ((String ) fileContents .get (String .valueOf (DocumentAttributes .CACHED ))) : true ;
263
- doc .field (String .valueOf (DocumentAttributes .CACHED ), cached );
264
- doc .save ();
248
+ DocumentModel document = parser .processFile (sourceFile );
249
+ if (document != null ) {
250
+ document .setSha1 (sha1 );
251
+ document .setRendered (true );
252
+ document .setFile (sourceFile .getPath ());
253
+ document .setSourceUri (uri );
254
+ document .setType (documentType );
255
+
256
+ db .addDocument (document );
265
257
} else {
266
- LOGGER .warn ("{} couldn't be parsed so it has been ignored!" , sourceFile );
258
+ logger .warn ("{} couldn't be parsed so it has been ignored!" , sourceFile );
267
259
}
268
260
} catch (Exception ex ) {
269
261
throw new RuntimeException ("Failed crawling file: " + sourceFile .getPath () + " " + ex .getMessage (), ex );
270
262
}
271
263
}
272
264
273
- private void crawlSourceFile (final File sourceFile , final String sha1 , final String uri ) {
274
- try {
275
- Map <String , Object > fileContents = parser .processFile (sourceFile );
276
- if (fileContents != null ) {
277
- fileContents .put (Attributes .ROOTPATH , getPathToRoot (sourceFile ));
278
- fileContents .put (String .valueOf (DocumentAttributes .SHA1 ), sha1 );
279
- fileContents .put (String .valueOf (DocumentAttributes .RENDERED ), false );
280
- if (fileContents .get (Attributes .TAGS ) != null ) {
281
- // store them as a String[]
282
- String [] tags = (String []) fileContents .get (Attributes .TAGS );
283
- fileContents .put (Attributes .TAGS , tags );
284
- }
285
- fileContents .put (Attributes .FILE , sourceFile .getPath ());
286
- fileContents .put (String .valueOf (DocumentAttributes .SOURCE_URI ), uri );
287
- fileContents .put (Attributes .URI , uri );
288
-
289
- String documentType = (String ) fileContents .get (Attributes .TYPE );
290
- if (fileContents .get (Attributes .STATUS ).equals (Status .PUBLISHED_DATE )) {
291
- if (fileContents .get (Attributes .DATE ) != null && (fileContents .get (Attributes .DATE ) instanceof Date )) {
292
- if (new Date ().after ((Date ) fileContents .get (Attributes .DATE ))) {
293
- fileContents .put (Attributes .STATUS , Status .PUBLISHED );
294
- }
295
- }
296
- }
265
+ private void processSourceFile (final File sourceFile , final String sha1 , final String uri ) {
266
+ DocumentModel document = parser .processFile (sourceFile );
297
267
298
- if (config . getUriWithoutExtension () ) {
299
- fileContents . put ( Attributes . NO_EXTENSION_URI , uri . replace ( "/index.html" , "/" ));
300
- }
268
+ if (document != null ) {
269
+ if ( DocumentTypes . contains ( document . getType ())) {
270
+ addAdditionalDocumentAttributes ( document , sourceFile , sha1 , uri );
301
271
302
272
if (config .getImgPathUpdate ()) {
303
273
// Prevent image source url's from breaking
304
- HtmlUtil .fixImageSourceUrls (fileContents , config );
274
+ HtmlUtil .fixImageSourceUrls (document , config );
305
275
}
306
276
307
- ODocument doc = new ODocument (documentType );
308
- doc .fromMap (fileContents );
309
- boolean cached = fileContents .get (String .valueOf (DocumentAttributes .CACHED )) != null ? Boolean .valueOf ((String ) fileContents .get (String .valueOf (DocumentAttributes .CACHED ))) : true ;
310
- doc .field (String .valueOf (DocumentAttributes .CACHED ), cached );
311
- doc .save ();
277
+ db .addDocument (document );
312
278
} else {
313
- LOGGER .warn ("{} has an invalid header, it has been ignored!" , sourceFile );
279
+ logger .warn ("{} has an unknown document type '{}' and has been ignored!" , sourceFile , document . getType () );
314
280
}
315
- } catch (Exception ex ) {
316
- throw new RuntimeException ("Failed crawling file: " + sourceFile .getPath () + " " + ex .getMessage (), ex );
281
+ } else {
282
+ logger .warn ("{} has an invalid header, it has been ignored!" , sourceFile );
283
+ }
284
+ }
285
+
286
+ private void addAdditionalDocumentAttributes (DocumentModel document , File sourceFile , String sha1 , String uri ) {
287
+ document .setRootPath (getPathToRoot (sourceFile ));
288
+ document .setSha1 (sha1 );
289
+ document .setRendered (false );
290
+ document .setFile (sourceFile .getPath ());
291
+ document .setSourceUri (uri );
292
+ document .setUri (uri );
293
+ document .setCached (true );
294
+
295
+ if (document .getStatus ().equals (ModelAttributes .Status .PUBLISHED_DATE )
296
+ && (document .getDate () != null )
297
+ && new Date ().after (document .getDate ())) {
298
+ document .setStatus (ModelAttributes .Status .PUBLISHED );
299
+ }
300
+
301
+ if (config .getUriWithoutExtension ()) {
302
+ document .setNoExtensionUri (uri .replace ("/index.html" , "/" ));
317
303
}
318
304
}
319
305
320
306
private String getPathToRoot (File sourceFile ) {
321
307
return FileUtil .getUriPathToContentRoot (config , sourceFile );
322
308
}
323
309
324
- private DocumentStatus findDocumentStatus (String docType , String uri , String sha1 ) {
325
- DocumentList match = db .getDocumentStatus (docType , uri );
310
+ private DocumentStatus findDocumentStatus (String uri , String sha1 ) {
311
+ DocumentList < DocumentModel > match = db .getDocumentStatus (uri );
326
312
if (!match .isEmpty ()) {
327
- Map entries = match .get (0 );
328
- String oldHash = ( String ) entries . get ( String . valueOf ( DocumentAttributes . SHA1 ) );
329
- if (!( oldHash .equals (sha1 )) || Boolean . FALSE . equals ( entries . get ( String . valueOf ( DocumentAttributes . RENDERED )) )) {
313
+ DocumentModel document = match .get (0 );
314
+ String oldHash = document . getSha1 ( );
315
+ if (!oldHash .equals (sha1 ) || ! document . getRendered ( )) {
330
316
return DocumentStatus .UPDATED ;
331
317
} else {
332
318
return DocumentStatus .IDENTICAL ;
@@ -336,41 +322,4 @@ private DocumentStatus findDocumentStatus(String docType, String uri, String sha
336
322
}
337
323
}
338
324
339
- public abstract static class Attributes {
340
-
341
- public static final String DATE = "date" ;
342
- public static final String STATUS = "status" ;
343
- public static final String TYPE = "type" ;
344
- public static final String TITLE = "title" ;
345
- public static final String URI = "uri" ;
346
- public static final String FILE = "file" ;
347
- public static final String TAGS = "tags" ;
348
- public static final String TAG = "tag" ;
349
- public static final String ROOTPATH = "rootpath" ;
350
- public static final String ID = "id" ;
351
- public static final String NO_EXTENSION_URI = "noExtensionUri" ;
352
- public static final String ALLTAGS = "alltags" ;
353
- public static final String PUBLISHED_DATE = "published_date" ;
354
- public static final String BODY = "body" ;
355
- public static final String DB = "db" ;
356
- public static final String DATA = "data" ;
357
-
358
- private Attributes () {
359
- }
360
-
361
- /**
362
- * Possible values of the {@link Attributes#STATUS} property
363
- *
364
- * @author ndx
365
- */
366
- public abstract static class Status {
367
- public static final String PUBLISHED_DATE = "published-date" ;
368
- public static final String PUBLISHED = "published" ;
369
- public static final String DRAFT = "draft" ;
370
-
371
- private Status () {
372
- }
373
- }
374
-
375
- }
376
325
}
0 commit comments