{
  "crawlWorkCompleted": {
    "enable": true,
    "cron": "50 50 21 * * ?",
    "count": 5000.0,
    "###enable": "是否启用###",
    "###cron": "定时cron表达式.###",
    "###count": "每次处理的数量,默认为5000,同时每次将重爬最旧的10%以提高数据质量.###"
  },
  "crawlWork": {
    "enable": true,
    "cron": "45 45 * * * ?",
    "count": 1000.0,
    "###enable": "是否启用###",
    "###cron": "定时cron表达式.###",
    "###count": "每次处理的数量,默认为1000,同时每次将重爬最旧的25%以提高数据质量.###"
  },
  "crawlCms": {
    "enable": true,
    "cron": "55 55 8/2 * * ?",
    "count": 1000.0,
    "###enable": "是否启用###",
    "###cron": "定时cron表达式.###",
    "###count": "每次处理的数量,默认为1000,同时每次将重爬最旧的10%以提高数据质量.###"
  },
  "extractOffice": true,
  "extractPdf": true,
  "extractText": true,
  "extractImage": false,
  "tessLanguage": "chi_sim",
  "###crawlWorkCompleted": "已完成工作收集器设置.###",
  "###crawlWork": "工作收集器设置.###",
  "###crawlCms": "内容管理收集器设置.###",
  "###extractOffice": "抽取office中的文本.###",
  "###extractPdf": "抽取pdf中的文本.###",
  "###extractText": "抽取文本中的文本.###",
  "###extractImage": "抽取图像中的文本.###",
  "###tessLanguage": "tess使用语言.###"
}