Press n or j to go to the next uncovered block, b, p or k for the previous block.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | 1x 1x 1x | import path from "path"; import fs from "fs"; import pdfParse from "pdf-parse"; import mammoth from "mammoth"; export const parseDocument = async (filePath: string): Promise<string> => { const ext = path.extname(filePath).toLowerCase(); try { switch (ext) { case '.pdf': return await parsePdf(filePath); case '.docx': case '.doc': return await parseWord(filePath); case '.txt': case '.text': return fs.readFileSync(filePath, 'utf-8'); default: throw new Error(`Unsupported file format: ${ext}`); } } catch (error: any) { console.error(`Error parsing document ${filePath}:`, error); throw new Error(`Failed to parse document: ${error.message}`); } }; const parsePdf = async (filePath: string): Promise<string> => { try { const dataBuffer = fs.readFileSync(filePath); const data = await pdfParse(dataBuffer); return data.text; } catch (error: any) { console.error('Error parsing PDF:', error); throw new Error('Failed to parse PDF document'); } }; const parseWord = async (filePath: string): Promise<string> => { try { const result = await mammoth.extractRawText({ path: filePath }); return result.value; } catch (error: any) { console.error('Error parsing Word document:', error); throw new Error('Failed to parse Word document'); } }; |