User Tools

Site Tools


cats:addconverters

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
cats:addconverters [2007/02/07 21:03]
helphand
cats:addconverters [2007/02/07 22:09] (current)
helphand
Line 1: Line 1:
 +^  :!: This Documentation Applies to CATS Version 0.6.1 :!:  ^
 +|  The CATS Team has since released newer versions; the material documented here will likely not work with them without modification.  |
 +
 +===== Add PDF, RTF, HTM Converters to CATS =====
 +
 +The patch below adds converters to handle PDF, RTF, and HTM format resumes by converting them to text for insertion into the candidate record. For the patch to be useful, the corresponding converters (along with antiword, which CATS already uses for Word documents) must be installed on the system:
 +  * antiword - available from [[http://​www.winfield.demon.nl/​]]
 +  * pdftotext - part of the xpdf package [[http://​www.foolabs.com/​xpdf/​home.html]]
 +  * html2text - available from [[http://​www.mbayer.de/​html2text/​]]
 +  * rtf-converter - available from [[http://​directory.fsf.org/​rtf-converter.html]]
 +
 +<​code>​
 +Index: trunk/​config.php
 +===================================================================
 +--- trunk/​config.php (revision 1)
 ++++ trunk/​config.php (working copy)
 +@@ -38,9 +38,29 @@
 +  * recomended, in which case you should set ANTIWORD_PATH (below) to
 +  * '​C:​\\antiword\\antiword.exe'​.
 +  */
 +-define('​ANTIWORD_PATH',​ '​C:​\\antiword\\antiword.exe'​);​
 ++define('​ANTIWORD_PATH',​ '/​usr/​local/​bin/​antiword'​);​
 + ​define('​ANTIWORD_MAP',​ '​8859-1.txt'​);​
 + 
 ++/* pdftotext settings for the Linux platform. Windows
 ++ * users should set this path to ''​
 ++ */
 ++define('​PDFTOTEXT_PATH',​ '/​usr/​bin/​pdftotext'​);​
 ++
 ++/* htmltotext settings for the Linux platform. Windows
 ++ * users should set this path to ''​
 ++ */
 ++define('​HTMLTOTEXT_PATH',​ '/​usr/​bin/​html2text'​);​
 ++
 ++/* unrtf settings for the Linux platform. Windows
 ++ * users should set this path to ''​
 ++ */
 ++define('​UNRTF_PATH',​ '/​usr/​local/​bin/​rtf-converter'​);​
 ++
 ++/* cat settings for the Linux platform. Windows
 ++ * users should set this path to ''​
 ++ */
 ++define('​CAT_PATH',​ '​cat'​);​
 ++
 + /* Temporary directory. Set this to a directory that is writable by the
 +  * web server. The default should be fine for most systems. Remember to
 +  * use double backslashes (\) to represent one backslash (\) on Windows.
 +@@ -50,7 +70,7 @@
 + /* If User Details and Login Activity pages in the settings module are
 +  * unbearably slow, set this to false.
 +  */
 +-define('​ENABLE_HOSTNAME_LOOKUP',​ true);
 ++define('​ENABLE_HOSTNAME_LOOKUP',​ false);
 + 
 + /* Probably no need to edit anything below this line. */
 + 
 +@@ -101,7 +121,7 @@
 + ​define('​FORGOT_PASSWORD_BODY', ​     'You recently requested that your CATS: Applicant Tracking System password be sent to you. Your current password is %s.');
 + 
 + /* Is this a demo site? */
 +-define('​ENABLE_DEMO_MODE',​ true);
 ++define('​ENABLE_DEMO_MODE',​ false);
 + 
 + /* Automated testing. This is only useful for the CATS core team at the moment;
 +  * don't worry about this yet.
 +Index: trunk/​lib/​MSWordToText.php
 +===================================================================
 +--- trunk/​lib/​MSWordToText.php (revision 1)
 ++++ trunk/​lib/​MSWordToText.php (working copy)
 +@@ -38,10 +38,13 @@
 +     ​private $_fileName = '';​
 +     ​private $_AntiWordCommand = '';​
 +     ​private $_returnCode = -1;
 ++    private $_path_parts = '';​
 ++    ​
 + 
 +-
 +     /**
 +-     * Attempts to convert a Microsoft Word document to plain text.
 ++     * Attempts to convert a Microsoft Word document to plain text on
 ++     * the Windows platform. Attempts various file format conversions
 ++     * for the converters available on the Linux platform.
 +      *
 +      * @param string file name
 +      * @return boolean successful
 +@@ -54,10 +57,25 @@
 +         ​$this->​_linesString = '';​
 +         ​$this->​_rawOutput ​  = '';​
 +         ​$this->​_fileName ​   = $fileName;
 ++        $this->​_path_parts ​ = pathinfo($fileName);​
 ++        $this->​_path_parts['​extension'​] = strtolower($this->​_path_parts['​extension'​]);​
 ++        ​
 + 
 +-        /* Build the AntiWord command string. */
 +-        $this->​_AntiWordCommand = '"'​. ANTIWORD_PATH . '"​ -m ' . ANTIWORD_MAP
 ++        if ($this->​_path_parts['​extension'​] == '​doc'​) {
 ++           /* Build the AntiWord command string. */
 ++           ​$this->​_AntiWordCommand = '"'​. ANTIWORD_PATH . '"​ -m ' . ANTIWORD_MAP
 +             . ' ' . escapeshellarg(realpath($fileName));​
 ++        } else if ($this->​_path_parts['​extension'​] == '​pdf'​ && PDFTOTEXT_PATH) {
 ++           ​$this->​_AntiWordCommand = '"'​. PDFTOTEXT_PATH . '"​ ' . escapeshellarg(realpath($fileName)) . ' -';
 ++        } else if (($this->​_path_parts['​extension'​] == '​htm'​ or $this->​_path_parts['​extension'​] == '​html'​) && HTMLTOTEXT_PATH) {
 ++           ​$this->​_AntiWordCommand = '"'​. HTMLTOTEXT_PATH . '"​ ' . escapeshellarg(realpath($fileName));​
 ++        } else if ($this->​_path_parts['​extension'​] == '​rtf'​ && UNRTF_PATH && HTMLTOTEXT_PATH) {
 ++           ​$this->​_AntiWordCommand = '"'​. UNRTF_PATH . '"​ ' . escapeshellarg(realpath($fileName)) . ' | ' . HTMLTOTEXT_PATH ;
 ++        } else if ($this->​_path_parts['​extension'​] == '​txt'​ && CAT_PATH) {
 ++           ​$this->​_AntiWordCommand = '"'​. CAT_PATH . '"​ ' . escapeshellarg(realpath($fileName)) ;
 ++        }  else {
 ++           ​return false;
 ++        }
 + 
 +         /* Running on Windows? */
 +         if (SystemUtility::​isWindows())
 +
 +</​code>​
 +
 +
 +
  
cats/addconverters.txt · Last modified: 2007/02/07 22:09 by helphand