{-# OPTIONS -cpp -fglasgow-exts #-}
-- arch-tag: 0852a460-683f-4abb-9108-8205777e2033

-- | Binding to the PCRE regular expression C library.
--
-- When @HAVE_PCRE_H@ is not defined only 'getVersion' is exported and it is
-- always 'Nothing'.
module RRegex.PCRE (
#if defined(HAVE_PCRE_H)
    Regex,              -- abstract
    compile,
    execute,
    executeExtract,
    pcreCaseless,       -- case insensitive matching
    pcreMultiline,      -- ^ and $ match newline as well as beginning and end of string
    pcreDotall,         -- dot matches everything. including newline
    pcreExtended,
    pcreAnchored,
    pcreDollarEndonly,
    pcreExtra,
    pcreNotbol,
    pcreNoteol,
    pcreUngreedy,       -- matches are not greedy by default
    pcreNotempty,       -- refuse to match empty string
#endif
    getVersion
    ) where

import Prelude

import Foreign
import Foreign.C
import Foreign.C.String
import Array

-- | return version of pcre used or Nothing if pcre is not available.
getVersion :: Maybe String

#if defined(HAVE_PCRE_H)

-- NOTE(review): the targets of the three include directives below were lost
-- in the copy of this file that was reviewed. The pcre header is required for
-- the PCRE_* constants used by the enum directives further down; the other
-- two headers are presumed. Confirm all three against version control.
#include <sys/types.h>
#include <stdlib.h>
#include <pcre.h>

-- | Opaque tag for the C-side compiled pattern; never inspected from Haskell.
data PCRE

-- | A compiled regular expression
newtype Regex = Regex (ForeignPtr PCRE)

-- | Shorthand for 'fromIntegral', used when crossing the Int / CInt boundary.
fi :: (Integral a, Num b) => a -> b
fi x = fromIntegral x

-- | Compiles a regular expression
compile :: String   -- ^ The regular expression to compile
        -> Int      -- ^ Flags (summed together)
        -> IO (Either (Int,String) Regex)
                    -- ^ Returns: an error string and offset or the compiled regular expression
compile pattern flags =
    withCString pattern $ \cstr ->
    alloca $ \errOffset ->
    alloca $ \errPtr -> do
        -- The final argument is the character tables pointer; a null pointer
        -- is passed here.
        v <- c_pcre_compile cstr (fi flags) errPtr errOffset nullPtr
        if v == nullPtr
            then do
                es <- peek errPtr >>= peekCString
                eo <- peek errOffset
                return $ Left (fi eo, es)
            -- The compiled pattern is released with free() when the
            -- ForeignPtr is finalized.
            else fmap (Right . Regex) (newForeignPtr c_ptr_free v)

-- | Ask pcre_fullinfo for the number of capturing subpatterns of a compiled
-- pattern. Raises an 'IOError' if pcre_fullinfo reports a non-zero code,
-- instead of reading a result slot that was never written.
getNumSubs :: Ptr PCRE -> IO CInt
getNumSubs pcre_ptr =
    alloca $ \st -> do
        rc <- c_pcre_fullinfo pcre_ptr nullPtr (fi pcreInfoCapturecount) st
        if rc /= 0
            then ioError (userError ("RRegex.PCRE: pcre_fullinfo failed with code " ++ show rc))
            else peek st

-- | Matches a regular expression against a string
execute :: Regex    -- ^ Compiled regular expression
        -> String   -- ^ String to match against
        -> Int      -- ^ Options (summed together), passed on to pcre_exec
        -> IO (Maybe (Array Int (Int,Int)))
                    -- ^ Returns: 'Nothing' if the regex did not match the
                    -- string, or:
                    --   'Just' an array of (offset,length) pairs where index 0
                    --   is whole match, and the rest are the captured
                    --   subexpressions. Entries pcre_exec did not report are
                    --   filled with (-1,0).
execute (Regex pcre_fptr) str flags =
    withCStringLen str $ \(cstr, clen) ->
    withForeignPtr pcre_fptr $ \pcre_ptr -> do
        nsub <- getNumSubs pcre_ptr
        let groups   = fi nsub + 1 :: Int   -- whole match plus each capture
            ovecSize = groups * 3           -- three ints per group, as before
        allocaArray ovecSize $ \p_match -> do
            -- Start offset is 0. The caller flags are forwarded as the
            -- options argument (previously a literal 0 was passed and the
            -- flags parameter was ignored).
            r <- c_pcre_exec pcre_ptr nullPtr cstr (fi clen) 0 (fi flags)
                             p_match (fi ovecSize)
            if r < 0
                then return Nothing
                else do
                    let filled = fi r :: Int
                    offsets <- peekArray (filled * 2) p_match
                    let unset = replicate (groups - filled) (-1, 0)
                    return $ Just (listArray (0, groups - 1) (toSpans offsets ++ unset))
  where
    -- Turn consecutive (start, end) offsets into (start, length) pairs.
    -- A trailing unpaired element is dropped rather than crashing.
    toSpans :: [CInt] -> [(Int, Int)]
    toSpans (a:b:rest) = (fi a, fi (b - a)) : toSpans rest
    toSpans _          = []

-- | execute match and extract substrings rather than just offsets
--
-- NOTE(review): the offsets come from the marshalled C string but are applied
-- to the Haskell 'String' by character position; presumably these only agree
-- when every character marshals to exactly one byte. Confirm for non-ASCII
-- input.
executeExtract :: Regex   -- ^ compiled regular expression
               -> String  -- ^ string to match
               -> Int     -- ^ Flags (summed together)
               -> IO (Maybe (String, String, (Array Int String)))
                          -- ^ Returns: Nothing if no match, else
                          --   (text before match, text after match, array of matches with 0 being the whole match)
executeExtract pcre str flags = do
    result <- execute pcre str flags
    return $ fmap extract result
  where
    extract spans = (before, after, fmap substring spans)
      where
        (bo, bl) = spans ! 0
        before = take bo str
        after = drop (bo + bl) str
        substring (o, l) = take l (drop o str)

-- unsafePerformIO: the value is obtained by calling pcre_version and copying
-- the returned C string. NOTE(review): this presumes pcre_version is
-- side-effect free and returns the same string on every call; confirm
-- against the PCRE documentation.
getVersion = unsafePerformIO $ do
    s <- c_pcre_version
    hs <- peekCString s
    return $ Just hs

foreign import ccall unsafe "pcre/pcre.h pcre_compile"
    c_pcre_compile :: Ptr CChar -> CInt -> Ptr (Ptr CChar) -> Ptr CInt -> Ptr CChar -> IO (Ptr PCRE)

foreign import ccall unsafe "pcre/pcre.h pcre_exec"
    c_pcre_exec :: Ptr PCRE -> Ptr () -> Ptr CChar -> CInt -> CInt -> CInt -> Ptr CInt -> CInt -> IO CInt

foreign import ccall unsafe "pcre/pcre.h pcre_fullinfo"
    c_pcre_fullinfo :: Ptr PCRE -> Ptr () -> CInt -> Ptr a -> IO CInt

foreign import ccall unsafe "pcre/pcre.h pcre_version"
    c_pcre_version :: IO (Ptr CChar)

foreign import ccall unsafe "malloc.h &free"
    c_ptr_free :: FunPtr (Ptr a -> IO ())

-- Option flags accepted by compile and execute.
#enum Int,, \
    PCRE_CASELESS, \
    PCRE_MULTILINE, \
    PCRE_DOTALL, \
    PCRE_EXTENDED, \
    PCRE_ANCHORED, \
    PCRE_DOLLAR_ENDONLY, \
    PCRE_EXTRA, \
    PCRE_NOTBOL, \
    PCRE_NOTEOL, \
    PCRE_UNGREEDY, \
    PCRE_NOTEMPTY, \
    PCRE_UTF8

-- Error codes defined by the pcre header.
#enum Int,, \
    PCRE_ERROR_NOMATCH, \
    PCRE_ERROR_NULL, \
    PCRE_ERROR_BADOPTION, \
    PCRE_ERROR_BADMAGIC, \
    PCRE_ERROR_UNKNOWN_NODE, \
    PCRE_ERROR_NOMEMORY, \
    PCRE_ERROR_NOSUBSTRING

-- Request codes for pcre_fullinfo.
#enum Int,, \
    PCRE_INFO_OPTIONS, \
    PCRE_INFO_SIZE, \
    PCRE_INFO_CAPTURECOUNT, \
    PCRE_INFO_BACKREFMAX, \
    PCRE_INFO_FIRSTCHAR, \
    PCRE_INFO_FIRSTTABLE, \
    PCRE_INFO_LASTLITERAL

#else

getVersion = Nothing

#endif /* HAVE_PCRE_H */

{- example.

import RRegex.PCRE
import System
import Maybe

main = do
    print getVersion
    as <- getArgs
    pc <- compile (unwords as) 0
    c <- getContents
    case pc of
        Left x -> print x
        Right pc -> do
            ml <- mapM (\x -> executeExtract pc x 0) (lines c)
            mapM_ print (catMaybes ml)

-}