2 Commits

Author SHA1 Message Date
2b16a38a66 fix: improve performance 2024-05-13 15:06:23 -05:00
e87d81cc1d fix: generalize to MonadAff 2024-05-13 11:52:09 -05:00
5 changed files with 73 additions and 58 deletions

BIN
bun.lockb

Binary file not shown.

View File

@@ -9,7 +9,7 @@ workspace:
- datetime: ">=6.1.0 <7.0.0" - datetime: ">=6.1.0 <7.0.0"
- effect: ">=4.0.0 <5.0.0" - effect: ">=4.0.0 <5.0.0"
- exceptions: ">=6.0.0 <7.0.0" - exceptions: ">=6.0.0 <7.0.0"
- foldable-traversable - foldable-traversable: ">=6.0.0 <7.0.0"
- foreign: ">=7.0.0 <8.0.0" - foreign: ">=7.0.0 <8.0.0"
- foreign-object: ">=4.1.0 <5.0.0" - foreign-object: ">=4.1.0 <5.0.0"
- integers: ">=6.0.0 <7.0.0" - integers: ">=6.0.0 <7.0.0"
@@ -18,7 +18,7 @@ workspace:
- newtype: ">=5.0.0 <6.0.0" - newtype: ">=5.0.0 <6.0.0"
- node-buffer: ">=9.0.0 <10.0.0" - node-buffer: ">=9.0.0 <10.0.0"
- node-event-emitter: ">=3.0.0 <4.0.0" - node-event-emitter: ">=3.0.0 <4.0.0"
- node-stream-pipes - node-stream-pipes: ">=1.3.0 <2.0.0"
- node-streams: ">=9.0.0 <10.0.0" - node-streams: ">=9.0.0 <10.0.0"
- nullable: ">=6.0.0 <7.0.0" - nullable: ">=6.0.0 <7.0.0"
- numbers: ">=9.0.1 <10.0.0" - numbers: ">=9.0.1 <10.0.0"
@@ -28,11 +28,11 @@ workspace:
- prelude: ">=6.0.1 <7.0.0" - prelude: ">=6.0.1 <7.0.0"
- record: ">=4.0.0 <5.0.0" - record: ">=4.0.0 <5.0.0"
- record-extra: ">=5.0.1 <6.0.0" - record-extra: ">=5.0.1 <6.0.0"
- st - st: ">=6.2.0 <7.0.0"
- strings: ">=6.0.1 <7.0.0" - strings: ">=6.0.1 <7.0.0"
- tailrec: ">=6.1.0 <7.0.0" - tailrec: ">=6.1.0 <7.0.0"
- transformers: ">=6.0.0 <7.0.0" - transformers: ">=6.0.0 <7.0.0"
- tuples - tuples: ">=7.0.0 <8.0.0"
- typelevel-prelude: ">=7.0.0 <8.0.0" - typelevel-prelude: ">=7.0.0 <8.0.0"
- unsafe-coerce: ">=6.0.0 <7.0.0" - unsafe-coerce: ">=6.0.0 <7.0.0"
test_dependencies: test_dependencies:
@@ -120,12 +120,10 @@ workspace:
- typelevel-prelude - typelevel-prelude
- unfoldable - unfoldable
- unicode - unicode
- unordered-collections
- unsafe-coerce - unsafe-coerce
- variant - variant
extra_packages: extra_packages: {}
node-stream-pipes:
git: https://git.orionkindel.com/orion/purescript-node-stream-pipes
ref: v1.0.5
packages: packages:
aff: aff:
type: registry type: registry
@@ -608,9 +606,9 @@ packages:
dependencies: dependencies:
- effect - effect
node-stream-pipes: node-stream-pipes:
type: git type: registry
url: https://git.orionkindel.com/orion/purescript-node-stream-pipes version: 1.3.0
rev: f2f18c3c13ae2f0f5787ccfb3832fc8c653e83ad integrity: sha256-5Jpf0BLn0ExQWYxbTTewai4M8quEmEVHxihc9CM1Juo=
dependencies: dependencies:
- aff - aff
- arrays - arrays
@@ -618,6 +616,8 @@ packages:
- either - either
- exceptions - exceptions
- foldable-traversable - foldable-traversable
- foreign-object
- lists
- maybe - maybe
- mmorph - mmorph
- newtype - newtype
@@ -627,6 +627,7 @@ packages:
- node-path - node-path
- node-streams - node-streams
- node-zlib - node-zlib
- ordered-collections
- parallel - parallel
- pipes - pipes
- prelude - prelude
@@ -634,6 +635,8 @@ packages:
- strings - strings
- tailrec - tailrec
- transformers - transformers
- tuples
- unordered-collections
- unsafe-coerce - unsafe-coerce
node-streams: node-streams:
type: registry type: registry
@@ -1038,6 +1041,21 @@ packages:
- foldable-traversable - foldable-traversable
- maybe - maybe
- strings - strings
unordered-collections:
type: registry
version: 3.1.0
integrity: sha256-H2eQR+ylI+cljz4XzWfEbdF7ee+pnw2IZCeq69AuJ+Q=
dependencies:
- arrays
- enums
- functions
- integers
- lists
- prelude
- record
- tuples
- typelevel-prelude
- unfoldable
unsafe-coerce: unsafe-coerce:
type: registry type: registry
version: 6.0.0 version: 6.0.0

View File

@@ -10,16 +10,14 @@ package:
strict: true strict: true
pedanticPackages: true pedanticPackages: true
dependencies: dependencies:
- foldable-traversable - node-stream-pipes: ">=1.3.0 <2.0.0"
- node-stream-pipes
- st
- tuples
- aff: ">=7.1.0 <8.0.0" - aff: ">=7.1.0 <8.0.0"
- arrays: ">=7.3.0 <8.0.0" - arrays: ">=7.3.0 <8.0.0"
- bifunctors: ">=6.0.0 <7.0.0" - bifunctors: ">=6.0.0 <7.0.0"
- datetime: ">=6.1.0 <7.0.0" - datetime: ">=6.1.0 <7.0.0"
- effect: ">=4.0.0 <5.0.0" - effect: ">=4.0.0 <5.0.0"
- exceptions: ">=6.0.0 <7.0.0" - exceptions: ">=6.0.0 <7.0.0"
- foldable-traversable: ">=6.0.0 <7.0.0"
- foreign: ">=7.0.0 <8.0.0" - foreign: ">=7.0.0 <8.0.0"
- foreign-object: ">=4.1.0 <5.0.0" - foreign-object: ">=4.1.0 <5.0.0"
- integers: ">=6.0.0 <7.0.0" - integers: ">=6.0.0 <7.0.0"
@@ -37,9 +35,11 @@ package:
- prelude: ">=6.0.1 <7.0.0" - prelude: ">=6.0.1 <7.0.0"
- record: ">=4.0.0 <5.0.0" - record: ">=4.0.0 <5.0.0"
- record-extra: ">=5.0.1 <6.0.0" - record-extra: ">=5.0.1 <6.0.0"
- st: ">=6.2.0 <7.0.0"
- strings: ">=6.0.1 <7.0.0" - strings: ">=6.0.1 <7.0.0"
- tailrec: ">=6.1.0 <7.0.0" - tailrec: ">=6.1.0 <7.0.0"
- transformers: ">=6.0.0 <7.0.0" - transformers: ">=6.0.0 <7.0.0"
- tuples: ">=7.0.0 <8.0.0"
- typelevel-prelude: ">=7.0.0 <8.0.0" - typelevel-prelude: ">=7.0.0 <8.0.0"
- unsafe-coerce: ">=6.0.0 <7.0.0" - unsafe-coerce: ">=6.0.0 <7.0.0"
test: test:
@@ -53,7 +53,4 @@ package:
- simple-json - simple-json
- spec - spec
workspace: workspace:
extraPackages: extraPackages: {}
node-stream-pipes:
git: 'https://git.orionkindel.com/orion/purescript-node-stream-pipes'
ref: 'v1.0.5'

View File

@@ -10,5 +10,5 @@ import Test.Spec.Reporter (specReporter)
import Test.Spec.Runner (defaultConfig, runSpec') import Test.Spec.Runner (defaultConfig, runSpec')
main :: Effect Unit main :: Effect Unit
main = launchAff_ $ runSpec' (defaultConfig { timeout = Nothing }) [ specReporter ] do main = launchAff_ $ runSpec' (defaultConfig { failFast = true, timeout = Nothing }) [ specReporter ] do
Test.Pipes.CSV.spec Test.Pipes.CSV.spec

View File

@@ -6,27 +6,31 @@ import Control.Monad.Gen (chooseInt)
import Control.Monad.Rec.Class (Step(..), tailRecM) import Control.Monad.Rec.Class (Step(..), tailRecM)
import Data.Array as Array import Data.Array as Array
import Data.DateTime (DateTime) import Data.DateTime (DateTime)
import Data.Foldable (fold) import Data.Foldable (fold, sum)
import Data.Maybe (Maybe(..), fromJust) import Data.Maybe (Maybe(..), fromJust)
import Data.Newtype (wrap) import Data.Newtype (wrap)
import Data.PreciseDateTime (fromRFC3339String, toDateTimeLossy) import Data.PreciseDateTime (fromRFC3339String, toDateTimeLossy)
import Data.String.CodePoints as String.CodePoints
import Data.Tuple.Nested ((/\))
import Effect.Class (liftEffect) import Effect.Class (liftEffect)
import Effect.Console (log)
import Node.Encoding (Encoding(..)) import Node.Encoding (Encoding(..))
import Partial.Unsafe (unsafePartial) import Partial.Unsafe (unsafePartial)
import Pipes (yield, (>->)) import Pipes (yield, (>->))
import Pipes (each) as Pipes
import Pipes.CSV as Pipes.CSV import Pipes.CSV as Pipes.CSV
import Pipes.Collect as Pipes.Collect import Pipes.Collect as Pipes.Collect
import Pipes.Construct as Pipes.Construct
import Pipes.Node.Buffer as Pipes.Buffer import Pipes.Node.Buffer as Pipes.Buffer
import Pipes.Node.Stream as Pipes.Stream import Pipes.Node.Stream as Pipes.Stream
import Pipes.Prelude (map, toListM) as Pipes import Pipes.Prelude (chain, map, toListM) as Pipes
import Pipes.Util as Pipes.Util import Pipes.Util as Pipes.Util
import Test.QuickCheck.Gen (randomSample') import Test.QuickCheck.Gen (randomSample')
import Test.Spec (Spec, describe, it) import Test.Spec (Spec, before, describe, it)
import Test.Spec.Assertions (shouldEqual) import Test.Spec.Assertions (shouldEqual)
csv :: String csv :: String
csv = """created,flag,foo,id csv =
"""created,flag,foo,id
2020-01-01T00:00:00.0Z,true,a,1 2020-01-01T00:00:00.0Z,true,a,1
2024-02-02T08:00:00.0Z,false,apple,2 2024-02-02T08:00:00.0Z,false,apple,2
1970-01-01T00:00:00.0Z,true,hello,3 1970-01-01T00:00:00.0Z,true,hello,3
@@ -41,48 +45,44 @@ spec =
it "stringify" do it "stringify" do
let let
objs = objs =
[ {id: 1, foo: "a", flag: true, created: dt "2020-01-01T00:00:00Z"} [ { id: 1, foo: "a", flag: true, created: dt "2020-01-01T00:00:00Z" }
, {id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z"} , { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" }
, {id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z"} , { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" }
] ]
csv' <- map fold $ Pipes.Collect.collectArray $ Pipes.Stream.withEOS (Pipes.each objs) >-> Pipes.CSV.stringify >-> Pipes.Stream.unEOS csv' <- map fold $ Pipes.Collect.toArray $ Pipes.Stream.withEOS (Pipes.Construct.eachArray objs) >-> Pipes.CSV.stringify >-> Pipes.Stream.unEOS
csv' `shouldEqual` csv csv' `shouldEqual` csv
describe "parse" do describe "parse" do
it "parses csv" do it "parses csv" do
rows <- map Array.fromFoldable rows <- map Array.fromFoldable
$ Pipes.toListM $ Pipes.toListM
$ Pipes.Stream.withEOS (yield csv) $ Pipes.Stream.withEOS (yield csv)
>-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8) >-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8)
>-> Pipes.CSV.parse >-> Pipes.CSV.parse
>-> Pipes.Stream.unEOS >-> Pipes.Stream.unEOS
rows `shouldEqual` rows `shouldEqual`
[ {id: 1, foo: "a", flag: true, created: dt "2020-01-01T00:00:00Z"} [ { id: 1, foo: "a", flag: true, created: dt "2020-01-01T00:00:00Z" }
, {id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z"} , { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" }
, {id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z"} , { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" }
] ]
it "parses large csv" do before
nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9) (do
let nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9)
csvRows = ["id\n"] <> ((_ <> "\n") <$> show <$> nums) let
csv' = chars = [ "i","d","\n" ] <> join ((\n -> [show n, "\n"]) <$> nums)
let bufs <- Pipes.Collect.toArray
go ix $ Pipes.Stream.withEOS (Pipes.Construct.eachArray chars)
| Just a <- Array.index csvRows ix = yield a $> Loop (ix + 1) >-> Pipes.Util.chunked 1000
| otherwise = pure $ Done unit >-> Pipes.Stream.inEOS (Pipes.map fold >-> Pipes.Buffer.fromString UTF8)
in >-> Pipes.Stream.unEOS
tailRecM go 0 pure $ nums /\ bufs
in16kbChunks = )
Pipes.Util.chunked 16000 $ it "parses large csv" \(nums /\ bufs) -> do
>-> Pipes.Stream.inEOS (Pipes.map fold) rows <-
>-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8) Pipes.Collect.toArray
$ Pipes.Stream.withEOS (Pipes.Construct.eachArray bufs)
>-> Pipes.CSV.parse @(id :: Int)
>-> Pipes.Stream.unEOS
rows <- rows `shouldEqual` ((\id -> { id }) <$> nums)
Pipes.Collect.collectArray
$ Pipes.Stream.withEOS csv'
>-> in16kbChunks
>-> Pipes.CSV.parse
>-> Pipes.Stream.unEOS
rows `shouldEqual` ((\id -> {id}) <$> nums)