Skip to content

Commit c416a32

Browse files
committed
Cleanup memstore (#133)
Simplify memstore into a metadata store for the stream. We now leverage the metadata store only for stream metadata. Part of #121
1 parent e4e22e5 commit c416a32

14 files changed

+452
-383
lines changed

server/src/error.rs

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,23 @@
1+
/*
2+
* Parseable Server (C) 2022 Parseable, Inc.
3+
*
4+
* This program is free software: you can redistribute it and/or modify
5+
* it under the terms of the GNU Affero General Public License as
6+
* published by the Free Software Foundation, either version 3 of the
7+
* License, or (at your option) any later version.
8+
*
9+
* This program is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
* GNU Affero General Public License for more details.
13+
*
14+
* You should have received a copy of the GNU Affero General Public License
15+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
*
17+
*/
18+
119
use arrow::error::ArrowError;
20+
use aws_sdk_s3::Error as AWSS3Error;
221
use datafusion::error::DataFusionError;
322
use parquet::errors::ParquetError;
423

@@ -10,6 +29,8 @@ pub enum Error {
1029
Io(std::io::Error),
1130
#[error("serde_json error: {0}")]
1231
Serde(serde_json::Error),
32+
#[error("S3 error: {0}")]
33+
S3(AWSS3Error),
1334
#[error("Event error: {0}")]
1435
Event(EventError),
1536
#[error("Parquet error: {0}")]
@@ -18,17 +39,19 @@ pub enum Error {
1839
Arrow(ArrowError),
1940
#[error("Data Fusion error: {0}")]
2041
DataFusion(DataFusionError),
21-
#[error("logstream name cannot be empty")]
42+
#[error("log stream name cannot be empty")]
2243
EmptyName,
23-
#[error("logstream name cannot contain spaces: {0}")]
44+
#[error("log stream name cannot contain spaces: {0}")]
2445
NameWhiteSpace(String),
25-
#[error("logstream name cannot contain special characters: {0}")]
46+
#[error("log stream name cannot contain special characters: {0}")]
2647
NameSpecialChar(String),
27-
#[error("logstream name cannot contain uppercase characters: {0}")]
48+
#[error("log stream name cannot contain uppercase characters: {0}")]
2849
NameUpperCase(String),
29-
#[error("logstream name cannot be numeric only: {0}")]
50+
#[error("log stream name cannot be numeric only: {0}")]
3051
NameNumericOnly(String),
31-
#[error("logstream name cannot be a sql keyword: {0}")]
52+
#[error("log stream name cannot start with a number: {0}")]
53+
NameCantStartWithNumber(String),
54+
#[error("log stream name cannot be a sql keyword: {0}")]
3255
SQLKeyword(String),
3356
#[error("queries across multiple streams are not supported currently: {0}")]
3457
MultipleStreams(String),
@@ -38,10 +61,16 @@ pub enum Error {
3861
Join(String),
3962
#[error("Missing record batch")]
4063
MissingRecord,
41-
#[error("Missing path information")]
42-
MissingPath,
43-
#[error("Couldn't get lock on MEM_STREAMS")]
64+
#[error("Couldn't get lock on STREAM_INFO")]
4465
StreamLock,
66+
#[error("Metadata not found for log stream: {0}")]
67+
StreamMetaNotFound(String),
68+
#[error("Schema not found for log stream: {0}")]
69+
SchemaNotFound(String),
70+
#[error("Alert config not found for log stream: {0}")]
71+
AlertConfigNotFound(String),
72+
#[error("Invalid alert config: {0}")]
73+
InvalidAlert(String),
4574
}
4675

4776
impl From<std::io::Error> for Error {
@@ -73,3 +102,9 @@ impl From<ArrowError> for Error {
73102
Error::Arrow(e)
74103
}
75104
}
105+
106+
impl From<AWSS3Error> for Error {
107+
fn from(e: AWSS3Error) -> Error {
108+
Error::S3(e)
109+
}
110+
}

server/src/event.rs

Lines changed: 37 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -27,39 +27,39 @@ use std::fs;
2727
use std::io::{BufReader, Cursor, Seek, SeekFrom, Write};
2828
use std::sync::Arc;
2929

30-
use crate::mem_store;
30+
use crate::metadata;
3131
use crate::response;
3232
use crate::storage;
3333
use crate::Error;
3434

35-
// Event holds all values relevant to a single event for a single logstream
35+
// Event holds all values relevant to a single event for a single log stream
3636
pub struct Event {
3737
pub body: String,
3838
pub stream_name: String,
3939
pub path: String,
4040
pub schema: Bytes,
4141
}
4242

43-
// Events holds the schema related to a each event for a single logstream
43+
// Events holds the schema related to a each event for a single log stream
4444
pub struct Schema {
4545
pub arrow_schema: arrow::datatypes::Schema,
4646
pub string_schema: String,
4747
}
4848

4949
impl Event {
5050
pub fn process(&self) -> Result<response::EventResponse, Error> {
51-
// If the .schema file is still empty, this is the first event in this logstream.
51+
// If the .schema file is still empty, this is the first event in this log stream.
5252
if self.schema.is_empty() {
53-
self.initial_event()
53+
self.first_event()
5454
} else {
55-
self.next_event()
55+
self.event()
5656
}
5757
}
5858

59-
// This is called when the first event of a LogStream is received. The first event is
60-
// special because we parse this event to generate the schema for the logstream. This
61-
// schema is then enforced on rest of the events sent to this logstream.
62-
fn initial_event(&self) -> Result<response::EventResponse, Error> {
59+
// This is called when the first event of a log stream is received. The first event is
60+
// special because we parse this event to generate the schema for the log stream. This
61+
// schema is then enforced on rest of the events sent to this log stream.
62+
fn first_event(&self) -> Result<response::EventResponse, Error> {
6363
let mut c = Cursor::new(Vec::new());
6464
let reader = self.body.as_bytes();
6565

@@ -75,39 +75,41 @@ impl Event {
7575
);
7676
let b1 = event.next()?.ok_or(Error::MissingRecord)?;
7777

78-
// Put the event into in memory store
79-
mem_store::MEM_STREAMS::put(
80-
self.stream_name.to_string(),
81-
mem_store::LogStream {
82-
schema: Some(self.infer_schema().string_schema),
83-
rb: Some(b1.clone()),
84-
},
85-
);
86-
8778
// Store record batch to Parquet file on local cache
8879
self.convert_arrow_parquet(b1);
8980

9081
// Put the inferred schema to object store
91-
storage::put_schema(&self.stream_name, self.infer_schema().string_schema).map_err(|e| {
82+
let schema = self.infer_schema().string_schema;
83+
let stream_name = &self.stream_name;
84+
storage::put_schema(stream_name.clone(), schema.clone()).map_err(|e| {
9285
Error::Event(response::EventError {
9386
msg: format!(
94-
"Failed to upload schema for LogStream {} due to err: {}",
87+
"Failed to upload schema for log stream {} due to err: {}",
9588
self.stream_name, e
9689
),
9790
})
9891
})?;
9992

93+
if let Err(e) = metadata::STREAM_INFO.set_schema(stream_name.to_string(), schema) {
94+
return Err(Error::Event(response::EventError {
95+
msg: format!(
96+
"Failed to set schema for log stream {} due to err: {}",
97+
stream_name, e
98+
),
99+
}));
100+
}
101+
100102
Ok(response::EventResponse {
101103
msg: format!(
102-
"Intial Event recieved for LogStream {}, schema uploaded successfully",
104+
"Intial Event recieved for log stream {}, schema uploaded successfully",
103105
self.stream_name
104106
),
105107
})
106108
}
107109

108-
// next_event process all events after the 1st event. Concatenates record batches
110+
// event process all events after the 1st event. Concatenates record batches
109111
// and puts them in memory store for each event.
110-
fn next_event(&self) -> Result<response::EventResponse, Error> {
112+
fn event(&self) -> Result<response::EventResponse, Error> {
111113
let mut c = Cursor::new(Vec::new());
112114
let reader = self.body.as_bytes();
113115
c.write_all(reader).unwrap();
@@ -119,31 +121,22 @@ impl Event {
119121
1024,
120122
None,
121123
);
122-
let next_event_rb = event.next().unwrap().unwrap();
124+
let _next_event_rb = event.next().unwrap().unwrap();
123125

124-
let rb = mem_store::MEM_STREAMS::get_rb(self.stream_name.clone())?;
126+
// TODO -- Read existing data file and append the record and write it back
127+
// let vec = vec![next_event_rb.clone(), rb];
128+
// let new_batch = RecordBatch::concat(&next_event_rb.schema(), &vec);
125129

126-
let vec = vec![next_event_rb.clone(), rb];
127-
let new_batch = RecordBatch::concat(&next_event_rb.schema(), &vec);
128-
129-
let rb = new_batch.map_err(|e| {
130-
Error::Event(response::EventError {
131-
msg: format!("Error recieved for LogStream {}, {}", &self.stream_name, e),
132-
})
133-
})?;
134-
135-
mem_store::MEM_STREAMS::put(
136-
self.stream_name.clone(),
137-
mem_store::LogStream {
138-
schema: Some(mem_store::MEM_STREAMS::get_schema(self.stream_name.clone())),
139-
rb: Some(rb.clone()),
140-
},
141-
);
130+
// let rb = new_batch.map_err(|e| {
131+
// Error::Event(response::EventError {
132+
// msg: format!("Error recieved for log stream {}, {}", &self.stream_name, e),
133+
// })
134+
// })?;
142135

143-
self.convert_arrow_parquet(rb);
136+
// self.convert_arrow_parquet(rb);
144137

145138
Ok(response::EventResponse {
146-
msg: format!("Event recieved for LogStream {}", &self.stream_name),
139+
msg: format!("Event recieved for log stream {}", &self.stream_name),
147140
})
148141
}
149142

0 commit comments

Comments
 (0)