I have a table that contains billions of records. I want to select records in a batch using the c# while
loop. I used Offset
and Fetch Next
, but it is taking too long to get results. If the table contains below 100k records, it works fine. What is the best way to batch select records?
Sample Code
x
int Count = 500000 // Select Count(id) from table
int OffSet = 1; //Next Time 100000
int FetchNext = 100000; //Next Time 200000
while (OffSet < Count)
{
_strsqlcommand = "Select id from table ORDER BY id OFFSET " + OffSet + " ROWS FETCH NEXT " + FetchNext + " ROWS ONLY"
_sqlCommand = new SqlCommand(_strsqlcommand, _sqlConnection);
_sqlDataReader = _sqlCommand.ExecuteReader();
OffSet += FetchNext;
}
Advertisement
Answer
If your Id is a number and your network Bandwidth is good enough:
- First, find the maximum and minimum value of
Id
. - Second, create a range of numbers to be used in your where clause (Sorting on many records is a high-cost operation. Using
Where Id > @A and Id<@B
is way faster thanfetch next
). - Use TPL to extract data from the database.
namespace BatchLoad
{
class Program
{
static void Main(string[] args)
{
var start = DateTime.Now;
int MinId = 169328112;
int MaxId = 505690099;
int BatchCount = 1000000;
List<QueryExe> Alllist=new List<QueryExe>(BatchCount);
var stack =new ConcurrentStack<int>();
int i = MinId;
int index = 0;
while(i<MaxId+1)
{
int minid = i;
int maxid = i + BatchCount;
string q = $"SELECT [Id] FROM YourTable with(nolock) WHERE Id>={minid} and Id<{maxid} ";
string c = "Data Source=.;Initial Catalog=YourDatabase;Persist Security Info=True;User ID=??;Password=!!!;MultipleActiveResultSets=True";
i = maxid;
Alllist.Add(new QueryExe(q,c, index));
index++;
}
long ProccessCount = 0;
Parallel.ForEach(Alllist, new ParallelOptions {MaxDegreeOfParallelism = 100}, command =>
{
Task.Yield();
var temp = command.GetId();
if(temp?.Count>0)stack.PushRange(temp.ToArray());
Interlocked.Add(ref ProccessCount,1);
var donitems = Interlocked.Read(ref ProccessCount);
if (donitems %10 == 0)
{
Console.WriteLine($"{donitems} / {Alllist.Count} TotalM={stack.Count/1000000} Total={stack.Count}");
}
});
GC.Collect();
Console.WriteLine($"Done>{DateTime.Now.Subtract(start).TotalMilliseconds} count ={stack.Count/1000000}");
Console.ReadLine();
}
}
public class QueryExe
{
private string Q = "";
private string C = "";
private int i = 0;
public QueryExecutor(string Q, string C, int i)
{
this.Q = Q;
this.C = C;
this.i = i;
}
public List<int> GetId()
{
var result = new List<int>();
try
{
SqlConnection conn = new SqlConnection(C);
SqlCommand command = new SqlCommand(this.Q, conn);
command.CommandTimeout = 180;
using (conn)
{
conn.Open();
using (SqlDataReader reader = command.ExecuteReader())
{
while (reader.Read())
{
result.Add(reader.GetInt32(0));
}
}
}
}
catch (Exception ex)
{
Console.WriteLine($"Exception>{i}");
}
return result;
}
}
}